PDF文書からテキスト、画像、署名などの情報を抽出できます。
他に、PDFの準拠レベル、暗号化や保護の有無、作成者、タイトル、作成日などのメタデータといった文書属性も抽出できます。
Toolbox Add-onのAPIリファレンスはこちらです。(すべて英文)
C#のサンプルプロジェクトではPdftools SDK(Toolbox Add-on)ライブラリ(DLL)をNuGetから自動でダウンロードします。
CのサンプルプロジェクトにはPdftools SDK(Toolbox Add-on)ライブラリ(DLL)が含まれています。
License Agreement(利用許諾契約書)が含まれていますので必ず確認してください。
このプロジェクトでは、ページ上の位置に基づいてテキストコンテンツを単語や文に組み立てるヒューリスティック(発見的手法)の使用例も示します。
PDFからページごとに抽出したテキストをコンソールに書き込みます。
2つのテキストが同じ単語に属しているかどうかをヒューリスティックに判断します。
// Open the input PDF read-only; the using statements dispose the stream and the document.
using (Stream source = new FileStream(inPath, FileMode.Open, FileAccess.Read))
using (Document document = Document.Open(source, null))
{
    // Counter for the page banner; the first page is printed as "Page: 1".
    int pageIndex = 0;

    // Process each page
    foreach (var page in document.Pages)
    {
        pageIndex += 1;
        Console.WriteLine("==========");
        Console.WriteLine($"Page: {pageIndex}");
        Console.WriteLine("==========");

        var contentExtractor = new ContentExtractor(page.Content)
        {
            Ungrouping = UngroupingSelection.All
        };

        // Walk every content element and hand only the text elements to WriteText
        foreach (ContentElement item in contentExtractor)
        {
            if (!(item is TextElement textItem))
                continue;
            WriteText(textItem.Text);
        }
    }
}
/// <summary>
/// Concatenates the fragments of a text object into one line and writes it to the console.
/// A single space is inserted in front of every fragment that <see cref="IsWordBreak"/>
/// classifies as the start of a new word.
/// </summary>
/// <param name="text">Text object whose fragments are read by index, in order.</param>
private static void WriteText(Text text)
{
    // A StringBuilder avoids copying the whole line again for every appended fragment.
    var line = new System.Text.StringBuilder();

    for (int iFragment = 0; iFragment < text.Count; iFragment++)
    {
        TextFragment currFragment = text[iFragment];

        // The first fragment never gets a leading space.
        if (iFragment > 0 && IsWordBreak(text[iFragment - 1], currFragment))
            line.Append(' ');

        line.Append(currFragment.Text);
    }

    Console.WriteLine(line.ToString());
}

/// <summary>
/// Heuristically decides whether <paramref name="current"/> belongs to a different word
/// than <paramref name="previous"/>.
/// </summary>
/// <param name="previous">Fragment that precedes <paramref name="current"/>.</param>
/// <param name="current">Fragment being appended.</param>
/// <returns><c>true</c> when a space should separate the two fragments.</returns>
private static bool IsWordBreak(TextFragment previous, TextFragment current)
{
    // Any difference in these text properties is treated as a word boundary.
    if (current.CharacterSpacing != previous.CharacterSpacing ||
        current.FontSize != previous.FontSize ||
        current.HorizontalScaling != previous.HorizontalScaling ||
        current.Rise != previous.Rise ||
        current.WordSpacing != previous.WordSpacing)
        return true;

    // Otherwise compare the transformed bounding boxes: the bottom-right corner of the
    // previous fragment against the bottom-left corner of the current one.
    Point currentBotLeft = current.Transform.TransformRectangle(current.BoundingBox).BottomLeft;
    Point beforeBotRight = previous.Transform.TransformRectangle(previous.BoundingBox).BottomRight;

    // Gap wider than 0.7 x font size horizontally, or an offset of more than
    // 0.1 x font size vertically in either direction, counts as a new word.
    return beforeBotRight.X < currentBotLeft.X - 0.7 * current.FontSize ||
           beforeBotRight.Y < currentBotLeft.Y - 0.1 * current.FontSize ||
           currentBotLeft.Y < beforeBotRight.Y - 0.1 * current.FontSize;
}
def write_text(text: Text):
    """Print the fragments of ``text`` as a single line, spacing words heuristically.

    The first fragment is emitted as is. Every later fragment is prefixed with one
    space when it is judged to start a new word, and appended directly otherwise.
    """

    def starts_new_word(prev, curr) -> bool:
        # A change in any of these text properties counts as a word boundary.
        compared = (
            "character_spacing",
            "font_size",
            "horizontal_scaling",
            "rise",
            "word_spacing",
        )
        if any(getattr(curr, attr) != getattr(prev, attr) for attr in compared):
            return True

        # Compare the transformed boxes: left edge of the current fragment against
        # the right edge of the previous one.
        left = curr.transform.transform_rectangle(curr.bounding_box).bottom_left
        right = prev.transform.transform_rectangle(prev.bounding_box).bottom_right

        # Horizontal gap above 0.7 x font size, or a vertical offset above
        # 0.1 x font size in either direction, means a new word.
        return (
            right.x < left.x - 0.7 * curr.font_size
            or right.y < left.y - 0.1 * curr.font_size
            or left.y < right.y - 0.1 * curr.font_size
        )

    pieces = []
    for index, fragment in enumerate(text):
        if index > 0 and starts_new_word(text[index - 1], fragment):
            pieces.append(f" {fragment.text}")
        else:
            pieces.append(fragment.text)

    print("".join(pieces))
# Open the input PDF
with open(input_file_path, "rb") as in_stream, Document.open(in_stream, None) as in_doc:
    # Process each page; enumerate supplies the 1-based number shown in the banner
    for page_number, in_page in enumerate(in_doc.pages, start=1):
        print(f"==========\nPage: {page_number}\n==========")

        extractor = ContentExtractor(in_page.content)
        extractor.ungrouping = UngroupingSelection.ALL

        # Iterate over all content elements and handle only the text elements
        text_elements = (item for item in extractor if isinstance(item, TextElement))
        for text_element in text_elements:
            write_text(text_element.text)
画像抽出機能はPDFに埋め込まれた画像コンテンツ要素を画像ファイルとして出力します。
画像はBMP、JPEG、JPEG2000、JBIG2、PNG、GIF、TIFF形式で出力できます。
PDFに埋め込まれた画像データを、その圧縮形式に応じてJPEGまたはTIFFとして抽出します。
// Open input document pInStream = _tfopen(szInPath, _T("rb")); GOTO_CLEANUP_IF_NULL(pInStream, _T("Failed to open input file \"%s\".\n"), szInPath); PtxSysCreateFILEStreamDescriptor(&descriptor, pInStream, 0); pInDoc = PtxPdf_Document_Open(&descriptor, _T("")); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pInDoc, _T("Input file \"%s\" cannot be opened. %s (ErrorCode: 0x%08x).\n"), szInPath, szErrorBuff, Ptx_GetLastError()); // Loop over all pages and extract images pInPageList = PtxPdf_Document_GetPages(pInDoc); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pInPageList, _T("Failed to get the pages of the input document. %s (ErrorCode: 0x%08x).\n"), szErrorBuff, Ptx_GetLastError()); for (int iPageNo = 0; iPageNo < PtxPdf_PageList_GetCount(pInPageList); iPageNo++) { pPage = PtxPdf_PageList_Get(pInPageList, iPageNo); pContent = PtxPdf_Page_GetContent(pPage); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pContent, _T("Failed to get content from page %d. %s (ErrorCode: 0x%08x).\n"), iPageNo + 1, szErrorBuff, Ptx_GetLastError()); pExtractor = PtxPdfContent_ContentExtractor_New(pContent); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pExtractor, _T("Failed to create content extractor. %s (ErrorCode: 0x%08x).\n"), szErrorBuff, Ptx_GetLastError()); GOTO_CLEANUP_IF_FALSE_PRINT_ERROR(extractImages(pExtractor, iPageNo + 1, szOutputDir), _T("Error occurred while extracting images. %s (ErrorCode: 0x%08x).\n"), szErrorBuff, Ptx_GetLastError()); if (pPage != NULL) { Ptx_Release(pPage); pPage = NULL; } if (pContent != NULL) { Ptx_Release(pContent); pContent = NULL; } }
/**
 * Writes every image and image mask delivered by a content extractor to a file.
 *
 * Images are written to "<szOutputDir>/image_page<P>_<N>.jpg" when their default
 * image type is JPEG and to ".tiff" otherwise; image masks are written to
 * "<szOutputDir>/image_mask_page<P>_<N>.tiff". N counts images and masks separately.
 *
 * An extraction that fails with ePtx_Error_Generic only prints the error message and
 * the loop continues; any other extraction error aborts the function.
 *
 * @param pExtractor  Content extractor to iterate over.
 * @param iPageNo     Page number used in the output file names.
 * @param szOutputDir Directory prefix for the output files.
 * @return TRUE on success, FALSE if a failure was recorded.
 */
int extractImages(TPtxPdfContent_ContentExtractor* pExtractor, int iPageNo, const TCHAR* szOutputDir)
{
    int                                      iImgCount       = 0;
    int                                      iImgMaskCount   = 0;
    int                                      bFailed         = 0; /* set on a non-generic extraction error */
    TPtxPdfContent_ContentExtractorIterator* pIterator       = NULL;
    TPtxPdfContent_ContentElement*           pContentElement = NULL;
    TPtxPdfContent_Image*                    pImage          = NULL;
    TPtxPdfContent_ImageMask*                pImageMask      = NULL;
    TCHAR*                                   szExtension     = NULL;
    FILE*                                    pOutStream      = NULL;

    pIterator = PtxPdfContent_ContentExtractor_GetIterator(pExtractor);
    GOTO_CLEANUP_IF_NULL(pIterator, _T("Failed to get iterator.\n"));
    PtxPdfContent_ContentExtractorIterator_MoveNext(pIterator);

    /* GetValue returning NULL ends the iteration. */
    while ((pContentElement = PtxPdfContent_ContentExtractorIterator_GetValue(pIterator)) != NULL)
    {
        TPtxPdfContent_ContentElementType iType = PtxPdfContent_ContentElement_GetType(pContentElement);
        if (iType == ePtxPdfContent_ContentElementType_ImageElement)
        {
            iImgCount++;
            pImage = PtxPdfContent_ImageElement_GetImage((TPtxPdfContent_ImageElement*)pContentElement);
            GOTO_CLEANUP_IF_NULL(pImage, _T("Failed to get image.\n"));

            const TPtxPdfContent_ImageType iImageType = PtxPdfContent_Image_GetDefaultImageType(pImage);
            if (iImageType == ePtxPdfContent_ImageType_Jpeg)
                szExtension = _T(".jpg");
            else
                szExtension = _T(".tiff");

            /* NOTE(review): fixed 256-character buffer; a long szOutputDir can overflow it. */
            TCHAR szOutPath[256] = {'\0'};
            _stprintf(szOutPath, _T("%s/image_page%d_%d%s"), szOutputDir, iPageNo, iImgCount, szExtension);

            pOutStream = _tfopen(szOutPath, _T("wb+"));
            GOTO_CLEANUP_IF_NULL(pOutStream, _T("Failed to open output file \"%s\".\n"), szOutPath);

            TPtxSys_StreamDescriptor outDescriptor;
            PtxSysCreateFILEStreamDescriptor(&outDescriptor, pOutStream, 0);
            if (PtxPdfContent_Image_Extract(pImage, &outDescriptor, NULL) == FALSE)
            {
                if (Ptx_GetLastError() == ePtx_Error_Generic)
                {
                    nBufSize = Ptx_GetLastErrorMessage(NULL, 0);
                    Ptx_GetLastErrorMessage(szErrorBuff, MIN(ARRAY_SIZE(szErrorBuff), nBufSize));
                    /* Print through "%s": the message must never be used as the format string. */
                    _tprintf(_T("%s"), szErrorBuff);
                }
                else
                {
                    /* Leave through cleanup so the handles and the open file are released. */
                    bFailed = 1;
                    goto cleanup;
                }
            }

            if (pImage != NULL)
            {
                Ptx_Release(pImage);
                pImage = NULL;
            }
            if (pOutStream != NULL)
            {
                fclose(pOutStream);
                pOutStream = NULL;
            }
        }
        else if (iType == ePtxPdfContent_ContentElementType_ImageMaskElement)
        {
            iImgMaskCount++;
            pImageMask =
                PtxPdfContent_ImageMaskElement_GetImageMask((TPtxPdfContent_ImageMaskElement*)pContentElement);
            GOTO_CLEANUP_IF_NULL(pImageMask, _T("Failed to get image mask.\n"));

            /* Image masks are always written as TIFF. */
            szExtension = _T(".tiff");

            /* NOTE(review): fixed 256-character buffer; a long szOutputDir can overflow it. */
            TCHAR szOutPath[256] = {'\0'};
            _stprintf(szOutPath, _T("%s/image_mask_page%d_%d%s"), szOutputDir, iPageNo, iImgMaskCount, szExtension);

            pOutStream = _tfopen(szOutPath, _T("wb+"));
            GOTO_CLEANUP_IF_NULL(pOutStream, _T("Failed to open output file \"%s\".\n"), szOutPath);

            TPtxSys_StreamDescriptor outDescriptor;
            PtxSysCreateFILEStreamDescriptor(&outDescriptor, pOutStream, 0);
            if (PtxPdfContent_ImageMask_Extract(pImageMask, &outDescriptor, NULL) == FALSE)
            {
                if (Ptx_GetLastError() == ePtx_Error_Generic)
                {
                    nBufSize = Ptx_GetLastErrorMessage(NULL, 0);
                    Ptx_GetLastErrorMessage(szErrorBuff, MIN(ARRAY_SIZE(szErrorBuff), nBufSize));
                    /* Print through "%s": the message must never be used as the format string. */
                    _tprintf(_T("%s"), szErrorBuff);
                }
                else
                {
                    /* Leave through cleanup so the handles and the open file are released. */
                    bFailed = 1;
                    goto cleanup;
                }
            }

            if (pImageMask != NULL)
            {
                Ptx_Release(pImageMask);
                pImageMask = NULL;
            }
            if (pOutStream != NULL)
            {
                fclose(pOutStream);
                pOutStream = NULL;
            }
        }

        if (pContentElement != NULL)
        {
            Ptx_Release(pContentElement);
            pContentElement = NULL;
        }
        PtxPdfContent_ContentExtractorIterator_MoveNext(pIterator);
    }

cleanup:
    if (pImage != NULL)
        Ptx_Release(pImage);
    if (pImageMask != NULL)
        Ptx_Release(pImageMask);
    if (pContentElement != NULL)
        Ptx_Release(pContentElement);
    if (pIterator != NULL)
        Ptx_Release(pIterator);
    if (pOutStream != NULL)
        fclose(pOutStream);

    /* iReturnValue is declared outside this function; the value 1 is treated as failure here. */
    return (bFailed || iReturnValue == 1) ? FALSE : TRUE;
}
// Open input document
using (Stream source = new FileStream(inPath, FileMode.Open, FileAccess.Read))
using (Document document = Document.Open(source, null))
{
    // Visit the pages in order; the page number passed to ExtractImages starts at 1
    int pageNo = 0;
    foreach (var page in document.Pages)
    {
        pageNo++;
        var pageExtractor = new ContentExtractor(page.Content);
        ExtractImages(pageExtractor, pageNo, outputDir);
    }
}
/// <summary>
/// Saves every image and image mask produced by <paramref name="extractor"/> as a file
/// in <paramref name="outputDir"/>.
/// </summary>
/// <remarks>
/// Images are named <c>image_page{pageNo}_{n}</c> with extension <c>.jpg</c> when their
/// default image type is JPEG and <c>.tiff</c> otherwise. Image masks are named
/// <c>image_mask_page{pageNo}_{n}.tiff</c>. Images and masks are counted separately.
/// A <c>GenericException</c> raised while writing a file is printed and the loop continues.
/// </remarks>
/// <param name="extractor">Content extractor to enumerate.</param>
/// <param name="pageNo">Page number used in the file names.</param>
/// <param name="outputDir">Directory the files are written to.</param>
private static void ExtractImages(ContentExtractor extractor, int pageNo, string outputDir)
{
    // Creates or overwrites the file and lets the callback write into the open stream.
    void Save(string fileName, Action<Stream> writeTo)
    {
        string outputPath = System.IO.Path.Combine(outputDir, fileName);
        try
        {
            using (Stream imageStream = new FileStream(outputPath, FileMode.Create, FileAccess.ReadWrite))
            {
                writeTo(imageStream);
            }
        }
        catch (GenericException ex)
        {
            Console.WriteLine(ex.ToString());
        }
    }

    int imageNo = 0;
    int maskNo = 0;

    foreach (ContentElement contentElement in extractor)
    {
        if (contentElement is ImageElement imageElement)
        {
            imageNo++;

            // Only JPEG gets its own extension; every other type is saved as TIFF.
            string extension = imageElement.Image.DefaultImageType == ImageType.Jpeg ? ".jpg" : ".tiff";

            Save($"image_page{pageNo}_{imageNo}{extension}", target => imageElement.Image.Extract(target));
            continue;
        }

        if (contentElement is ImageMaskElement maskElement)
        {
            maskNo++;
            string extension = ".tiff";
            Save($"image_mask_page{pageNo}_{maskNo}{extension}", target => maskElement.ImageMask.Extract(target));
        }
    }
}
def extract_image(image_element: ImageElement, output_path: str):
    """Write the image of ``image_element`` to the file at ``output_path``.

    The file is opened in ``wb+`` mode, so an existing file is truncated.
    """
    with open(output_path, mode="wb+") as target:
        image_element.image.extract(target)
def extract_image_mask(image_mask_element: ImageMaskElement, output_path: str):
    """Write the image mask of ``image_mask_element`` to the file at ``output_path``.

    The file is opened in ``wb+`` mode, so an existing file is truncated.
    """
    with open(output_path, mode="wb+") as target:
        image_mask_element.image_mask.extract(target)
def process_page_content(page: Page, page_number: int, output_dir: str):
    """Extract all images and image masks of ``page`` into ``output_dir``.

    Images are saved as ``image_page{page_number}_{n}`` with ``.jpg`` for JPEG images
    and ``.tiff`` for everything else; masks are saved as
    ``image_mask_page{page_number}_{n}.tiff``. Images and masks are numbered
    separately, and the path of every written file is printed.
    """
    image_total = 0
    mask_total = 0

    for element in ContentExtractor(page.content):
        # Image elements
        if isinstance(element, ImageElement):
            image_total += 1

            suffix = ".tiff"
            if element.image.default_image_type == ImageType.JPEG:
                suffix = ".jpg"

            target = os.path.join(output_dir, f"image_page{page_number}_{image_total}{suffix}")
            extract_image(element, target)
            print(f"Extracted image: {target}")
            continue

        # Image masks
        if isinstance(element, ImageMaskElement):
            mask_total += 1
            target = os.path.join(output_dir, f"image_mask_page{page_number}_{mask_total}.tiff")
            extract_image_mask(element, target)
            print(f"Extracted image mask: {target}")
# Create the output directory when it is missing
os.makedirs(output_dir, exist_ok=True)

# Open the input document and process its pages, numbering them from 1
with io.FileIO(input_file_path, "rb") as in_stream, Document.open(in_stream, None) as in_doc:
    numbered_pages = enumerate(in_doc.pages, start=1)
    for page_number, page in numbered_pages:
        process_page_content(page, page_number, output_dir)
名前、日付、連絡先などの署名情報を自動的に抽出します。
PDF内のすべての署名フィールドとそのプロパティを一覧表示します。
// 入力PDFを開く pInStream = _tfopen(szInPath, _T("rb")); GOTO_CLEANUP_IF_NULL(pInStream, _T("Failed to open input file \"%s\".\n"), szInPath); PtxSysCreateFILEStreamDescriptor(&descriptor, pInStream, 0); pInDoc = PtxPdf_Document_Open(&descriptor, _T("")); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pInDoc, _T("Input file \"%s\" cannot be opened. %s (ErrorCode: 0x%08x).\n"), szInPath, szErrorBuff, Ptx_GetLastError()); // 入力PDFの電子署名を取得 pSignatureFields = PtxPdf_Document_GetSignatureFields(pInDoc); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pSignatureFields, _T("Failed to get signatures of input PDF. %s (ErrorCode: 0x%08x).\n"), szErrorBuff, Ptx_GetLastError()); _tprintf(_T("Number of signature fields: %d\n"), PtxPdfForms_SignatureFieldList_GetCount(pSignatureFields)); for (int i = 0; i < PtxPdfForms_SignatureFieldList_GetCount(pSignatureFields); i++) { pSig = PtxPdfForms_SignatureFieldList_Get(pSignatureFields, i); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pSig, _T("Failed to get signature. %s (ErrorCode: 0x%08x).\n"), szErrorBuff, Ptx_GetLastError()); TPtxPdfForms_SignatureFieldType iFieldType = PtxPdfForms_SignatureField_GetType(pSig); if (iFieldType == ePtxPdfForms_SignatureFieldType_Signature || iFieldType == ePtxPdfForms_SignatureFieldType_DocMdpSignature || iFieldType == ePtxPdfForms_SignatureFieldType_DocumentSignature) { // Name(名前)をリスト size_t nName = PtxPdfForms_SignedSignatureField_GetName(pSig, NULL, 0); _tprintf(_T("- %s fields"), PtxPdfForms_SignatureField_IsVisible(pSig) ? 
_T("Visible") : _T("Invisible")); if (nName != 0) { TCHAR* szName = (TCHAR*)malloc(nName * sizeof(TCHAR)); if (szName != NULL) { PtxPdfForms_SignedSignatureField_GetName(pSig, szName, nName); _tprintf(_T(", signed by: %s"), szName); free(szName); } } _tprintf(_T("\n")); // Location(ロケーション)をリスト size_t nLocation = PtxPdfForms_Signature_GetLocation(pSig, NULL, 0); if (nLocation != 0) { TCHAR* szLocation = (TCHAR*)malloc(nLocation * sizeof(TCHAR)); if (szLocation != NULL) { PtxPdfForms_Signature_GetLocation(pSig, szLocation, nLocation); _tprintf(_T(" - Location: %s\n"), szLocation); free(szLocation); } } // Reason(理由)をリスト size_t nReason = PtxPdfForms_Signature_GetReason(pSig, NULL, 0); if (nReason != 0) { TCHAR* szReason = (TCHAR*)malloc(nReason * sizeof(TCHAR)); if (szReason != NULL) { PtxPdfForms_Signature_GetReason(pSig, szReason, nReason); _tprintf(_T(" - Reason: %s\n"), szReason); free(szReason); } } // Contact(連絡先)情報をリスト size_t nContactInfo = PtxPdfForms_Signature_GetContactInfo(pSig, NULL, 0); if (nContactInfo != 0) { TCHAR* szContactInfo = (TCHAR*)malloc(nContactInfo * sizeof(TCHAR)); if (szContactInfo != NULL) { PtxPdfForms_Signature_GetContactInfo(pSig, szContactInfo, nContactInfo); _tprintf(_T(" - Contact info: %s\n"), szContactInfo); free(szContactInfo); } } // Date(日時)をリスト if (PtxPdfForms_SignedSignatureField_GetDate(pSig, &date) == TRUE) { _tprintf(_T(" - Date: %02d-%02d-%d %02d:%02d:%02d%c%02d:%02d\n"), date.iYear, date.iMonth, date.iDay, date.iHour, date.iMinute, date.iSecond, date.iTZSign >= 0 ? '+' : '-', date.iTZHour, date.iTZMinute); } } else { _tprintf(_T("- %s field, not signed\n"), PtxPdfForms_SignatureField_IsVisible(pSig) ? _T("Visible") : _T("Invisible")); } }
// Open the input PDF
using (Stream source = new FileStream(inPath, FileMode.Open, FileAccess.Read))
using (Document document = Document.Open(source, null))
{
    SignatureFieldList allFields = document.SignatureFields;
    Console.WriteLine("Number of signature fields: {0}", allFields.Count);

    foreach (SignatureField field in allFields)
    {
        // Fields that are not a Signature are reported as unsigned.
        if (!(field is Signature sig))
        {
            Console.WriteLine("- {0} field, not signed", field.IsVisible ? "Visible" : "Invisible");
            continue;
        }

        // List the name, with a placeholder when it is null
        string signer = sig.Name ?? "(Unknown name)";
        string visibility = sig.IsVisible ? "Visible" : "Invisible";
        Console.WriteLine("- {0} fields, signed by: {1}", visibility, signer);

        // List the location
        string location = sig.Location;
        if (location != null)
        {
            Console.WriteLine(" - Location: {0}", location);
        }

        // List the reason
        string reason = sig.Reason;
        if (reason != null)
        {
            Console.WriteLine(" - Reason: {0}", reason);
        }

        // List the contact information
        string contactInfo = sig.ContactInfo;
        if (contactInfo != null)
        {
            Console.WriteLine(" - Contact info: {0}", contactInfo);
        }

        // List the date
        DateTimeOffset? signedAt = sig.Date;
        if (signedAt.HasValue)
        {
            Console.WriteLine(" - Date: {0}", signedAt.Value);
        }
    }
}
def list_signatures(in_doc: Document):
    """Print the signature fields of ``in_doc`` and the properties of the signed ones.

    The number of fields is printed first. Fields that are ``Signature`` instances are
    listed with name, location, reason, contact info and date (each only when set);
    all other fields are listed as not signed.
    """
    # Get the list of signature fields
    signature_fields = in_doc.signature_fields
    print(f"Number of signature fields: {len(signature_fields)}")

    for field in signature_fields:
        if not isinstance(field, Signature):
            visibility = "Visible" if field.is_visible else "Invisible"
            print(f"- {visibility} field, not signed")
            continue

        # List the name, with a placeholder when it is missing or empty
        name = field.name or "(Unknown name)"
        visibility = "Visible" if field.is_visible else "Invisible"
        print(f"- {visibility} field, signed by: {name}")

        # List the location
        location = field.location
        if location:
            print(f" - Location: {location}")

        # List the reason
        reason = field.reason
        if reason:
            print(f" - Reason: {reason}")

        # List the contact information
        contact_info = field.contact_info
        if contact_info:
            print(f" - Contact info: {contact_info}")

        # List the date
        signed_at = field.date
        if signed_at:
            print(f" - Date: {signed_at}")
# Open the input PDF and list every signature field of the document
with io.FileIO(input_file_path, "rb") as in_stream, Document.open(in_stream, None) as in_doc:
    list_signatures(in_doc)
PDFの属性(PDF準拠性や暗号化情報)とメタデータ(作成者、タイトル、作成日など)を一覧表示します。
// 入力PDFを開く pInStream = _tfopen(szInPath, _T("rb")); GOTO_CLEANUP_IF_NULL(pInStream, _T("Failed to open input file \"%s\".\n"), szInPath); PtxSysCreateFILEStreamDescriptor(&descriptor, pInStream, 0); pInDoc = PtxPdf_Document_Open(&descriptor, szPassword); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pInDoc, _T("Input file \"%s\" cannot be opened. %s (ErrorCode: 0x%08x).\n"), szInPath, szErrorBuff, Ptx_GetLastError()); // PDF バージョン TPtxPdf_Conformance conformance = PtxPdf_Document_GetConformance(pInDoc); if (conformance == 0) { GOTO_CLEANUP(szErrorBuff, Ptx_GetLastError()); } _tprintf(_T("Conformance: ")); switch (conformance) { case ePtxPdf_Conformance_Pdf10: _tprintf(_T("PDF 1.0\n")); break; case ePtxPdf_Conformance_Pdf11: _tprintf(_T("PDF 1.1\n")); break; case ePtxPdf_Conformance_Pdf12: _tprintf(_T("PDF 1.2\n")); break; case ePtxPdf_Conformance_Pdf13: _tprintf(_T("PDF 1.3\n")); break; case ePtxPdf_Conformance_Pdf14: _tprintf(_T("PDF 1.4\n")); break; case ePtxPdf_Conformance_Pdf15: _tprintf(_T("PDF 1.5\n")); break; case ePtxPdf_Conformance_Pdf16: _tprintf(_T("PDF 1.6\n")); break; case ePtxPdf_Conformance_Pdf17: _tprintf(_T("PDF 1.7\n")); break; case ePtxPdf_Conformance_Pdf20: _tprintf(_T("PDF 2.0\n")); break; case ePtxPdf_Conformance_PdfA1B: _tprintf(_T("PDF/A1-b\n")); break; case ePtxPdf_Conformance_PdfA1A: _tprintf(_T("PDF/A1-a\n")); break; case ePtxPdf_Conformance_PdfA2B: _tprintf(_T("PDF/A2-b\n")); break; case ePtxPdf_Conformance_PdfA2U: _tprintf(_T("PDF/A2-u\n")); break; case ePtxPdf_Conformance_PdfA2A: _tprintf(_T("PDF/A2-a\n")); break; case ePtxPdf_Conformance_PdfA3B: _tprintf(_T("PDF/A3-b\n")); break; case ePtxPdf_Conformance_PdfA3U: _tprintf(_T("PDF/A3-u\n")); break; case ePtxPdf_Conformance_PdfA3A: _tprintf(_T("PDF/A3-a\n")); break; } // 暗号化情報 TPtxPdf_Permission permissions; BOOL iRet = PtxPdf_Document_GetPermissions(pInDoc, &permissions); if (iRet == FALSE) { if (Ptx_GetLastError() != ePtx_Error_Success) GOTO_CLEANUP(szErrorBuff, Ptx_GetLastError()); 
_tprintf(_T("Not encrypted\n")); } else { _tprintf(_T("Encryption:\n")); _tprintf(_T(" - Permissions: ")); if (permissions & ePtxPdf_Permission_Print) _tprintf(_T("Print, ")); if (permissions & ePtxPdf_Permission_Modify) _tprintf(_T("Modify, ")); if (permissions & ePtxPdf_Permission_Copy) _tprintf(_T("Copy, ")); if (permissions & ePtxPdf_Permission_Annotate) _tprintf(_T("Annotate, ")); if (permissions & ePtxPdf_Permission_FillForms) _tprintf(_T("FillForms, ")); if (permissions & ePtxPdf_Permission_SupportDisabilities) _tprintf(_T("SupportDisabilities, ")); if (permissions & ePtxPdf_Permission_Assemble) _tprintf(_T("Assemble, ")); if (permissions & ePtxPdf_Permission_DigitalPrint) _tprintf(_T("DigitalPrint, ")); _tprintf(_T("\n")); } // 入力PDFのメタデータを取得 pMetadata = PtxPdf_Document_GetMetadata(pInDoc); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pMetadata, _T("Failed to get metadata. %s (ErrorCode: 0x%08x).\n"), szErrorBuff, Ptx_GetLastError()); _tprintf(_T("Document information:\n")); // Title(タイトル)を取得 size_t nTitle = PtxPdf_Metadata_GetTitle(pMetadata, NULL, 0); if (nTitle != 0) { TCHAR* szTitle = (TCHAR*)malloc(nTitle * sizeof(TCHAR)); if (szTitle != NULL) { PtxPdf_Metadata_GetTitle(pMetadata, szTitle, nTitle); _tprintf(_T(" - Title: %s\n"), szTitle); free(szTitle); } } // Author(著者)を取得 size_t nAuthor = PtxPdf_Metadata_GetAuthor(pMetadata, NULL, 0); if (nAuthor != 0) { TCHAR* szAuthor = (TCHAR*)malloc(nAuthor * sizeof(TCHAR)); if (szAuthor != NULL) { PtxPdf_Metadata_GetAuthor(pMetadata, szAuthor, nAuthor); _tprintf(_T(" - Author: %s\n"), szAuthor); free(szAuthor); } } // Creator(作成者)を取得 size_t nCreator = PtxPdf_Metadata_GetCreator(pMetadata, NULL, 0); if (nCreator != 0) { TCHAR* szCreator = (TCHAR*)malloc(nCreator * sizeof(TCHAR)); if (szCreator != NULL) { PtxPdf_Metadata_GetCreator(pMetadata, szCreator, nCreator); _tprintf(_T(" - Creator: %s\n"), szCreator); free(szCreator); } } // Producerを取得 size_t nProducer = PtxPdf_Metadata_GetProducer(pMetadata, NULL, 0); if (nProducer 
!= 0) { TCHAR* szProducer = (TCHAR*)malloc(nProducer * sizeof(TCHAR)); if (szProducer != NULL) { PtxPdf_Metadata_GetProducer(pMetadata, szProducer, nProducer); _tprintf(_T(" - Producer: %s\n"), szProducer); free(szProducer); } } // Subjectを取得 size_t nSubject = PtxPdf_Metadata_GetSubject(pMetadata, NULL, 0); if (nSubject != 0) { TCHAR* szSubject = (TCHAR*)malloc(nSubject * sizeof(TCHAR)); if (szSubject != NULL) { PtxPdf_Metadata_GetSubject(pMetadata, szSubject, nSubject); _tprintf(_T(" - Subject: %s\n"), szSubject); free(szSubject); } } // Keywordsを取得 size_t nKeywords = PtxPdf_Metadata_GetKeywords(pMetadata, NULL, 0); if (nKeywords != 0) { TCHAR* szKeywords = (TCHAR*)malloc(nKeywords * sizeof(TCHAR)); if (szKeywords != NULL) { PtxPdf_Metadata_GetKeywords(pMetadata, szKeywords, nKeywords); _tprintf(_T(" - Keywords: %s\n"), szKeywords); free(szKeywords); } } // 作成日時を取得 if (PtxPdf_Metadata_GetCreationDate(pMetadata, &date) == TRUE) { _tprintf(_T(" - Creation Date: %02d-%02d-%d %02d:%02d:%02d%c%02d:%02d\n"), date.iYear, date.iMonth, date.iDay, date.iHour, date.iMinute, date.iSecond, date.iTZSign >= 0 ? '+' : '-', date.iTZHour, date.iTZMinute); } // 変更日時を取得 if (PtxPdf_Metadata_GetModificationDate(pMetadata, &date) == TRUE) { _tprintf(_T(" - Modification Date: %02d-%02d-%d %02d:%02d:%02d%c%02d:%02d\n"), date.iYear, date.iMonth, date.iDay, date.iHour, date.iMinute, date.iSecond, date.iTZSign >= 0 ? '+' : '-', date.iTZHour, date.iTZMinute); } // 独自のエントリを取得 _tprintf(_T("Custom entries:\n")); TPtx_StringMap* pCustomEntries = PtxPdf_Metadata_GetCustomEntries(pMetadata); GOTO_CLEANUP_IF_NULL_PRINT_ERROR(pCustomEntries, _T("Failed to get custom entries. 
%s (ErrorCode: 0x%08x).\n"), szErrorBuff, Ptx_GetLastError()); for (int i = Ptx_StringMap_GetBegin(pCustomEntries), iEnd = Ptx_StringMap_GetEnd(pCustomEntries); i != iEnd; i = Ptx_StringMap_GetNext(pCustomEntries, i)) { size_t nKeySize = Ptx_StringMap_GetKey(pCustomEntries, i, NULL, 0); TCHAR* szKey = (TCHAR*)malloc(nKeySize * sizeof(TCHAR)); nKeySize = Ptx_StringMap_GetKey(pCustomEntries, i, szKey, nKeySize); size_t nValueSize = Ptx_StringMap_GetValue(pCustomEntries, i, NULL, 0); TCHAR* szValue = (TCHAR*)malloc(nValueSize * sizeof(TCHAR)); nValueSize = Ptx_StringMap_GetValue(pCustomEntries, i, szValue, nValueSize); if (szKey && nKeySize && szValue && nValueSize) _tprintf(_T(" - %s: %s\n"), szKey, szValue); free(szKey); free(szValue); }
// Open the input PDF
using (Stream source = new FileStream(inPath, FileMode.Open, FileAccess.Read))
using (Document document = Document.Open(source, password))
{
    // PDF version / conformance
    Console.WriteLine("Conformance: {0}", document.Conformance.ToString());

    // Encryption information: a null permission value is reported as "Not encrypted"
    Permission? granted = document.Permissions;
    if (granted.HasValue)
    {
        Console.WriteLine("Encryption:");
        Console.Write(" - Permissions: ");
        foreach (Enum flag in Enum.GetValues(typeof(Permission)))
        {
            if (!granted.Value.HasFlag(flag))
                continue;
            Console.Write("{0}, ", flag.ToString());
        }
        Console.WriteLine();
    }
    else
    {
        Console.WriteLine("Not encrypted");
    }

    // Get the metadata; entries that are null are skipped
    Metadata info = document.Metadata;
    Console.WriteLine("Document information:");

    // Title
    string title = info.Title;
    if (title != null)
    {
        Console.WriteLine(" - Title: {0}", title);
    }

    // Author
    string author = info.Author;
    if (author != null)
    {
        Console.WriteLine(" - Author: {0}", author);
    }

    // Subject
    string subject = info.Subject;
    if (subject != null)
    {
        Console.WriteLine(" - Subject: {0}", subject);
    }

    // Keywords
    string keywords = info.Keywords;
    if (keywords != null)
    {
        Console.WriteLine(" - Keywords: {0}", keywords);
    }

    // Creation date
    DateTimeOffset? createdAt = info.CreationDate;
    if (createdAt != null)
    {
        Console.WriteLine(" - Creation Date: {0}", createdAt);
    }

    // Modification date
    DateTimeOffset? modifiedAt = info.ModificationDate;
    if (modifiedAt != null)
    {
        Console.WriteLine(" - Modification Date: {0}", modifiedAt);
    }

    // Creator
    string creator = info.Creator;
    if (creator != null)
    {
        Console.WriteLine(" - Creator: {0}", creator);
    }

    // Producer
    string producer = info.Producer;
    if (producer != null)
    {
        Console.WriteLine(" - Producer: {0}", producer);
    }

    // Custom entries
    Console.WriteLine("Custom entries:");
    foreach (var pair in info.CustomEntries)
    {
        Console.WriteLine(" - {0}: {1}", pair.Key, pair.Value);
    }
}
def display_permissions(permissions: int):
    """Print the name of every ``Permission`` member whose bit is set in ``permissions``.

    One name is printed per line, in the iteration order of ``Permission``.
    """
    for member in Permission:
        # A non-zero bitwise AND means this permission is active
        if permissions & member:
            print(f" - {member.name}")
def list_pdf_info(input_doc: Document):
    """Print conformance, encryption permissions and metadata of ``input_doc``.

    Metadata entries that are unset or empty are skipped. Custom entries are listed
    last as ``key: value`` pairs.
    """
    # PDF version / conformance
    print(f"Conformance: {input_doc.conformance.name}")

    # Encryption information: permissions of None are reported as "Not encrypted"
    granted = input_doc.permissions
    if granted is not None:
        display_permissions(granted)
    else:
        print("Not encrypted")

    # Get the metadata
    info = input_doc.metadata
    print("Document information:")

    # Standard metadata entries
    title = info.title
    if title:
        print(f" - Title: {title}")

    author = info.author
    if author:
        print(f" - Author: {author}")

    subject = info.subject
    if subject:
        print(f" - Subject: {subject}")

    keywords = info.keywords
    if keywords:
        print(f" - Keywords: {keywords}")

    created_at = info.creation_date
    if created_at:
        print(f" - Creation Date: {created_at}")

    modified_at = info.modification_date
    if modified_at:
        print(f" - Modification Date: {modified_at}")

    creator = info.creator
    if creator:
        print(f" - Creator: {creator}")

    producer = info.producer
    if producer:
        print(f" - Producer: {producer}")

    # Custom entries
    print("Custom entries:")
    for entry_key, entry_value in info.custom_entries.items():
        print(f" - {entry_key}: {entry_value}")
# Open the input PDF with the given password and print its information
with io.FileIO(input_file_path, "rb") as in_stream, Document.open(in_stream, pdf_password) as in_doc:
    list_pdf_info(in_doc)
他の機能サンプルを参照してください。
質問のページからお送りいただくようお願いします。
または、メールでsupport@trustss.co.jpあてにお送りください。
ご購入前の技術的質問も無償で対応します。サポート受付ページからお願いします。