All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DataExtractionModule.h
Go to the documentation of this file.
1 //---------------------------------------------------------------------------------------
2 // Copyright (c) 2001-2025 by Apryse Software Inc. All Rights Reserved.
3 // Consult legal.txt regarding legal and license information.
4 //---------------------------------------------------------------------------------------
5 // !Warning! This file is autogenerated, modify the .codegen file, not this one
6 // (any changes here will be wiped out during the autogen process)
7 
8 #ifndef PDFTRON_H_CPPPDFDataExtractionModule
9 #define PDFTRON_H_CPPPDFDataExtractionModule
10 #include <C/PDF/TRN_DataExtractionModule.h>
11 
13 #include <Common/BasicTypes.h>
14 #include <Common/UString.h>
15 #include <PDF/PDFDoc.h>
16 
17 namespace pdftron { namespace PDF {
18 
24 {
25 public:
30  {
31  e_Tabular = 0,
32  e_Form = 1,
36  };
// Static query: takes a DataExtractionEngine value and returns a bool.
// NOTE(review): presumably reports whether the add-on module backing `engine`
// is available at run time -- confirm against Impl/DataExtractionModule.inl.
44  static bool IsModuleAvailable(DataExtractionEngine engine);
45 
// ExtractData overload that hands its result back to the caller as a UString.
//   input_pdf_file - UString identifying the input PDF (presumably a file path).
//   engine         - DataExtractionEngine selector passed through to the extraction.
//   options        - optional DataExtractionOptions pointer; defaults to 0 (null).
// NOTE(review): the sibling overload takes an `output_json_file`, so the returned
// string is presumably the same JSON payload -- confirm against the .inl.
55  static UString ExtractData(const UString& input_pdf_file, DataExtractionEngine engine, DataExtractionOptions* options = 0);
56 
// ExtractData overload that returns nothing and takes an extra output argument.
//   input_pdf_file   - UString identifying the input PDF (presumably a file path).
//   output_json_file - UString identifying the output (presumably the path the
//                      JSON result is written to -- confirm).
//   engine           - DataExtractionEngine selector passed through to the extraction.
//   options          - optional DataExtractionOptions pointer; defaults to 0 (null).
66  static void ExtractData(const UString& input_pdf_file, const UString& output_json_file, DataExtractionEngine engine, DataExtractionOptions* options = 0);
67 
// Takes the document by non-const reference and returns nothing; unlike the
// ExtractData overloads it has no engine parameter.
//   doc     - PDFDoc passed by mutable reference.
//   options - optional DataExtractionOptions pointer; defaults to 0 (null).
// NOTE(review): going by the name, detected form fields are presumably added to
// `doc` in place -- confirm against Impl/DataExtractionModule.inl.
75  static void DetectAndAddFormFieldsToPDF(PDFDoc& doc, DataExtractionOptions* options = 0);
76 
// ExtractToXLSX overload whose output is named by a UString; no engine parameter.
//   input_pdf_file   - UString identifying the input PDF (presumably a file path).
//   output_xlsx_file - UString identifying the output (presumably the path of the
//                      XLSX workbook to write -- confirm).
//   options          - optional DataExtractionOptions pointer; defaults to 0 (null).
84  static void ExtractToXLSX(const UString& input_pdf_file, const UString& output_xlsx_file, DataExtractionOptions* options = 0);
85 
// ExtractToXLSX overload whose output goes to a Filters::Filter passed by mutable
// reference instead of a named file; no engine parameter.
//   input_pdf_file     - UString identifying the input PDF (presumably a file path).
//   output_xlsx_stream - Filters::Filter receiving the output (presumably the XLSX
//                        bytes are written into it -- confirm).
//   options            - optional DataExtractionOptions pointer; defaults to 0 (null).
// NOTE(review): no include visible in this listing names a Filters header; verify
// that Filters::Filter is declared via one of the existing includes.
93  static void ExtractToXLSX(const UString& input_pdf_file, Filters::Filter& output_xlsx_stream, DataExtractionOptions* options = 0);
94 
95 };
96 
97 #include <Impl/DataExtractionModule.inl>
98 } //end PDF
99 } //end pdftron
100 
101 
102 #endif //PDFTRON_H_CPPPDFDataExtractionModule
static bool IsModuleAvailable(DataExtractionEngine engine)
Form field extraction engine. This engine uses artificial intelligence and computer vision to detect ...
static UString ExtractData(const UString &input_pdf_file, DataExtractionEngine engine, DataExtractionOptions *options=0)
Generic key value extraction engine. This engine uses artificial intelligence to detect arbitrary pai...
static void DetectAndAddFormFieldsToPDF(PDFDoc &doc, DataExtractionOptions *options=0)
Form field with key value extraction engine. This engine uses artificial intelligence and computer vi...
Tabular Data engine. This engine identifies column and row structure and analyzes numeric columns...
static void ExtractToXLSX(const UString &input_pdf_file, const UString &output_xlsx_file, DataExtractionOptions *options=0)
Document structure engine. This engine discovers the full logical structure, including headers...