Skip to content

Commit 8d03450

Browse files
Google APIs and copybara-github
authored and committed
docs: update client libraries for Enterprise OCR add-ons
PiperOrigin-RevId: 564763833
1 parent 2bd0e7d commit 8d03450

2 files changed

Lines changed: 47 additions & 2 deletions

File tree

google/cloud/documentai/v1beta3/document_io.proto

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,19 @@ message OcrConfig {
115115
repeated string language_hints = 1;
116116
}
117117

118+
// Configurations for premium OCR features.
119+
message PremiumFeatures {
120+
// Turn on selection mark detector in OCR engine. Only available in OCR 2.0+
121+
// processors.
122+
bool enable_selection_mark_detection = 3;
123+
124+
// Turn on font identification model and return font style information.
125+
bool compute_style_info = 4;
126+
127+
// Turn on the model that can extract LaTeX math formulas.
128+
bool enable_math_ocr = 5;
129+
}
130+
118131
// Hints for the OCR model.
119132
Hints hints = 2;
120133

@@ -139,6 +152,16 @@ message OcrConfig {
139152
// Includes symbol level OCR information if set to true.
140153
bool enable_symbol = 6;
141154

142-
// Turn on font id model and returns font style information.
143-
bool compute_style_info = 8;
155+
// Turn on font identification model and return font style information.
156+
// Deprecated, use
157+
// [PremiumFeatures.compute_style_info][google.cloud.documentai.v1beta3.OcrConfig.PremiumFeatures.compute_style_info]
158+
// instead.
159+
bool compute_style_info = 8 [deprecated = true];
160+
161+
// Turn off character box detector in OCR engine. Character box detection is
162+
// enabled by default in OCR 2.0+ processors.
163+
bool disable_character_boxes_detection = 10;
164+
165+
// Configurations for premium OCR features.
166+
PremiumFeatures premium_features = 11;
144167
}

google/cloud/documentai/v1beta3/document_processor_service.proto

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -342,6 +342,28 @@ service DocumentProcessorService {
342342

343343
// Options for Process API
344344
message ProcessOptions {
345+
// A list of individual page numbers.
346+
message IndividualPageSelector {
347+
// Optional. Indices of the pages (starting from 1).
348+
repeated int32 pages = 1 [(google.api.field_behavior) = OPTIONAL];
349+
}
350+
351+
// A subset of pages to process. If not specified, all pages will be
352+
// processed. NOTICE: If any page range is set, we will extract and
353+
// process only the given pages from the document. In the output document,
354+
// the page_number is referring to the page number in the original document.
355+
oneof page_range {
356+
// Which pages to process (1-indexed).
357+
IndividualPageSelector individual_page_selector = 5;
358+
359+
// Only process certain pages from the start, process all if the document
360+
// has fewer pages.
361+
int32 from_start = 6;
362+
363+
// Only process certain pages from the end; all pages if there are fewer.
364+
int32 from_end = 7;
365+
}
366+
345367
// Only applicable to `OCR_PROCESSOR`. Returns error if set on other
346368
// processor types.
347369
OcrConfig ocr_config = 1;

0 commit comments

Comments
 (0)