Skip to content

Commit 1da5299

Browse files
Google APIs and copybara-github
authored and committed
feat: A new message FoundationModelTuningOptions is added
feat: A new field foundation_model_tuning_options is added to message TrainProcessorVersionRequest
feat: A new field `labels` is added to messages `ProcessRequest` and `BatchProcessRequest`
feat: A new field `display_name` is added to message `DocumentSchema`
fix: deprecate `Dataset.document_warehouse_config`
docs: updated comments
PiperOrigin-RevId: 607358355
1 parent 9a9bc9b commit 1da5299

5 files changed

Lines changed: 74 additions & 32 deletions

File tree

google/cloud/documentai/v1beta3/dataset.proto

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,10 @@ message Dataset {
9494
GCSManagedConfig gcs_managed_config = 3
9595
[(google.api.field_behavior) = OPTIONAL];
9696

97-
// Optional. Document AI Warehouse-based dataset configuration.
97+
// Optional. Deprecated. Warehouse-based dataset configuration is not
98+
// supported.
9899
DocumentWarehouseConfig document_warehouse_config = 5
99-
[(google.api.field_behavior) = OPTIONAL];
100+
[deprecated = true, (google.api.field_behavior) = OPTIONAL];
100101

101102
// Optional. Unmanaged dataset configuration. Use this configuration if the
102103
// dataset documents are managed by the document service internally (not

google/cloud/documentai/v1beta3/document.proto

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,8 @@ message Document {
758758
[deprecated = true, (google.api.field_behavior) = OPTIONAL];
759759

760760
// Optional. Identifies the bounding polygon of a layout element on the
761-
// page.
761+
// page. If `layout_type` is set, the bounding polygon must be exactly the
762+
// same as that of the layout element it's referring to.
762763
BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];
763764

764765
// Optional. Confidence of detected page element, if applicable. Range

google/cloud/documentai/v1beta3/document_processor_service.proto

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -349,12 +349,11 @@ message ProcessOptions {
349349
}
350350

351351
// A subset of pages to process. If not specified, all pages are processed.
352-
// If a page range is set, only the given pages are extracted and processed
353-
// from the document. In the output document,
354-
// [Document.Page.page_number][google.cloud.documentai.v1beta3.Document.Page.page_number]
355-
// refers to the page number in the original document. This configuration
356-
// only applies to sync requests. `page_range` can be only one of the
357-
// following:
352+
// If a page range is set, only the given pages are extracted and processed
353+
// from the document. In the output document,
354+
// [Document.Page.page_number][google.cloud.documentai.v1beta3.Document.Page.page_number]
355+
// refers to the page number in the original document. This configuration
356+
// only applies to sync requests.
358357
oneof page_range {
359358
// Which pages to process (1-indexed).
360359
IndividualPageSelector individual_page_selector = 5;
@@ -367,8 +366,8 @@ message ProcessOptions {
367366
int32 from_end = 7;
368367
}
369368

370-
// Only applicable to `OCR_PROCESSOR`. Returns error if set on other
371-
// processor types.
369+
// Only applicable to `OCR_PROCESSOR` and `FORM_PARSER_PROCESSOR`.
370+
// Returns error if set on other processor types.
372371
OcrConfig ocr_config = 1;
373372

374373
// Optional. Override the schema of the
@@ -428,6 +427,14 @@ message ProcessRequest {
428427

429428
// Inference-time options for the process API
430429
ProcessOptions process_options = 7;
430+
431+
// Optional. The labels with user-defined metadata for the request.
432+
//
433+
// Label keys and values can be no longer than 63 characters
434+
// (Unicode codepoints) and can only contain lowercase letters, numeric
435+
// characters, underscores, and dashes. International characters are allowed.
436+
// Label values are optional. Label keys must start with a letter.
437+
map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];
431438
}
432439

433440
// The status of human review on a processed document.
@@ -550,6 +557,14 @@ message BatchProcessRequest {
550557

551558
// Inference-time options for the process API
552559
ProcessOptions process_options = 7;
560+
561+
// Optional. The labels with user-defined metadata for the request.
562+
//
563+
// Label keys and values can be no longer than 63 characters
564+
// (Unicode codepoints) and can only contain lowercase letters, numeric
565+
// characters, underscores, and dashes. International characters are allowed.
566+
// Label values are optional. Label keys must start with a letter.
567+
map<string, string> labels = 9 [(google.api.field_behavior) = OPTIONAL];
553568
}
554569

555570
// Response message for
@@ -878,7 +893,8 @@ message CreateProcessorRequest {
878893

879894
// Required. The processor to be created, requires
880895
// [Processor.type][google.cloud.documentai.v1beta3.Processor.type] and
881-
// [Processor.display_name]][] to be set. Also, the
896+
// [Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name]
897+
// to be set. Also, the
882898
// [Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name]
883899
// field must be set if the processor is under CMEK.
884900
Processor processor = 2 [(google.api.field_behavior) = REQUIRED];
@@ -1027,9 +1043,24 @@ message TrainProcessorVersionRequest {
10271043
TrainingMethod training_method = 3;
10281044
}
10291045

1046+
// Options to control foundation model tuning of the processor.
1047+
message FoundationModelTuningOptions {
1048+
// Optional. The number of steps to run for model tuning. Valid values are
1049+
// between 1 and 400. If not provided, recommended steps will be used.
1050+
int32 train_steps = 2 [(google.api.field_behavior) = OPTIONAL];
1051+
1052+
// Optional. The multiplier to apply to the recommended learning rate. Valid
1053+
// values are between 0.1 and 10. If not provided, recommended learning rate
1054+
// will be used.
1055+
float learning_rate_multiplier = 3 [(google.api.field_behavior) = OPTIONAL];
1056+
}
1057+
10301058
oneof processor_flags {
10311059
// Options to control Custom Document Extraction (CDE) Processor.
10321060
CustomDocumentExtractionOptions custom_document_extraction_options = 5;
1061+
1062+
// Options to control foundation model tuning of a processor.
1063+
FoundationModelTuningOptions foundation_model_tuning_options = 12;
10331064
}
10341065

10351066
// Required. The parent (project, location and processor) to create the new

google/cloud/documentai/v1beta3/document_schema.proto

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,14 @@ message DocumentSchema {
9696
// Defines properties that can be part of the entity type.
9797
message Property {
9898
// Types of occurrences of the entity type in the document. This
99-
// represents the number of instances of instances of an entity, not
100-
// number of mentions of an entity. For example, a bank statement may
101-
// only have one `account_number`, but this account number may be
102-
// mentioned in several places on the document. In this case the
103-
// 'account_number' would be considered a `REQUIRED_ONCE` entity type. If,
104-
// on the other hand, we expect a bank statement to contain the status of
105-
// multiple different accounts for the customers, the occurrence type will
106-
// be set to `REQUIRED_MULTIPLE`.
99+
// represents the number of instances, not mentions, of an entity.
100+
// For example, a bank statement might only have one
101+
// `account_number`, but this account number can be mentioned in several
102+
// places on the document. In this case, the `account_number` is
103+
// considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
104+
// expect a bank statement to contain the status of multiple different
105+
// accounts for the customers, the occurrence type is set to
106+
// `REQUIRED_MULTIPLE`.
107107
enum OccurrenceType {
108108
// Unspecified occurrence type.
109109
OCCURRENCE_TYPE_UNSPECIFIED = 0;
@@ -127,6 +127,9 @@ message DocumentSchema {
127127
// EntityType name.
128128
string name = 1;
129129

130+
// User-defined name for the property.
131+
string display_name = 6;
132+
130133
// A reference to the value type of the property. This type is subject
131134
// to the same conventions as the `Entity.base_types` field.
132135
string value_type = 2;

google/cloud/documentai/v1beta3/document_service.proto

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -135,18 +135,18 @@ enum DatasetSplitType {
135135
DATASET_SPLIT_UNASSIGNED = 3;
136136
}
137137

138-
// Describes the labelling status of a document.
138+
// Describes the labeling status of a document.
139139
enum DocumentLabelingState {
140140
// Default value if the enum is not set.
141141
DOCUMENT_LABELING_STATE_UNSPECIFIED = 0;
142142

143-
// Document has been labelled.
143+
// Document has been labeled.
144144
DOCUMENT_LABELED = 1;
145145

146-
// Document has not been labelled.
146+
// Document has not been labeled.
147147
DOCUMENT_UNLABELED = 2;
148148

149-
// Document has been auto-labelled.
149+
// Document has been auto-labeled.
150150
DOCUMENT_AUTO_LABELED = 3;
151151
}
152152

@@ -315,8 +315,9 @@ message ListDocumentsRequest {
315315
// - String match is case sensitive (for filter `DisplayName` & `EntityType`).
316316
string filter = 4 [(google.api.field_behavior) = OPTIONAL];
317317

318-
// Optional. Controls if the ListDocuments request requires a total size
319-
// of matched documents. See ListDocumentsResponse.total_size.
318+
// Optional. Controls if the request requires a total size of matched
319+
// documents. See
320+
// [ListDocumentsResponse.total_size][google.cloud.documentai.v1beta3.ListDocumentsResponse.total_size].
320321
//
321322
// Enabling this flag may adversely impact performance.
322323
//
@@ -325,19 +326,24 @@ message ListDocumentsRequest {
325326

326327
// Optional. Number of results to skip beginning from the `page_token` if
327328
// provided. https://google.aip.dev/158#skipping-results. It must be a
328-
// non-negative integer. Negative values wil be rejected. Note that this is
329+
// non-negative integer. Negative values will be rejected. Note that this is
329330
// not the number of pages to skip. If this value causes the cursor to move
330-
// past the end of results, `ListDocumentsResponse.document_metadata` and
331-
// `ListDocumentsResponse.next_page_token` will be empty.
331+
// past the end of results,
332+
// [ListDocumentsResponse.document_metadata][google.cloud.documentai.v1beta3.ListDocumentsResponse.document_metadata]
333+
// and
334+
// [ListDocumentsResponse.next_page_token][google.cloud.documentai.v1beta3.ListDocumentsResponse.next_page_token]
335+
// will be empty.
332336
int32 skip = 8 [(google.api.field_behavior) = OPTIONAL];
333337
}
334338

335339
message ListDocumentsResponse {
336340
// Document metadata corresponding to the listed documents.
337341
repeated DocumentMetadata document_metadata = 1;
338342

339-
// A token, which can be sent as `page_token` to retrieve the next page.
340-
// If this field is omitted, there are no subsequent pages.
343+
// A token, which can be sent as
344+
// [ListDocumentsRequest.page_token][google.cloud.documentai.v1beta3.ListDocumentsRequest.page_token]
345+
// to retrieve the next page. If this field is omitted, there are no
346+
// subsequent pages.
341347
string next_page_token = 2;
342348

343349
// Total count of documents queried.
@@ -430,7 +436,7 @@ message DocumentMetadata {
430436
// Type of the dataset split to which the document belongs.
431437
DatasetSplitType dataset_type = 3;
432438

433-
// Labelling state of the document.
439+
// Labeling state of the document.
434440
DocumentLabelingState labeling_state = 5;
435441

436442
// The display name of the document.

0 commit comments

Comments
 (0)