Skip to content

Commit 6b780a3

Browse files
Google APIs authored and copybara-github committed
feat: added TrainProcessorVersion, EvaluateProcessorVersion, GetEvaluation, and ListEvaluations v1beta3 APIs
feat: added evaluation.proto
feat: added document_schema field in ProcessorVersion processor.proto
feat: added image_quality_scores field in Document.Page in document.proto
feat: added font_family field in Document.Style in document.proto

PiperOrigin-RevId: 488417413
1 parent b4dea4a commit 6b780a3

10 files changed

Lines changed: 462 additions & 56 deletions

google/cloud/documentai/v1beta3/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ proto_library(
2626
"document_io.proto",
2727
"document_processor_service.proto",
2828
"document_schema.proto",
29+
"evaluation.proto",
2930
"geometry.proto",
3031
"operation_metadata.proto",
3132
"processor.proto",

google/cloud/documentai/v1beta3/barcode.proto

Lines changed: 29 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -28,40 +28,42 @@ option ruby_package = "Google::Cloud::DocumentAI::V1beta3";
2828
message Barcode {
2929
// Format of a barcode.
3030
// The supported formats are:
31-
// CODE_128: Code 128 type.
32-
// CODE_39: Code 39 type.
33-
// CODE_93: Code 93 type.
34-
// CODABAR: Codabar type.
35-
// DATA_MATRIX: 2D Data Matrix type.
36-
// ITF: ITF type.
37-
// EAN_13: EAN-13 type.
38-
// EAN_8: EAN-8 type.
39-
// QR_CODE: 2D QR code type.
40-
// UPC_A: UPC-A type.
41-
// UPC_E: UPC-E type.
42-
// PDF417: PDF417 type.
43-
// AZTEC: 2D Aztec code type.
44-
// DATABAR: GS1 DataBar code type.
31+
//
32+
// - `CODE_128`: Code 128 type.
33+
// - `CODE_39`: Code 39 type.
34+
// - `CODE_93`: Code 93 type.
35+
// - `CODABAR`: Codabar type.
36+
// - `DATA_MATRIX`: 2D Data Matrix type.
37+
// - `ITF`: ITF type.
38+
// - `EAN_13`: EAN-13 type.
39+
// - `EAN_8`: EAN-8 type.
40+
// - `QR_CODE`: 2D QR code type.
41+
// - `UPC_A`: UPC-A type.
42+
// - `UPC_E`: UPC-E type.
43+
// - `PDF417`: PDF417 type.
44+
// - `AZTEC`: 2D Aztec code type.
45+
// - `DATABAR`: GS1 DataBar code type.
4546
string format = 1;
4647

4748
// Value format describes the format of the value that a barcode
4849
// encodes.
4950
// The supported formats are:
50-
// CONTACT_INFO: Contact information.
51-
// EMAIL: Email address.
52-
// ISBN: ISBN identifier.
53-
// PHONE: Phone number.
54-
// PRODUCT: Product.
55-
// SMS: SMS message.
56-
// TEXT: Text string.
57-
// URL: URL address.
58-
// WIFI: Wifi information.
59-
// GEO: Geo-localization.
60-
// CALENDAR_EVENT: Calendar event.
61-
// DRIVER_LICENSE: Driver's license.
51+
//
52+
// - `CONTACT_INFO`: Contact information.
53+
// - `EMAIL`: Email address.
54+
// - `ISBN`: ISBN identifier.
55+
// - `PHONE`: Phone number.
56+
// - `PRODUCT`: Product.
57+
// - `SMS`: SMS message.
58+
// - `TEXT`: Text string.
59+
// - `URL`: URL address.
60+
// - `WIFI`: Wifi information.
61+
// - `GEO`: Geo-localization.
62+
// - `CALENDAR_EVENT`: Calendar event.
63+
// - `DRIVER_LICENSE`: Driver's license.
6264
string value_format = 2;
6365

6466
// Raw value encoded in the barcode.
65-
// For example, 'MEBKM:TITLE:Google;URL:https://www.google.com;;'.
67+
// For example: `'MEBKM:TITLE:Google;URL:https://www.google.com;;'`.
6668
string raw_value = 3;
6769
}

google/cloud/documentai/v1beta3/document.proto

Lines changed: 55 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,10 @@ message Document {
9191

9292
// Font size.
9393
FontSize font_size = 7;
94+
95+
// Font family such as `Arial`, `Times New Roman`.
96+
// https://www.w3schools.com/cssref/pr_font_font-family.asp
97+
string font_family = 8;
9498
}
9599

96100
// A page in a [Document][google.cloud.documentai.v1beta3.Document].
@@ -169,7 +173,7 @@ message Document {
169173

170174
// Confidence of the current [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] within context of the object this
171175
// layout is for. e.g. confidence can be for a single token, a table,
172-
// a visual element, etc. depending on context. Range [0, 1].
176+
// a visual element, etc. depending on context. Range `[0, 1]`.
173177
float confidence = 2;
174178

175179
// The bounding polygon for the [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
@@ -189,7 +193,7 @@ message Document {
189193
repeated DetectedLanguage detected_languages = 2;
190194

191195
// The history of this annotation.
192-
Provenance provenance = 3;
196+
Provenance provenance = 3 [deprecated = true];
193197
}
194198

195199
// A collection of lines that a human would perceive as a paragraph.
@@ -201,7 +205,7 @@ message Document {
201205
repeated DetectedLanguage detected_languages = 2;
202206

203207
// The history of this annotation.
204-
Provenance provenance = 3;
208+
Provenance provenance = 3 [deprecated = true];
205209
}
206210

207211
// A collection of tokens that a human would perceive as a line.
@@ -214,7 +218,7 @@ message Document {
214218
repeated DetectedLanguage detected_languages = 2;
215219

216220
// The history of this annotation.
217-
Provenance provenance = 3;
221+
Provenance provenance = 3 [deprecated = true];
218222
}
219223

220224
// A detected token.
@@ -249,8 +253,8 @@ message Document {
249253
// A list of detected languages together with confidence.
250254
repeated DetectedLanguage detected_languages = 3;
251255

252-
// The history of this annotation.
253-
Provenance provenance = 4;
256+
// The history of this annotation.
257+
Provenance provenance = 4 [deprecated = true];
254258
}
255259

256260
// A detected symbol.
@@ -309,6 +313,9 @@ message Document {
309313

310314
// A list of detected languages together with confidence.
311315
repeated DetectedLanguage detected_languages = 4;
316+
317+
// The history of this table.
318+
Provenance provenance = 5;
312319
}
313320

314321
// A form field detected on the page.
@@ -358,15 +365,43 @@ message Document {
358365

359366
// Detected language for a structural component.
360367
message DetectedLanguage {
361-
// The BCP-47 language code, such as "en-US" or "sr-Latn". For more
368+
// The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
362369
// information, see
363370
// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
364371
string language_code = 1;
365372

366-
// Confidence of detected language. Range [0, 1].
373+
// Confidence of detected language. Range `[0, 1]`.
367374
float confidence = 2;
368375
}
369376

377+
// Image Quality Scores for the page image
378+
message ImageQualityScores {
379+
// Image Quality Defects
380+
message DetectedDefect {
381+
// Name of the defect type. Supported values are:
382+
//
383+
// - `quality/defect_blurry`
384+
// - `quality/defect_noisy`
385+
// - `quality/defect_dark`
386+
// - `quality/defect_faint`
387+
// - `quality/defect_text_too_small`
388+
// - `quality/defect_document_cutoff`
389+
// - `quality/defect_text_cutoff`
390+
// - `quality/defect_glare`
391+
string type = 1;
392+
393+
// Confidence of detected defect. Range `[0, 1]` where 1 indicates
394+
// strong confidence that the defect exists.
395+
float confidence = 2;
396+
}
397+
398+
// The overall quality score. Range `[0, 1]` where 1 is perfect quality.
399+
float quality_score = 1;
400+
401+
// A list of detected defects.
402+
repeated DetectedDefect detected_defects = 2;
403+
}
404+
370405
// 1-based index for current [Page][google.cloud.documentai.v1beta3.Document.Page] in a parent [Document][google.cloud.documentai.v1beta3.Document].
371406
// Useful when a page is taken out of a [Document][google.cloud.documentai.v1beta3.Document] for individual
372407
// processing.
@@ -422,8 +457,11 @@ message Document {
422457
// A list of detected barcodes.
423458
repeated DetectedBarcode detected_barcodes = 15;
424459

460+
// Image Quality Scores.
461+
ImageQualityScores image_quality_scores = 17;
462+
425463
// The history of this page.
426-
Provenance provenance = 16;
464+
Provenance provenance = 16 [deprecated = true];
427465
}
428466

429467
// An entity that could be a phrase in the text or a property that belongs to
@@ -471,6 +509,7 @@ message Document {
471509
// or int normalized text by default.
472510
//
473511
// Below are sample formats mapped to structured values.
512+
//
474513
// - Money/Currency type (`money_value`) is in the ISO 4217 text format.
475514
// - Date type (`date_value`) is in the ISO 8601 text format.
476515
// - Datetime type (`datetime_value`) is in the ISO 8601 text format.
@@ -484,14 +523,13 @@ message Document {
484523
// Required. Entity type from a schema e.g. `Address`.
485524
string type = 2 [(google.api.field_behavior) = REQUIRED];
486525

487-
// Optional. Text value in the document e.g. `1600 Amphitheatre Pkwy`. If the entity
488-
// is not present in the document, this field will be empty.
526+
// Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
489527
string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];
490528

491529
// Optional. Deprecated. Use `id` field instead.
492530
string mention_id = 4 [(google.api.field_behavior) = OPTIONAL];
493531

494-
// Optional. Confidence of detected Schema entity. Range [0, 1].
532+
// Optional. Confidence of detected Schema entity. Range `[0, 1]`.
495533
float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
496534

497535
// Optional. Represents the provenance of this entity wrt. the location on the
@@ -605,7 +643,7 @@ message Document {
605643
// Optional. Identifies the bounding polygon of a layout element on the page.
606644
BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];
607645

608-
// Optional. Confidence of detected page element, if applicable. Range [0, 1].
646+
// Optional. Confidence of detected page element, if applicable. Range `[0, 1]`.
609647
float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
610648
}
611649

@@ -726,7 +764,7 @@ message Document {
726764
string changed_text = 2;
727765

728766
// The history of this annotation.
729-
repeated Provenance provenance = 3;
767+
repeated Provenance provenance = 3 [deprecated = true];
730768
}
731769

732770
// Original source document from the user.
@@ -765,9 +803,9 @@ message Document {
765803
// Placeholder. Relationship among [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
766804
repeated EntityRelation entity_relations = 8;
767805

768-
// Placeholder. A list of text corrections made to [Document.text]. This is
769-
// usually used for annotating corrections to OCR mistakes. Text changes for
770-
// a given revision may not overlap with each other.
806+
// Placeholder. A list of text corrections made to [Document.text][google.cloud.documentai.v1beta3.Document.text]. This
807+
// is usually used for annotating corrections to OCR mistakes. Text changes
808+
// for a given revision may not overlap with each other.
771809
repeated TextChange text_changes = 14;
772810

773811
// Information about the sharding if this document is a sharded part of a larger

google/cloud/documentai/v1beta3/document_io.proto

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ message RawDocument {
3030
bytes content = 1;
3131

3232
// An IANA MIME type (RFC6838) indicating the nature and format of the
33-
// [content].
33+
// [content][google.cloud.documentai.v1beta3.RawDocument.content].
3434
string mime_type = 2;
3535
}
3636

@@ -59,7 +59,7 @@ message GcsPrefix {
5959
message BatchDocumentsInputConfig {
6060
// The source.
6161
oneof source {
62-
// The set of documents that match the specified Cloud Storage [gcs_prefix].
62+
// The set of documents that match the specified Cloud Storage `gcs_prefix`.
6363
GcsPrefix gcs_prefix = 1;
6464

6565
// The set of documents individually specified on Cloud Storage.

0 commit comments

Comments
 (0)