@@ -91,6 +91,10 @@ message Document {
9191
9292 // Font size.
9393 FontSize font_size = 7 ;
94+
95+ // Font family such as `Arial`, `Times New Roman`.
96+ // https://www.w3schools.com/cssref/pr_font_font-family.asp
97+ string font_family = 8 ;
9498 }
9599
96100 // A page in a [Document][google.cloud.documentai.v1beta3.Document].
@@ -169,7 +173,7 @@ message Document {
169173
170174 // Confidence of the current [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] within context of the object this
171175 // layout is for. e.g. confidence can be for a single token, a table,
172- // a visual element, etc. depending on context. Range [0, 1].
176+ // a visual element, etc. depending on context. Range `[0, 1]`.
173177 float confidence = 2 ;
174178
175179 // The bounding polygon for the [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout].
@@ -189,7 +193,7 @@ message Document {
189193 repeated DetectedLanguage detected_languages = 2 ;
190194
191195 // The history of this annotation.
192- Provenance provenance = 3 ;
196+ Provenance provenance = 3 [ deprecated = true ] ;
193197 }
194198
195199 // A collection of lines that a human would perceive as a paragraph.
@@ -201,7 +205,7 @@ message Document {
201205 repeated DetectedLanguage detected_languages = 2 ;
202206
203207 // The history of this annotation.
204- Provenance provenance = 3 ;
208+ Provenance provenance = 3 [ deprecated = true ] ;
205209 }
206210
207211 // A collection of tokens that a human would perceive as a line.
@@ -214,7 +218,7 @@ message Document {
214218 repeated DetectedLanguage detected_languages = 2 ;
215219
216220 // The history of this annotation.
217- Provenance provenance = 3 ;
221+ Provenance provenance = 3 [ deprecated = true ] ;
218222 }
219223
220224 // A detected token.
@@ -249,8 +253,8 @@ message Document {
249253 // A list of detected languages together with confidence.
250254 repeated DetectedLanguage detected_languages = 3 ;
251255
252- // The history of this annotation.
253- Provenance provenance = 4 ;
256+ // The history of this annotation.
257+ Provenance provenance = 4 [ deprecated = true ] ;
254258 }
255259
256260 // A detected symbol.
@@ -309,6 +313,9 @@ message Document {
309313
310314 // A list of detected languages together with confidence.
311315 repeated DetectedLanguage detected_languages = 4 ;
316+
317+ // The history of this table.
318+ Provenance provenance = 5 ;
312319 }
313320
314321 // A form field detected on the page.
@@ -358,15 +365,43 @@ message Document {
358365
359366 // Detected language for a structural component.
360367 message DetectedLanguage {
361- // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
368+ // The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
362369 // information, see
363370 // https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
364371 string language_code = 1 ;
365372
366- // Confidence of detected language. Range [0, 1].
373+ // Confidence of detected language. Range `[0, 1]`.
367374 float confidence = 2 ;
368375 }
369376
377+ // Image quality scores for the page image.
378+ message ImageQualityScores {
379+ // A defect detected in the page image, with its type and confidence.
380+ message DetectedDefect {
381+ // Name of the defect type. Supported values are:
382+ //
383+ // - `quality/defect_blurry`
384+ // - `quality/defect_noisy`
385+ // - `quality/defect_dark`
386+ // - `quality/defect_faint`
387+ // - `quality/defect_text_too_small`
388+ // - `quality/defect_document_cutoff`
389+ // - `quality/defect_text_cutoff`
390+ // - `quality/defect_glare`
391+ string type = 1 ;
392+
393+ // Confidence of detected defect. Range `[0, 1]` where 1 indicates
394+ // strong confidence that the defect exists.
395+ float confidence = 2 ;
396+ }
397+
398+ // The overall quality score. Range `[0, 1]` where 1 is perfect quality.
399+ float quality_score = 1 ;
400+
401+ // A list of detected defects.
402+ repeated DetectedDefect detected_defects = 2 ;
403+ }
404+
370405 // 1-based index for current [Page][google.cloud.documentai.v1beta3.Document.Page] in a parent [Document][google.cloud.documentai.v1beta3.Document].
371406 // Useful when a page is taken out of a [Document][google.cloud.documentai.v1beta3.Document] for individual
372407 // processing.
@@ -422,8 +457,11 @@ message Document {
422457 // A list of detected barcodes.
423458 repeated DetectedBarcode detected_barcodes = 15 ;
424459
460+ // Image Quality Scores.
461+ ImageQualityScores image_quality_scores = 17 ;
462+
425463 // The history of this page.
426- Provenance provenance = 16 ;
464+ Provenance provenance = 16 [ deprecated = true ] ;
427465 }
428466
429467 // An entity that could be a phrase in the text or a property that belongs to
@@ -471,6 +509,7 @@ message Document {
471509 // or int normalized text by default.
472510 //
473511 // Below are sample formats mapped to structured values.
512+ //
474513 // - Money/Currency type (`money_value`) is in the ISO 4217 text format.
475514 // - Date type (`date_value`) is in the ISO 8601 text format.
476515 // - Datetime type (`datetime_value`) is in the ISO 8601 text format.
@@ -484,14 +523,13 @@ message Document {
484523 // Required. Entity type from a schema e.g. `Address`.
485524 string type = 2 [(google.api.field_behavior ) = REQUIRED ];
486525
487- // Optional. Text value in the document e.g. `1600 Amphitheatre Pkwy`. If the entity
488- // is not present in the document, this field will be empty.
526+ // Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
489527 string mention_text = 3 [(google.api.field_behavior ) = OPTIONAL ];
490528
491529 // Optional. Deprecated. Use `id` field instead.
492530 string mention_id = 4 [(google.api.field_behavior ) = OPTIONAL ];
493531
494- // Optional. Confidence of detected Schema entity. Range [0, 1].
532+ // Optional. Confidence of detected Schema entity. Range `[0, 1]`.
495533 float confidence = 5 [(google.api.field_behavior ) = OPTIONAL ];
496534
497535 // Optional. Represents the provenance of this entity wrt. the location on the
@@ -605,7 +643,7 @@ message Document {
605643 // Optional. Identifies the bounding polygon of a layout element on the page.
606644 BoundingPoly bounding_poly = 4 [(google.api.field_behavior ) = OPTIONAL ];
607645
608- // Optional. Confidence of detected page element, if applicable. Range [0, 1].
646+ // Optional. Confidence of detected page element, if applicable. Range `[0, 1]`.
609647 float confidence = 5 [(google.api.field_behavior ) = OPTIONAL ];
610648 }
611649
@@ -726,7 +764,7 @@ message Document {
726764 string changed_text = 2 ;
727765
728766 // The history of this annotation.
729- repeated Provenance provenance = 3 ;
767+ repeated Provenance provenance = 3 [ deprecated = true ] ;
730768 }
731769
732770 // Original source document from the user.
@@ -765,9 +803,9 @@ message Document {
765803 // Placeholder. Relationship among [Document.entities][google.cloud.documentai.v1beta3.Document.entities].
766804 repeated EntityRelation entity_relations = 8 ;
767805
768- // Placeholder. A list of text corrections made to [Document.text]. This is
769- // usually used for annotating corrections to OCR mistakes. Text changes for
770- // a given revision may not overlap with each other.
806+ // Placeholder. A list of text corrections made to [Document.text][google.cloud.documentai.v1beta3.Document.text]. This
807+ // is usually used for annotating corrections to OCR mistakes. Text changes
808+ // for a given revision may not overlap with each other.
771809 repeated TextChange text_changes = 14 ;
772810
773811 // Information about the sharding if this document is sharded part of a larger
0 commit comments