1- // Copyright 2022 Google LLC
1+ // Copyright 2023 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -64,7 +64,8 @@ message Document {
6464 // Font size for the text.
6565 float size = 1 ;
6666
67- // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
67+ // Unit for the font size. Follows CSS naming (such as `in`, `px`, and
68+ // `pt`).
6869 string unit = 2 ;
6970 }
7071
@@ -78,17 +79,18 @@ message Document {
7879 // Text background color.
7980 google.type.Color background_color = 3 ;
8081
81- // Font weight. Possible values are normal, bold, bolder, and lighter .
82- // https://www.w3schools.com/cssref/pr_font_weight.asp
82+ // [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
83+ // Possible values are `normal`, `bold`, `bolder`, and `lighter`.
8384 string font_weight = 4 ;
8485
85- // Text style. Possible values are normal, italic, and oblique .
86- // https://www.w3schools.com/cssref/pr_font_font-style.asp
86+ // [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
87+ // Possible values are `normal`, `italic`, and `oblique`.
8788 string text_style = 5 ;
8889
89- // Text decoration. Follows CSS standard.
90- // <text-decoration-line> <text-decoration-color> <text-decoration-style>
91- // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
90+ // [Text
91+ // decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
92+ // Follows CSS standard. <text-decoration-line> <text-decoration-color>
93+ // <text-decoration-style>
9294 string text_decoration = 6 ;
9395
9496 // Font size.
@@ -118,7 +120,9 @@ message Document {
118120 // Raw byte content of the image.
119121 bytes content = 1 ;
120122
121- // Encoding mime type for the image.
123+ // Encoding [media type (MIME
124+ // type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
125+ // for the image.
122126 string mime_type = 2 ;
123127
124128 // Width of the image in pixels.
@@ -255,6 +259,59 @@ message Document {
255259 Type type = 1 ;
256260 }
257261
262+ // Font and other text style attributes.
263+ message StyleInfo {
264+ // Font size in points (`1` point is `¹⁄₇₂` of an inch).
265+ int32 font_size = 1 ;
266+
267+ // Font size in pixels, equal to _unrounded
268+ // [font_size][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_size]_
269+ // * _resolution_ ÷ `72.0`.
270+ double pixel_font_size = 2 ;
271+
272+ // Letter spacing in points.
273+ double letter_spacing = 3 ;
274+
275+ // Name or style of the font.
276+ string font_type = 4 ;
277+
278+ // Whether the text is bold (equivalent to
279+ // [font_weight][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_weight]
280+ // being at least `700`).
281+ bool bold = 5 ;
282+
283+ // Whether the text is italic.
284+ bool italic = 6 ;
285+
286+ // Whether the text is underlined.
287+ bool underlined = 7 ;
288+
289+ // Whether the text is struck through.
290+ bool strikeout = 8 ;
291+
292+ // Whether the text is a subscript.
293+ bool subscript = 9 ;
294+
295+ // Whether the text is a superscript.
296+ bool superscript = 10 ;
297+
298+ // Whether the text is in small caps.
299+ bool smallcaps = 11 ;
300+
301+ // TrueType weight on a scale from `100` (thin) to `1000` (ultra-heavy).
302+ // Normal is `400`, bold is `700`.
303+ int32 font_weight = 12 ;
304+
305+ // Whether the text is handwritten.
306+ bool handwritten = 13 ;
307+
308+ // Color of the text.
309+ google.type.Color text_color = 14 ;
310+
311+ // Color of the background.
312+ google.type.Color background_color = 15 ;
313+ }
314+
258315 // [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
259316 // [Token][google.cloud.documentai.v1.Document.Page.Token].
260317 Layout layout = 1 ;
@@ -268,6 +325,9 @@ message Document {
268325
269326 // The history of this annotation.
270327 Provenance provenance = 4 [deprecated = true ];
328+
329+ // Text style attributes.
330+ StyleInfo style_info = 5 ;
271331 }
272332
273333 // A detected symbol.
@@ -333,7 +393,7 @@ message Document {
333393 repeated DetectedLanguage detected_languages = 4 ;
334394
335395 // The history of this table.
336- Provenance provenance = 5 ;
396+ Provenance provenance = 5 [ deprecated = true ] ;
337397 }
338398
339399 // A form field detected on the page.
@@ -388,16 +448,16 @@ message Document {
388448
389449 // Detected language for a structural component.
390450 message DetectedLanguage {
391- // The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
392- // information, see
393- // https://www.unicode.org/reports/tr35/#Unicode_locale_identifier .
451+ // The [BCP-47 language
452+ // code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
453+ // such as `en-US` or `sr-Latn`.
394454 string language_code = 1 ;
395455
396456 // Confidence of detected language. Range `[0, 1]`.
397457 float confidence = 2 ;
398458 }
399459
400- // Image Quality Scores for the page image
460+ // Image quality scores for the page image.
401461 message ImageQualityScores {
402462 // Image Quality Defects
403463 message DetectedDefect {
@@ -413,12 +473,12 @@ message Document {
413473 // - `quality/defect_glare`
414474 string type = 1 ;
415475
416- // Confidence of detected defect. Range `[0, 1]` where 1 indicates
417- // strong confidence of that the defect exists.
476+ // Confidence of detected defect. Range `[0, 1]` where `1` indicates
477+ // strong confidence that the defect exists.
418478 float confidence = 2 ;
419479 }
420480
421- // The overall quality score. Range `[0, 1]` where 1 is perfect quality.
481+ // The overall quality score. Range `[0, 1]` where `1` is perfect quality.
422482 float quality_score = 1 ;
423483
424484 // A list of detected defects.
@@ -482,7 +542,7 @@ message Document {
482542 // A list of detected barcodes.
483543 repeated DetectedBarcode detected_barcodes = 15 ;
484544
485- // Image Quality Scores .
545+ // Image quality scores.
486546 ImageQualityScores image_quality_scores = 17 ;
487547
488548 // The history of this page.
@@ -736,9 +796,9 @@ message Document {
736796 REMOVE = 2 ;
737797
738798 // Updates any fields within the given provenance scope of the message. It
739- // ' overwrites' the fields rather than replacing them. This is
740- // especially relevant when we just want to update a field value of an
741- // entity without also affecting all the child properties.
799+ // overwrites the fields rather than replacing them. Use this when you
800+ // want to update a field value of an entity without also updating all the
801+ // child properties.
742802 UPDATE = 7 ;
743803
744804 // Currently unused. Replace an element identified by `parent`.
@@ -835,10 +895,9 @@ message Document {
835895 // Original source document from the user.
836896 oneof source {
837897 // Optional. Currently supports Google Cloud Storage URI of the form
838- // `gs://bucket_name/object_name`. Object versioning is not supported.
839- // See [Google Cloud Storage Request
840- // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
841- // info.
898+ // `gs://bucket_name/object_name`. Object versioning is not supported.
899+ // For more information, refer to [Google Cloud Storage Request
900+ // URIs](https://cloud.google.com/storage/docs/reference-uris).
842901 string uri = 1 [(google.api.field_behavior ) = OPTIONAL ];
843902
844903 // Optional. Inline document content, represented as a stream of bytes.
@@ -847,9 +906,8 @@ message Document {
847906 bytes content = 2 [(google.api.field_behavior ) = OPTIONAL ];
848907 }
849908
850- // An IANA published MIME type (also referred to as media type). For more
851- // information, see
852- // https://www.iana.org/assignments/media-types/media-types.xhtml.
909+ // An IANA published [media type (MIME
910+ // type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
853911 string mime_type = 3 ;
854912
855913 // Optional. UTF-8 encoded text in reading order from the document.
0 commit comments