Skip to content

Commit b2b3723

Browse files
Google APIs and copybara-github
authored and committed
feat: add StyleInfo to document.proto
feat: add IMPORTING enum to State in processor.proto
chore: updated comments
PiperOrigin-RevId: 540937637
1 parent 120a89c commit b2b3723

11 files changed

Lines changed: 327 additions & 161 deletions

google/cloud/documentai/v1/barcode.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2022 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

google/cloud/documentai/v1/document.proto

Lines changed: 87 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2022 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -64,7 +64,8 @@ message Document {
6464
// Font size for the text.
6565
float size = 1;
6666

67-
// Unit for the font size. Follows CSS naming (in, px, pt, etc.).
67+
// Unit for the font size. Follows CSS naming (such as `in`, `px`, and
68+
// `pt`).
6869
string unit = 2;
6970
}
7071

@@ -78,17 +79,18 @@ message Document {
7879
// Text background color.
7980
google.type.Color background_color = 3;
8081

81-
// Font weight. Possible values are normal, bold, bolder, and lighter.
82-
// https://www.w3schools.com/cssref/pr_font_weight.asp
82+
// [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
83+
// Possible values are `normal`, `bold`, `bolder`, and `lighter`.
8384
string font_weight = 4;
8485

85-
// Text style. Possible values are normal, italic, and oblique.
86-
// https://www.w3schools.com/cssref/pr_font_font-style.asp
86+
// [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
87+
// Possible values are `normal`, `italic`, and `oblique`.
8788
string text_style = 5;
8889

89-
// Text decoration. Follows CSS standard.
90-
// <text-decoration-line> <text-decoration-color> <text-decoration-style>
91-
// https://www.w3schools.com/cssref/pr_text_text-decoration.asp
90+
// [Text
91+
// decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
92+
// Follows CSS standard. <text-decoration-line> <text-decoration-color>
93+
// <text-decoration-style>
9294
string text_decoration = 6;
9395

9496
// Font size.
@@ -118,7 +120,9 @@ message Document {
118120
// Raw byte content of the image.
119121
bytes content = 1;
120122

121-
// Encoding mime type for the image.
123+
// Encoding [media type (MIME
124+
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
125+
// for the image.
122126
string mime_type = 2;
123127

124128
// Width of the image in pixels.
@@ -255,6 +259,59 @@ message Document {
255259
Type type = 1;
256260
}
257261

262+
// Font and other text style attributes.
263+
message StyleInfo {
264+
// Font size in points (`1` point is `¹⁄₇₂` inches).
265+
int32 font_size = 1;
266+
267+
// Font size in pixels, equal to _unrounded
268+
// [font_size][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_size]_
269+
// * _resolution_ ÷ `72.0`.
270+
double pixel_font_size = 2;
271+
272+
// Letter spacing in points.
273+
double letter_spacing = 3;
274+
275+
// Name or style of the font.
276+
string font_type = 4;
277+
278+
// Whether the text is bold (equivalent to
279+
// [font_weight][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_weight]
280+
// being at least `700`).
281+
bool bold = 5;
282+
283+
// Whether the text is italic.
284+
bool italic = 6;
285+
286+
// Whether the text is underlined.
287+
bool underlined = 7;
288+
289+
// Whether the text is strikethrough.
290+
bool strikeout = 8;
291+
292+
// Whether the text is a subscript.
293+
bool subscript = 9;
294+
295+
// Whether the text is a superscript.
296+
bool superscript = 10;
297+
298+
// Whether the text is in small caps.
299+
bool smallcaps = 11;
300+
301+
// TrueType weight on a scale from `100` (thin) to `1000` (ultra-heavy).
302+
// Normal is `400`, bold is `700`.
303+
int32 font_weight = 12;
304+
305+
// Whether the text is handwritten.
306+
bool handwritten = 13;
307+
308+
// Color of the text.
309+
google.type.Color text_color = 14;
310+
311+
// Color of the background.
312+
google.type.Color background_color = 15;
313+
}
314+
258315
// [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
259316
// [Token][google.cloud.documentai.v1.Document.Page.Token].
260317
Layout layout = 1;
@@ -268,6 +325,9 @@ message Document {
268325

269326
// The history of this annotation.
270327
Provenance provenance = 4 [deprecated = true];
328+
329+
// Text style attributes.
330+
StyleInfo style_info = 5;
271331
}
272332

273333
// A detected symbol.
@@ -333,7 +393,7 @@ message Document {
333393
repeated DetectedLanguage detected_languages = 4;
334394

335395
// The history of this table.
336-
Provenance provenance = 5;
396+
Provenance provenance = 5 [deprecated = true];
337397
}
338398

339399
// A form field detected on the page.
@@ -388,16 +448,16 @@ message Document {
388448

389449
// Detected language for a structural component.
390450
message DetectedLanguage {
391-
// The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
392-
// information, see
393-
// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
451+
// The [BCP-47 language
452+
// code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
453+
// such as `en-US` or `sr-Latn`.
394454
string language_code = 1;
395455

396456
// Confidence of detected language. Range `[0, 1]`.
397457
float confidence = 2;
398458
}
399459

400-
// Image Quality Scores for the page image
460+
// Image quality scores for the page image.
401461
message ImageQualityScores {
402462
// Image Quality Defects
403463
message DetectedDefect {
@@ -413,12 +473,12 @@ message Document {
413473
// - `quality/defect_glare`
414474
string type = 1;
415475

416-
// Confidence of detected defect. Range `[0, 1]` where 1 indicates
417-
// strong confidence of that the defect exists.
476+
// Confidence of detected defect. Range `[0, 1]` where `1` indicates
477+
// strong confidence that the defect exists.
418478
float confidence = 2;
419479
}
420480

421-
// The overall quality score. Range `[0, 1]` where 1 is perfect quality.
481+
// The overall quality score. Range `[0, 1]` where `1` is perfect quality.
422482
float quality_score = 1;
423483

424484
// A list of detected defects.
@@ -482,7 +542,7 @@ message Document {
482542
// A list of detected barcodes.
483543
repeated DetectedBarcode detected_barcodes = 15;
484544

485-
// Image Quality Scores.
545+
// Image quality scores.
486546
ImageQualityScores image_quality_scores = 17;
487547

488548
// The history of this page.
@@ -736,9 +796,9 @@ message Document {
736796
REMOVE = 2;
737797

738798
// Updates any fields within the given provenance scope of the message. It
739-
// 'overwrites' the fields rather than replacing them. This is
740-
// especially relevant when we just want to update a field value of an
741-
// entity without also affecting all the child properties.
799+
// overwrites the fields rather than replacing them. Use this when you
800+
// want to update a field value of an entity without also updating all the
801+
// child properties.
742802
UPDATE = 7;
743803

744804
// Currently unused. Replace an element identified by `parent`.
@@ -835,10 +895,9 @@ message Document {
835895
// Original source document from the user.
836896
oneof source {
837897
// Optional. Currently supports Google Cloud Storage URI of the form
838-
// `gs://bucket_name/object_name`. Object versioning is not supported.
839-
// See [Google Cloud Storage Request
840-
// URIs](https://cloud.google.com/storage/docs/reference-uris) for more
841-
// info.
898+
// `gs://bucket_name/object_name`. Object versioning is not supported.
899+
// For more information, refer to [Google Cloud Storage Request
900+
// URIs](https://cloud.google.com/storage/docs/reference-uris).
842901
string uri = 1 [(google.api.field_behavior) = OPTIONAL];
843902

844903
// Optional. Inline document content, represented as a stream of bytes.
@@ -847,9 +906,8 @@ message Document {
847906
bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
848907
}
849908

850-
// An IANA published MIME type (also referred to as media type). For more
851-
// information, see
852-
// https://www.iana.org/assignments/media-types/media-types.xhtml.
909+
// An IANA published [media type (MIME
910+
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
853911
string mime_type = 3;
854912

855913
// Optional. UTF-8 encoded text in reading order from the document.

google/cloud/documentai/v1/document_io.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2022 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)