Skip to content

Commit 237b3c1

Browse files
Google APIs and copybara-github
authored and committed
feat!: removed id field from Document message
feat: added http configuration and document publishing for v1beta2
feat: added ImportDocuments, GetDocument and BatchDeleteDocuments RPCs for v1beta3
PiperOrigin-RevId: 548236986
1 parent e386b77 commit 237b3c1

16 files changed

Lines changed: 1150 additions & 158 deletions

google/cloud/documentai/v1/BUILD.bazel

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ java_gapic_library(
9191
rest_numeric_enums = True,
9292
service_yaml = "documentai_v1.yaml",
9393
test_deps = [
94-
":documentai_java_grpc",
9594
"//google/cloud/location:location_java_grpc",
95+
":documentai_java_grpc",
9696
],
9797
transport = "grpc+rest",
9898
deps = [
@@ -190,15 +190,9 @@ load(
190190
"@com_google_googleapis_imports//:imports.bzl",
191191
"py_gapic_assembly_pkg",
192192
"py_gapic_library",
193-
"py_proto_library",
194193
"py_test",
195194
)
196195

197-
py_proto_library(
198-
name = "documentai_py_proto",
199-
deps = [":documentai_proto"],
200-
)
201-
202196
py_gapic_library(
203197
name = "documentai_py_gapic",
204198
srcs = [":documentai_proto"],
@@ -252,7 +246,9 @@ php_gapic_library(
252246
rest_numeric_enums = True,
253247
service_yaml = "documentai_v1.yaml",
254248
transport = "grpc+rest",
255-
deps = [":documentai_php_proto"],
249+
deps = [
250+
":documentai_php_proto",
251+
],
256252
)
257253

258254
# Open Source Packages
@@ -362,6 +358,7 @@ load(
362358

363359
csharp_proto_library(
364360
name = "documentai_csharp_proto",
361+
extra_opts = [],
365362
deps = [":documentai_proto"],
366363
)
367364

google/cloud/documentai/v1/document_io.proto

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,44 @@ message DocumentOutputConfig {
101101
GcsOutputConfig gcs_output_config = 1;
102102
}
103103
}
104+
105+
// Config for Document OCR.
106+
message OcrConfig {
107+
// Hints for OCR Engine
108+
message Hints {
109+
// List of BCP-47 language codes to use for OCR. In most cases, not
110+
// specifying it yields the best results since it enables automatic language
111+
// detection. For languages based on the Latin alphabet, setting hints is
112+
// not needed. In rare cases, when the language of the text in the
113+
// image is known, setting a hint will help get better results (although it
114+
// will be a significant hindrance if the hint is wrong).
115+
repeated string language_hints = 1;
116+
}
117+
118+
// Hints for the OCR model.
119+
Hints hints = 2;
120+
121+
// Enables special handling for PDFs with existing text information. Results
122+
// in better text extraction quality in such PDF inputs.
123+
bool enable_native_pdf_parsing = 3;
124+
125+
// Enables intelligent document quality scores after OCR. Can help with
126+
// diagnosing why OCR responses are of poor quality for a given input.
127+
// Adds additional latency comparable to regular OCR to the process call.
128+
bool enable_image_quality_scores = 4;
129+
130+
// A list of advanced OCR options to further fine-tune OCR behavior. Current
131+
// valid values are:
132+
//
133+
// - `legacy_layout`: a heuristics layout detection algorithm, which serves as
134+
// an alternative to the current ML-based layout detection algorithm.
135+
// Customers can choose the best suitable layout algorithm based on their
136+
// situation.
137+
repeated string advanced_ocr_options = 5;
138+
139+
// Includes symbol level OCR information if set to true.
140+
bool enable_symbol = 6;
141+
142+
// Turns on the font identification model and returns font style information.
143+
bool compute_style_info = 8;
144+
}

google/cloud/documentai/v1/document_processor_service.proto

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,13 @@ service DocumentProcessorService {
326326
}
327327
}
328328

329+
// Options for Process API
330+
message ProcessOptions {
331+
// Only applicable to `OCR_PROCESSOR`. Returns error if set on other
332+
// processor types.
333+
OcrConfig ocr_config = 1;
334+
}
335+
329336
// Request message for the
330337
// [ProcessDocument][google.cloud.documentai.v1.DocumentProcessorService.ProcessDocument]
331338
// method.
@@ -337,6 +344,9 @@ message ProcessRequest {
337344

338345
// A raw document content (bytes).
339346
RawDocument raw_document = 5;
347+
348+
// A raw document on Google Cloud Storage.
349+
GcsDocument gcs_document = 8;
340350
}
341351

342352
// Required. The resource name of the
@@ -363,6 +373,9 @@ message ProcessRequest {
363373
// output. Only supports top-level document and pages field, so it must be in
364374
// the form of `{document_field_name}` or `pages.{page_field_name}`.
365375
google.protobuf.FieldMask field_mask = 6;
376+
377+
// Inference-time options for the process API
378+
ProcessOptions process_options = 7;
366379
}
367380

368381
// The status of human review on a processed document.
@@ -443,6 +456,9 @@ message BatchProcessRequest {
443456
// Whether human review should be skipped for this request. Defaults to
444457
// `false`.
445458
bool skip_human_review = 4;
459+
460+
// Inference-time options for the process API
461+
ProcessOptions process_options = 7;
446462
}
447463

448464
// Response message for
@@ -895,6 +911,28 @@ message TrainProcessorVersionRequest {
895911
BatchDocumentsInputConfig test_documents = 4;
896912
}
897913

914+
// Options to control the training of the Custom Document Extraction (CDE)
915+
// Processor.
916+
message CustomDocumentExtractionOptions {
917+
// Training Method for CDE. TRAINING_METHOD_UNSPECIFIED will fall back to
918+
// MODEL_BASED.
919+
enum TrainingMethod {
920+
TRAINING_METHOD_UNSPECIFIED = 0;
921+
922+
MODEL_BASED = 1;
923+
924+
TEMPLATE_BASED = 2;
925+
}
926+
927+
// Training method to use for CDE training.
928+
TrainingMethod training_method = 3;
929+
}
930+
931+
oneof processor_flags {
932+
// Options to control Custom Document Extraction (CDE) Processor.
933+
CustomDocumentExtractionOptions custom_document_extraction_options = 5;
934+
}
935+
898936
// Required. The parent (project, location and processor) to create the new
899937
// version for. Format:
900938
// `projects/{project}/locations/{location}/processors/{processor}`.

google/cloud/documentai/v1/processor.proto

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ message ProcessorVersion {
108108
// The KMS key version with which data is encrypted.
109109
string kms_key_version_name = 10;
110110

111-
// Denotes that this `ProcessorVersion` is managed by Google.
112-
bool google_managed = 11;
111+
// Output only. Denotes that this `ProcessorVersion` is managed by Google.
112+
bool google_managed = 11 [(google.api.field_behavior) = OUTPUT_ONLY];
113113

114114
// If set, information about the eventual deprecation of this version.
115115
DeprecationInfo deprecation_info = 13;

google/cloud/documentai/v1beta2/BUILD.bazel

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This file was automatically generated by BuildFileGenerator
2-
# https://github.com/googleapis/gapic-generator/tree/master/rules_gapic/bazel
2+
# https://github.com/googleapis/rules_gapic/tree/master/bazel
33

44
# Most of the manual changes to this file will be overwritten.
55
# It's **only** allowed to change the following rule attribute values:
@@ -21,6 +21,7 @@ load("@com_google_googleapis_imports//:imports.bzl", "proto_library_with_info")
2121
proto_library(
2222
name = "documentai_proto",
2323
srcs = [
24+
"barcode.proto",
2425
"document.proto",
2526
"document_understanding.proto",
2627
"geometry.proto",
@@ -32,6 +33,10 @@ proto_library(
3233
"//google/longrunning:operations_proto",
3334
"//google/rpc:status_proto",
3435
"//google/type:color_proto",
36+
"//google/type:date_proto",
37+
"//google/type:datetime_proto",
38+
"//google/type:money_proto",
39+
"//google/type:postal_address_proto",
3540
"@com_google_protobuf//:timestamp_proto",
3641
],
3742
)
@@ -41,6 +46,7 @@ proto_library_with_info(
4146
deps = [
4247
":documentai_proto",
4348
"//google/cloud:common_resources_proto",
49+
"//google/cloud/location:location_proto",
4450
],
4551
)
4652

@@ -70,15 +76,19 @@ java_grpc_library(
7076
java_gapic_library(
7177
name = "documentai_java_gapic",
7278
srcs = [":documentai_proto_with_info"],
79+
gapic_yaml = "documentai_gapic.yaml",
7380
grpc_service_config = "documentai_v1beta2_grpc_service_config.json",
7481
rest_numeric_enums = True,
7582
service_yaml = "documentai_v1beta2.yaml",
7683
test_deps = [
84+
"//google/cloud/location:location_java_grpc",
7785
":documentai_java_grpc",
7886
],
7987
transport = "grpc",
8088
deps = [
8189
":documentai_java_proto",
90+
"//google/api:api_java_proto",
91+
"//google/cloud/location:location_java_proto",
8292
],
8393
)
8494

@@ -123,6 +133,10 @@ go_proto_library(
123133
"//google/longrunning:longrunning_go_proto",
124134
"//google/rpc:status_go_proto",
125135
"//google/type:color_go_proto",
136+
"//google/type:date_go_proto",
137+
"//google/type:datetime_go_proto",
138+
"//google/type:money_go_proto",
139+
"//google/type:postaladdress_go_proto",
126140
],
127141
)
128142

@@ -131,12 +145,14 @@ go_gapic_library(
131145
srcs = [":documentai_proto_with_info"],
132146
grpc_service_config = "documentai_v1beta2_grpc_service_config.json",
133147
importpath = "cloud.google.com/go/documentai/apiv1beta2;documentai",
148+
metadata = True,
134149
release_level = "beta",
135150
rest_numeric_enums = True,
136151
service_yaml = "documentai_v1beta2.yaml",
137152
transport = "grpc",
138153
deps = [
139154
":documentai_go_proto",
155+
"//google/cloud/location:location_go_proto",
140156
"//google/longrunning:longrunning_go_proto",
141157
"@com_google_cloud_go_longrunning//:go_default_library",
142158
"@com_google_cloud_go_longrunning//autogen:go_default_library",
@@ -148,6 +164,7 @@ go_gapic_assembly_pkg(
148164
name = "gapi-cloud-documentai-v1beta2-go",
149165
deps = [
150166
":documentai_go_gapic",
167+
":documentai_go_gapic_srcjar-metadata.srcjar",
151168
":documentai_go_gapic_srcjar-snippets.srcjar",
152169
":documentai_go_gapic_srcjar-test.srcjar",
153170
":documentai_go_proto",
@@ -171,6 +188,8 @@ py_gapic_library(
171188
rest_numeric_enums = True,
172189
service_yaml = "documentai_v1beta2.yaml",
173190
transport = "grpc",
191+
deps = [
192+
],
174193
)
175194

176195
py_test(
@@ -210,10 +229,13 @@ php_gapic_library(
210229
name = "documentai_php_gapic",
211230
srcs = [":documentai_proto_with_info"],
212231
grpc_service_config = "documentai_v1beta2_grpc_service_config.json",
232+
migration_mode = "PRE_MIGRATION_SURFACE_ONLY",
213233
rest_numeric_enums = True,
214234
service_yaml = "documentai_v1beta2.yaml",
215235
transport = "grpc+rest",
216-
deps = [":documentai_php_proto"],
236+
deps = [
237+
":documentai_php_proto",
238+
],
217239
)
218240

219241
# Open Source Packages
@@ -281,6 +303,7 @@ ruby_cloud_gapic_library(
281303
name = "documentai_ruby_gapic",
282304
srcs = [":documentai_proto_with_info"],
283305
extra_protoc_parameters = ["ruby-cloud-gem-name=google-cloud-documentai-v1beta2"],
306+
grpc_service_config = "documentai_v1beta2_grpc_service_config.json",
284307
rest_numeric_enums = True,
285308
service_yaml = "documentai_v1beta2.yaml",
286309
transport = "grpc",
@@ -313,6 +336,7 @@ load(
313336

314337
csharp_proto_library(
315338
name = "documentai_csharp_proto",
339+
extra_opts = [],
316340
deps = [":documentai_proto"],
317341
)
318342

@@ -349,4 +373,20 @@ csharp_gapic_assembly_pkg(
349373
##############################################################################
350374
# C++
351375
##############################################################################
352-
# Put your C++ rules here
376+
load(
377+
"@com_google_googleapis_imports//:imports.bzl",
378+
"cc_grpc_library",
379+
"cc_proto_library",
380+
)
381+
382+
cc_proto_library(
383+
name = "documentai_cc_proto",
384+
deps = [":documentai_proto"],
385+
)
386+
387+
cc_grpc_library(
388+
name = "documentai_cc_grpc",
389+
srcs = [":documentai_proto"],
390+
grpc_only = True,
391+
deps = [":documentai_cc_proto"],
392+
)

google/cloud/documentai/v1beta2/barcode.proto

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
syntax = "proto3";
16+
17+
package google.cloud.documentai.v1beta2;
18+
19+
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta2";
20+
option go_package = "cloud.google.com/go/documentai/apiv1beta2/documentaipb;documentaipb";
21+
option java_multiple_files = true;
22+
option java_outer_classname = "BarcodeProto";
23+
option java_package = "com.google.cloud.documentai.v1beta2";
24+
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta2";
25+
option ruby_package = "Google::Cloud::DocumentAI::V1beta2";
26+
27+
// Encodes the detailed information of a barcode.
28+
message Barcode {
29+
// Format of a barcode.
30+
// The supported formats are:
31+
//
32+
// - `CODE_128`: Code 128 type.
33+
// - `CODE_39`: Code 39 type.
34+
// - `CODE_93`: Code 93 type.
35+
// - `CODABAR`: Codabar type.
36+
// - `DATA_MATRIX`: 2D Data Matrix type.
37+
// - `ITF`: ITF type.
38+
// - `EAN_13`: EAN-13 type.
39+
// - `EAN_8`: EAN-8 type.
40+
// - `QR_CODE`: 2D QR code type.
41+
// - `UPC_A`: UPC-A type.
42+
// - `UPC_E`: UPC-E type.
43+
// - `PDF417`: PDF417 type.
44+
// - `AZTEC`: 2D Aztec code type.
45+
// - `DATABAR`: GS1 DataBar code type.
46+
string format = 1;
47+
48+
// Value format describes the format of the value that a barcode
49+
// encodes.
50+
// The supported formats are:
51+
//
52+
// - `CONTACT_INFO`: Contact information.
53+
// - `EMAIL`: Email address.
54+
// - `ISBN`: ISBN identifier.
55+
// - `PHONE`: Phone number.
56+
// - `PRODUCT`: Product.
57+
// - `SMS`: SMS message.
58+
// - `TEXT`: Text string.
59+
// - `URL`: URL address.
60+
// - `WIFI`: Wifi information.
61+
// - `GEO`: Geo-localization.
62+
// - `CALENDAR_EVENT`: Calendar event.
63+
// - `DRIVER_LICENSE`: Driver's license.
64+
string value_format = 2;
65+
66+
// Raw value encoded in the barcode.
67+
// For example: `'MEBKM:TITLE:Google;URL:https://www.google.com;;'`.
68+
string raw_value = 3;
69+
}

0 commit comments

Comments
 (0)