feat: add Layout Parser to RAG v1 API

Google APIs · copybara-github · commit f4f8efb9e05e · 2025-03-04T18:09:10.000-08:00
PiperOrigin-RevId: 733531494
diff --git a/google/cloud/aiplatform/v1/aiplatform_v1.yaml b/google/cloud/aiplatform/v1/aiplatform_v1.yaml
@@ -33,6 +33,8 @@ apis:
 - name: google.cloud.aiplatform.v1.ScheduleService
 - name: google.cloud.aiplatform.v1.SpecialistPoolService
 - name: google.cloud.aiplatform.v1.TensorboardService
+- name: google.cloud.aiplatform.v1.VertexRagDataService
+- name: google.cloud.aiplatform.v1.VertexRagService
 - name: google.cloud.aiplatform.v1.VizierService
 - name: google.cloud.location.Locations
 - name: google.iam.v1.IAMPolicy
diff --git a/google/cloud/aiplatform/v1/vertex_rag_data.proto b/google/cloud/aiplatform/v1/vertex_rag_data.proto
@@ -293,6 +293,33 @@ message RagFileTransformationConfig {
   RagFileChunkingConfig rag_file_chunking_config = 1;
 }
 
+// Specifies the parsing config for RagFiles.
+message RagFileParsingConfig {
+  // Document AI Layout Parser config.
+  message LayoutParser {
+    // The full resource name of a Document AI processor or processor version.
+    // The processor must have type `LAYOUT_PARSER_PROCESSOR`. If specified, the
+    // `additional_config.parse_as_scanned_pdf` field must be false.
+    // Formats:
+    // * `projects/{project_id}/locations/{location}/processors/{processor_id}`
+    // * `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`
+    string processor_name = 1;
+
+    // The maximum number of requests per minute that the job is allowed to
+    // make to the Document AI processor. Consult
+    // https://cloud.google.com/document-ai/quotas and the Quota page for your
+    // project to set an appropriate value. If unspecified, a default value of
+    // 120 requests per minute is used.
+    int32 max_parsing_requests_per_min = 2;
+  }
+
+  // The parser to use for RagFiles.
+  oneof parser {
+    // The Layout Parser to use for RagFiles.
+    LayoutParser layout_parser = 4;
+  }
+}
+
 // Config for uploading RagFile.
 message UploadRagFileConfig {
   // Specifies the transformation config for RagFiles.
@@ -343,6 +370,11 @@ message ImportRagFilesConfig {
   // Specifies the transformation config for RagFiles.
   RagFileTransformationConfig rag_file_transformation_config = 16;
 
+  // Optional. Specifies the parsing config for RagFiles.
+  // RAG will use the default parser if this field is not set.
+  RagFileParsingConfig rag_file_parsing_config = 8
+      [(google.api.field_behavior) = OPTIONAL];
+
   // Optional. The max number of queries per minute that this job is allowed to
   // make to the embedding model specified on the corpus. This value is specific
   // to this job and not shared across other import jobs. Consult the Quotas