@@ -293,6 +293,33 @@ message RagFileTransformationConfig {
293293 RagFileChunkingConfig rag_file_chunking_config = 1 ;
294294}
295295
296+ // Specifies the parsing config for RagFiles, i.e. which parser to use.
297+ message RagFileParsingConfig {
298+ // Document AI Layout Parser config.
299+ message LayoutParser {
300+ // The full resource name of a Document AI processor or processor version.
301+ // The processor must have type `LAYOUT_PARSER_PROCESSOR`. If specified, the
302+ // `additional_config.parse_as_scanned_pdf` field must be false.
303+ // Format (one of):
304+ // * `projects/{project_id}/locations/{location}/processors/{processor_id}`
305+ // * `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`
306+ string processor_name = 1 ;
307+
308+ // The maximum number of requests per minute that the job is allowed to make
309+ // to the Document AI processor. Consult
310+ // https://cloud.google.com/document-ai/quotas and the Quota page for your
311+ // project to set an appropriate value here. If unspecified, a default value
312+ // of 120 requests per minute is used.
313+ int32 max_parsing_requests_per_min = 2 ;
314+ }
315+
316+ // The parser to use for RagFiles. At most one member of this oneof is set.
317+ oneof parser {
318+ // The Document AI Layout Parser to use for RagFiles.
319+ LayoutParser layout_parser = 4 ;
320+ }
321+ }
322+
296323// Config for uploading RagFile.
297324message UploadRagFileConfig {
298325 // Specifies the transformation config for RagFiles.
@@ -343,6 +370,11 @@ message ImportRagFilesConfig {
343370 // Specifies the transformation config for RagFiles.
344371 RagFileTransformationConfig rag_file_transformation_config = 16 ;
345372
373+ // Optional. Specifies the parsing config for RagFiles.
374+ // If this field is not set, RAG uses the default parser.
375+ RagFileParsingConfig rag_file_parsing_config = 8
376+ [(google.api.field_behavior ) = OPTIONAL ];
377+
346378 // Optional. The max number of queries per minute that this job is allowed to
347379 // make to the embedding model specified on the corpus. This value is specific
348380 // to this job and not shared across other import jobs. Consult the Quotas
0 commit comments