@@ -460,6 +460,15 @@ message RagFileParsingConfig {
460460 // project to set an appropriate value here. If unspecified, a default value
461461 // of 120 QPM would be used.
462462 int32 max_parsing_requests_per_min = 2 ;
463+
464+ // The maximum number of requests the job is allowed to make to the Document
465+ // AI processor per minute in this project. Consult
466+ // https://cloud.google.com/document-ai/quotas and the Quota page for your
467+ // project to set an appropriate value here.
468+ // If this value is not specified,
469+ // max_parsing_requests_per_min will be used by the indexing
470+ // pipeline as the global limit.
471+ int32 global_max_parsing_requests_per_min = 3 ;
463472 }
464473
465474 // Specifies the advanced parsing for RagFiles.
@@ -476,6 +485,15 @@ message RagFileParsingConfig {
476485 // a default value of 5000 QPM would be used.
477486 int32 max_parsing_requests_per_min = 2 ;
478487
488+ // The maximum number of requests the job is allowed to make to the
489+ // LLM per minute in this project. Consult
490+ // https://cloud.google.com/vertex-ai/generative-ai/docs/quotas
491+ // and your document size to set an appropriate value here.
492+ // If this value is not specified,
493+ // max_parsing_requests_per_min will be used by the indexing pipeline job
494+ // as the global limit.
495+ int32 global_max_parsing_requests_per_min = 4 ;
496+
479497 // The prompt to use for parsing. If not specified, a default prompt will
480498 // be used.
481499 string custom_parsing_prompt = 3 ;
@@ -580,4 +598,13 @@ message ImportRagFilesConfig {
580598 // If unspecified, a default value of 1,000 QPM would be used.
581599 int32 max_embedding_requests_per_min = 5
582600 [(google.api.field_behavior ) = OPTIONAL ];
601+
602+ // Optional. The max number of queries per minute that the indexing pipeline
603+ // job is allowed to make to the embedding model specified in the project.
604+ // Please follow the quota usage guideline of the embedding model you use to
605+ // set the value properly. If this value is not specified,
606+ // max_embedding_requests_per_min will be used by the indexing pipeline job
607+ // as the global limit.
608+ int32 global_max_embedding_requests_per_min = 18
609+ [(google.api.field_behavior ) = OPTIONAL ];
583610}
0 commit comments