Skip to content

Commit e096041

Browse files
Google APIs authored and copybara-github committed
feat: Add global quota config to vertex rag engine api
PiperOrigin-RevId: 744892201
1 parent 07c7b13 commit e096041

1 file changed

Lines changed: 27 additions & 0 deletions

File tree

google/cloud/aiplatform/v1beta1/vertex_rag_data.proto

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,15 @@ message RagFileParsingConfig {
460460
// project to set an appropriate value here. If unspecified, a default value
461461
// of 120 QPM would be used.
462462
int32 max_parsing_requests_per_min = 2;
463+
464+
// The maximum number of requests the job is allowed to make to the Document
465+
// AI processor per minute in this project. Consult
466+
// https://cloud.google.com/document-ai/quotas and the Quota page for your
467+
// project to set an appropriate value here.
468+
// If this value is not specified,
469+
// max_parsing_requests_per_min will be used by the indexing
470+
// pipeline as the global limit.
471+
int32 global_max_parsing_requests_per_min = 3;
463472
}
464473

465474
// Specifies the advanced parsing for RagFiles.
@@ -476,6 +485,15 @@ message RagFileParsingConfig {
476485
// a default value of 5000 QPM would be used.
477486
int32 max_parsing_requests_per_min = 2;
478487

488+
// The maximum number of requests the job is allowed to make to the
489+
// LLM model per minute in this project. Consult
490+
// https://cloud.google.com/vertex-ai/generative-ai/docs/quotas
491+
// and your document size to set an appropriate value here.
492+
// If this value is not specified,
493+
// max_parsing_requests_per_min will be used by the indexing pipeline job as the
494+
// global limit.
495+
int32 global_max_parsing_requests_per_min = 4;
496+
479497
// The prompt to use for parsing. If not specified, a default prompt will
480498
// be used.
481499
string custom_parsing_prompt = 3;
@@ -580,4 +598,13 @@ message ImportRagFilesConfig {
580598
// If unspecified, a default value of 1,000 QPM would be used.
581599
int32 max_embedding_requests_per_min = 5
582600
[(google.api.field_behavior) = OPTIONAL];
601+
602+
// Optional. The max number of queries per minute that the indexing pipeline
603+
// job is allowed to make to the embedding model specified in the project.
604+
// Please follow the quota usage guideline of the embedding model you use to
605+
// set the value properly. If this value is not specified,
606+
// max_embedding_requests_per_min will be used by the indexing pipeline job as the
607+
// global limit.
608+
int32 global_max_embedding_requests_per_min = 18
609+
[(google.api.field_behavior) = OPTIONAL];
583610
}

0 commit comments

Comments
 (0)