Skip to content

Commit a04a5aa

Browse files
Google APIs and copybara-github
authored and committed
feat: add ANN feature for RagManagedDb
PiperOrigin-RevId: 757834804
1 parent f84a9bf commit a04a5aa

3 files changed

Lines changed: 110 additions & 16 deletions

File tree

google/cloud/aiplatform/v1/aiplatform_v1.yaml

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ http:
287287
- post: '/ui/{name=projects/*/locations/*/trainingPipelines/*/operations/*}:cancel'
288288
- post: '/ui/{name=projects/*/locations/*/pipelineJobs/*/operations/*}:cancel'
289289
- post: '/ui/{name=projects/*/locations/*/schedules/*/operations/*}:cancel'
290+
- post: '/ui/{name=projects/*/locations/*/ragEngineConfig/operations/*}:cancel'
290291
- post: '/ui/{name=projects/*/locations/*/specialistPools/*/operations/*}:cancel'
291292
- post: '/ui/{name=projects/*/locations/*/tensorboards/*/operations/*}:cancel'
292293
- post: '/ui/{name=projects/*/locations/*/tensorboards/*/experiments/*/operations/*}:cancel'
@@ -321,6 +322,7 @@ http:
321322
- post: '/v1/{name=projects/*/locations/*/notebookRuntimes/*/operations/*}:cancel'
322323
- post: '/v1/{name=projects/*/locations/*/notebookRuntimeTemplates/*/operations/*}:cancel'
323324
- post: '/v1/{name=projects/*/locations/*/persistentResources/*/operations/*}:cancel'
325+
- post: '/v1/{name=projects/*/locations/*/ragEngineConfig/operations/*}:cancel'
324326
- post: '/v1/{name=projects/*/locations/*/ragCorpora/*/operations/*}:cancel'
325327
- post: '/v1/{name=projects/*/locations/*/ragCorpora/*/ragFiles/*/operations/*}:cancel'
326328
- post: '/v1/{name=projects/*/locations/*/reasoningEngines/*/operations/*}:cancel'
@@ -376,6 +378,7 @@ http:
376378
- delete: '/ui/{name=projects/*/locations/*/pipelineJobs/*/operations/*}'
377379
- delete: '/ui/{name=projects/*/locations/*/schedules/*/operations/*}'
378380
- delete: '/ui/{name=projects/*/locations/*/specialistPools/*/operations/*}'
381+
- delete: '/ui/{name=projects/*/locations/*/ragEngineConfig/operations/*}'
379382
- delete: '/ui/{name=projects/*/locations/*/tensorboards/*/operations/*}'
380383
- delete: '/ui/{name=projects/*/locations/*/tensorboards/*/experiments/*/operations/*}'
381384
- delete: '/ui/{name=projects/*/locations/*/tensorboards/*/experiments/*/runs/*/operations/*}'
@@ -412,6 +415,7 @@ http:
412415
- delete: '/v1/{name=projects/*/locations/*/notebookExecutionJobs/*/operations/*}'
413416
- delete: '/v1/{name=projects/*/locations/*/notebookRuntimes/*/operations/*}'
414417
- delete: '/v1/{name=projects/*/locations/*/notebookRuntimeTemplates/*/operations/*}'
418+
- delete: '/v1/{name=projects/*/locations/*/ragEngineConfig/operations/*}'
415419
- delete: '/v1/{name=projects/*/locations/*/ragCorpora/*/operations/*}'
416420
- delete: '/v1/{name=projects/*/locations/*/ragCorpora/*/ragFiles/*/operations/*}'
417421
- delete: '/v1/{name=projects/*/locations/*/reasoningEngines/*/operations/*}'
@@ -472,6 +476,7 @@ http:
472476
- get: '/ui/{name=projects/*/locations/*/studies/*/trials/*/operations/*}'
473477
- get: '/ui/{name=projects/*/locations/*/trainingPipelines/*/operations/*}'
474478
- get: '/ui/{name=projects/*/locations/*/pipelineJobs/*/operations/*}'
479+
- get: '/ui/{name=projects/*/locations/*/ragEngineConfig/operations/*}'
475480
- get: '/ui/{name=projects/*/locations/*/schedules/*/operations/*}'
476481
- get: '/ui/{name=projects/*/locations/*/specialistPools/*/operations/*}'
477482
- get: '/ui/{name=projects/*/locations/*/tensorboards/*/operations/*}'
@@ -511,6 +516,7 @@ http:
511516
- get: '/v1/{name=projects/*/locations/*/notebookExecutionJobs/*/operations/*}'
512517
- get: '/v1/{name=projects/*/locations/*/notebookRuntimes/*/operations/*}'
513518
- get: '/v1/{name=projects/*/locations/*/notebookRuntimeTemplates/*/operations/*}'
519+
- get: '/v1/{name=projects/*/locations/*/ragEngineConfig/operations/*}'
514520
- get: '/v1/{name=projects/*/locations/*/ragCorpora/*/operations/*}'
515521
- get: '/v1/{name=projects/*/locations/*/ragCorpora/*/ragFiles/*/operations/*}'
516522
- get: '/v1/{name=projects/*/locations/*/reasoningEngines/*/operations/*}'
@@ -570,6 +576,7 @@ http:
570576
- get: '/ui/{name=projects/*/locations/*/trainingPipelines/*}/operations'
571577
- get: '/ui/{name=projects/*/locations/*/persistentResources/*}/operations'
572578
- get: '/ui/{name=projects/*/locations/*/pipelineJobs/*}/operations'
579+
- get: '/ui/{name=projects/*/locations/*/ragEngineConfig}/operations'
573580
- get: '/ui/{name=projects/*/locations/*/schedules/*}/operations'
574581
- get: '/ui/{name=projects/*/locations/*/specialistPools/*}/operations'
575582
- get: '/ui/{name=projects/*/locations/*/tensorboards/*}/operations'
@@ -615,6 +622,7 @@ http:
615622
- get: '/v1/{name=projects/*/locations/*/trainingPipelines/*}/operations'
616623
- get: '/v1/{name=projects/*/locations/*/persistentResources/*}/operations'
617624
- get: '/v1/{name=projects/*/locations/*/pipelineJobs/*}/operations'
625+
- get: '/v1/{name=projects/*/locations/*/ragEngineConfig}/operations'
618626
- get: '/v1/{name=projects/*/locations/*/ragCorpora/*}/operations'
619627
- get: '/v1/{name=projects/*/locations/*/ragCorpora/*/ragFiles/*}/operations'
620628
- get: '/v1/{name=projects/*/locations/*/schedules/*}/operations'
@@ -670,6 +678,7 @@ http:
670678
- post: '/ui/{name=projects/*/locations/*/pipelineJobs/*/operations/*}:wait'
671679
- post: '/ui/{name=projects/*/locations/*/schedules/*/operations/*}:wait'
672680
- post: '/ui/{name=projects/*/locations/*/specialistPools/*/operations/*}:wait'
681+
- post: '/ui/{name=projects/*/locations/*/ragEngineConfig/operations/*}:wait'
673682
- post: '/ui/{name=projects/*/locations/*/tensorboards/*/operations/*}:wait'
674683
- post: '/ui/{name=projects/*/locations/*/tensorboards/*/experiments/*/operations/*}:wait'
675684
- post: '/ui/{name=projects/*/locations/*/tensorboards/*/experiments/*/runs/*/operations/*}:wait'
@@ -706,6 +715,7 @@ http:
706715
- post: '/v1/{name=projects/*/locations/*/notebookExecutionJobs/*/operations/*}:wait'
707716
- post: '/v1/{name=projects/*/locations/*/notebookRuntimes/*/operations/*}:wait'
708717
- post: '/v1/{name=projects/*/locations/*/notebookRuntimeTemplates/*/operations/*}:wait'
718+
- post: '/v1/{name=projects/*/locations/*/ragEngineConfig/operations/*}:wait'
709719
- post: '/v1/{name=projects/*/locations/*/ragCorpora/*/operations/*}:wait'
710720
- post: '/v1/{name=projects/*/locations/*/ragCorpora/*/ragFiles/*/operations/*}:wait'
711721
- post: '/v1/{name=projects/*/locations/*/reasoningEngines/*/operations/*}:wait'
@@ -955,25 +965,11 @@ publishing:
955965
organization: CLOUD
956966
library_settings:
957967
- version: google.cloud.aiplatform.v1
958-
java_settings:
959-
common: {}
960-
cpp_settings:
961-
common: {}
962-
php_settings:
963-
common: {}
964968
python_settings:
965-
common: {}
966969
experimental_features:
967970
rest_async_io_enabled: true
968-
node_settings:
969-
common: {}
970971
dotnet_settings:
971-
common: {}
972972
renamed_resources:
973973
datalabeling.googleapis.com/Dataset: DataLabelingDataset
974974
automl.googleapis.com/Dataset: AutoMLDataset
975975
automl.googleapis.com/Model: AutoMLModel
976-
ruby_settings:
977-
common: {}
978-
go_settings:
979-
common: {}

google/cloud/aiplatform/v1/vertex_rag_data.proto

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,47 @@ message RagEmbeddingModelConfig {
7676
// Config for the Vector DB to use for RAG.
7777
message RagVectorDbConfig {
7878
// The config for the default RAG-managed Vector DB.
79-
message RagManagedDb {}
79+
message RagManagedDb {
80+
// Config for KNN search.
81+
message KNN {}
82+
83+
// Config for ANN search.
84+
//
85+
// RagManagedDb uses a tree-based structure to partition data and
86+
// facilitate faster searches. As a tradeoff, it requires longer indexing
87+
// time and manual triggering of index rebuild via the ImportRagFiles and
88+
// UpdateRagCorpus APIs.
89+
message ANN {
90+
// The depth of the tree-based structure. Only depth values of 2 and 3 are
91+
// supported.
92+
//
93+
// Recommended value is 2 if you have O(10K) files in the
94+
// RagCorpus and set this to 3 if more than that.
95+
//
96+
// Default value is 2.
97+
int32 tree_depth = 1;
98+
99+
// Number of leaf nodes in the tree-based structure. Each leaf node
100+
// contains groups of closely related vectors along with their
101+
// corresponding centroid.
102+
//
103+
// Recommended value is 10 * sqrt(num of RagFiles in your RagCorpus).
104+
//
105+
// Default value is 500.
106+
int32 leaf_count = 2;
107+
}
108+
109+
// Choice of retrieval strategy.
110+
oneof retrieval_strategy {
111+
// Performs a KNN search on RagCorpus.
112+
// Default choice if not specified.
113+
KNN knn = 1;
114+
115+
// Performs an ANN search on RagCorpus. Use this if you have a lot of
116+
// files (> 10K) in your RagCorpus and want to reduce the search latency.
117+
ANN ann = 2;
118+
}
119+
}
80120

81121
// The config for the Pinecone.
82122
message Pinecone {
@@ -453,4 +493,13 @@ message ImportRagFilesConfig {
453493
// If unspecified, a default value of 1,000 QPM would be used.
454494
int32 max_embedding_requests_per_min = 5
455495
[(google.api.field_behavior) = OPTIONAL];
496+
497+
// Rebuilds the ANN index to optimize for recall on the imported data.
498+
// Only applicable for RagCorpora running on RagManagedDb with
499+
// `retrieval_strategy` set to `ANN`. The rebuild will be performed using the
500+
// existing ANN config set on the RagCorpus. To change the ANN config, please
501+
// use the UpdateRagCorpus API.
502+
//
503+
// Default is false, i.e., index is not rebuilt.
504+
bool rebuild_ann_index = 19;
456505
}

google/cloud/aiplatform/v1beta1/vertex_rag_data.proto

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,47 @@ message RagEmbeddingModelConfig {
119119
// Config for the Vector DB to use for RAG.
120120
message RagVectorDbConfig {
121121
// The config for the default RAG-managed Vector DB.
122-
message RagManagedDb {}
122+
message RagManagedDb {
123+
// Config for KNN search.
124+
message KNN {}
125+
126+
// Config for ANN search.
127+
//
128+
// RagManagedDb uses a tree-based structure to partition data and
129+
// facilitate faster searches. As a tradeoff, it requires longer indexing
130+
// time and manual triggering of index rebuild via the ImportRagFiles and
131+
// UpdateRagCorpus APIs.
132+
message ANN {
133+
// The depth of the tree-based structure. Only depth values of 2 and 3 are
134+
// supported.
135+
//
136+
// Recommended value is 2 if you have O(10K) files in the
137+
// RagCorpus and set this to 3 if more than that.
138+
//
139+
// Default value is 2.
140+
int32 tree_depth = 1;
141+
142+
// Number of leaf nodes in the tree-based structure. Each leaf node
143+
// contains groups of closely related vectors along with their
144+
// corresponding centroid.
145+
//
146+
// Recommended value is 10 * sqrt(num of RagFiles in your RagCorpus).
147+
//
148+
// Default value is 500.
149+
int32 leaf_count = 2;
150+
}
151+
152+
// Choice of retrieval strategy.
153+
oneof retrieval_strategy {
154+
// Performs a KNN search on RagCorpus.
155+
// Default choice if not specified.
156+
KNN knn = 1;
157+
158+
// Performs an ANN search on RagCorpus. Use this if you have a lot of
159+
// files (> 10K) in your RagCorpus and want to reduce the search latency.
160+
ANN ann = 2;
161+
}
162+
}
123163

124164
// The config for the Weaviate.
125165
message Weaviate {
@@ -617,6 +657,15 @@ message ImportRagFilesConfig {
617657
// global limit.
618658
int32 global_max_embedding_requests_per_min = 18
619659
[(google.api.field_behavior) = OPTIONAL];
660+
661+
// Rebuilds the ANN index to optimize for recall on the imported data.
662+
// Only applicable for RagCorpora running on RagManagedDb with
663+
// `retrieval_strategy` set to `ANN`. The rebuild will be performed using the
664+
// existing ANN config set on the RagCorpus. To change the ANN config, please
665+
// use the UpdateRagCorpus API.
666+
//
667+
// Default is false, i.e., index is not rebuilt.
668+
bool rebuild_ann_index = 19;
620669
}
621670

622671
// Configuration message for RagManagedDb used by RagEngine.

0 commit comments

Comments
 (0)