Skip to content

Commit 5b04a07

Browse files
Google APIs, copybara-github
authored and committed
feat: add page spans in retrieved contexts from Vertex RAG Engine in aiplatform v1
PiperOrigin-RevId: 741233157
1 parent ec79b93 commit 5b04a07

3 files changed

Lines changed: 50 additions & 16 deletions

File tree

google/cloud/aiplatform/v1/content.proto

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ package google.cloud.aiplatform.v1;
1919
import "google/api/field_behavior.proto";
2020
import "google/cloud/aiplatform/v1/openapi.proto";
2121
import "google/cloud/aiplatform/v1/tool.proto";
22+
import "google/cloud/aiplatform/v1/vertex_rag_data.proto";
2223
import "google/protobuf/duration.proto";
2324
import "google/type/date.proto";
2425

@@ -538,6 +539,13 @@ message GroundingChunk {
538539

539540
// Chunk from context retrieved by the retrieval tools.
540541
message RetrievedContext {
542+
// Tool-specific details about the retrieved context.
543+
oneof context_details {
544+
// Additional context for the RAG retrieval result. This is only populated
545+
// when using the RAG retrieval tool.
546+
RagChunk rag_chunk = 4;
547+
}
548+
541549
// URI reference of the attribution.
542550
optional string uri = 1;
543551

google/cloud/aiplatform/v1/vertex_rag_data.proto

Lines changed: 35 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -186,6 +186,22 @@ message RagCorpus {
186186
singular: "ragCorpus"
187187
};
188188

189+
// The backend config of the RagCorpus.
190+
// It can be data store and/or retrieval engine.
191+
oneof backend_config {
192+
// Optional. Immutable. The config for the Vector DBs.
193+
RagVectorDbConfig vector_db_config = 9 [
194+
(google.api.field_behavior) = OPTIONAL,
195+
(google.api.field_behavior) = IMMUTABLE
196+
];
197+
198+
// Optional. Immutable. The config for the Vertex AI Search.
199+
VertexAiSearchConfig vertex_ai_search_config = 10 [
200+
(google.api.field_behavior) = OPTIONAL,
201+
(google.api.field_behavior) = IMMUTABLE
202+
];
203+
}
204+
189205
// Output only. The resource name of the RagCorpus.
190206
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
191207

@@ -207,22 +223,6 @@ message RagCorpus {
207223

208224
// Output only. RagCorpus state.
209225
CorpusStatus corpus_status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
210-
211-
// The backend config of the RagCorpus.
212-
// It can be data store and/or retrieval engine.
213-
oneof backend_config {
214-
// Optional. Immutable. The config for the Vector DBs.
215-
RagVectorDbConfig vector_db_config = 9 [
216-
(google.api.field_behavior) = OPTIONAL,
217-
(google.api.field_behavior) = IMMUTABLE
218-
];
219-
220-
// Optional. Immutable. The config for the Vertex AI Search.
221-
VertexAiSearchConfig vertex_ai_search_config = 10 [
222-
(google.api.field_behavior) = OPTIONAL,
223-
(google.api.field_behavior) = IMMUTABLE
224-
];
225-
}
226226
}
227227

228228
// A RagFile contains user data for chunking, embedding and indexing.
@@ -284,6 +284,25 @@ message RagFile {
284284
FileStatus file_status = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
285285
}
286286

287+
// A RagChunk carries the text of one chunk of a RagFile together with
288+
// associated metadata (at present, the page span the chunk covers).
289+
message RagChunk {
290+
// Represents where the chunk starts and ends in the document. Both bounds
// are inclusive and 1-indexed, so a chunk that lies entirely on one page has
// first_page equal to last_page.
291+
message PageSpan {
292+
// Page where the chunk starts in the document. Inclusive. 1-indexed.
293+
int32 first_page = 1;
294+
295+
// Page where the chunk ends in the document. Inclusive. 1-indexed.
296+
int32 last_page = 2;
297+
}
298+
299+
// The content of the chunk.
300+
string text = 1;
301+
302+
// If populated, represents where the chunk starts and ends in the document.
// The span may be absent, so check for presence before reading it.
303+
optional PageSpan page_span = 2;
304+
}
305+
287306
// Specifies the size and overlap of chunks for RagFiles.
288307
message RagFileChunkingConfig {
289308
// Specifies the fixed length chunking config.

google/cloud/aiplatform/v1/vertex_rag_service.proto

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ import "google/api/field_behavior.proto";
2222
import "google/api/resource.proto";
2323
import "google/cloud/aiplatform/v1/content.proto";
2424
import "google/cloud/aiplatform/v1/tool.proto";
25+
import "google/cloud/aiplatform/v1/vertex_rag_data.proto";
2526

2627
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
2728
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
@@ -164,6 +165,9 @@ message RagContexts {
164165
// less relevant the context is to the query. The range is [0, 2], while 0
165166
// means the most relevant and 2 means the least relevant.
166167
optional double score = 6;
168+
169+
// Context of the retrieved chunk.
170+
RagChunk chunk = 7;
167171
}
168172

169173
// All its contexts.
@@ -293,6 +297,9 @@ message Fact {
293297
// relevant the fact is to the query. The range is [0, 2], while 0 means the
294298
// most relevant and 2 means the least relevant.
295299
optional double score = 6;
300+
301+
// If present, chunk properties.
302+
optional RagChunk chunk = 7;
296303
}
297304

298305
// Claim that is extracted from the input text and facts that support it.

0 commit comments

Comments
 (0)