Skip to content

Commit d427057

Browse files
Google APIs authored and copybara-github committed
feat: add page spans in retrieved contexts from Vertex RAG Engine in aiplatform v1beta1
docs: A comment for field `rag_files_count` in message `.google.cloud.aiplatform.v1beta1.RagCorpus` is changed
docs: A comment for field `model_name` in message `.google.cloud.aiplatform.v1beta1.RagFileParsingConfig` is changed

PiperOrigin-RevId: 741268484
1 parent 5b04a07 commit d427057

3 files changed

Lines changed: 54 additions & 18 deletions

File tree

google/cloud/aiplatform/v1beta1/content.proto

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ import "google/api/field_behavior.proto";
2020
import "google/api/resource.proto";
2121
import "google/cloud/aiplatform/v1beta1/openapi.proto";
2222
import "google/cloud/aiplatform/v1beta1/tool.proto";
23+
import "google/cloud/aiplatform/v1beta1/vertex_rag_data.proto";
2324
import "google/protobuf/duration.proto";
2425
import "google/type/date.proto";
2526

@@ -631,6 +632,13 @@ message GroundingChunk {
631632

632633
// Chunk from context retrieved by the retrieval tools.
633634
message RetrievedContext {
635+
// Tool-specific details about the retrieved context.
636+
oneof context_details {
637+
// Additional context for the RAG retrieval result. This is only populated
638+
// when using the RAG retrieval tool.
639+
RagChunk rag_chunk = 4;
640+
}
641+
634642
// URI reference of the attribution.
635643
optional string uri = 1;
636644

google/cloud/aiplatform/v1beta1/vertex_rag_data.proto

Lines changed: 39 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -254,6 +254,22 @@ message RagCorpus {
254254
singular: "ragCorpus"
255255
};
256256

257+
// The backend config of the RagCorpus.
258+
// It can be data store and/or retrieval engine.
259+
oneof backend_config {
260+
// Optional. Immutable. The config for the Vector DBs.
261+
RagVectorDbConfig vector_db_config = 9 [
262+
(google.api.field_behavior) = OPTIONAL,
263+
(google.api.field_behavior) = IMMUTABLE
264+
];
265+
266+
// Optional. Immutable. The config for the Vertex AI Search.
267+
VertexAiSearchConfig vertex_ai_search_config = 10 [
268+
(google.api.field_behavior) = OPTIONAL,
269+
(google.api.field_behavior) = IMMUTABLE
270+
];
271+
}
272+
257273
// Output only. The resource name of the RagCorpus.
258274
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
259275

@@ -290,23 +306,8 @@ message RagCorpus {
290306
// Output only. RagCorpus state.
291307
CorpusStatus corpus_status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
292308

293-
// The backend config of the RagCorpus.
294-
// It can be data store and/or retrieval engine.
295-
oneof backend_config {
296-
// Optional. Immutable. The config for the Vector DBs.
297-
RagVectorDbConfig vector_db_config = 9 [
298-
(google.api.field_behavior) = OPTIONAL,
299-
(google.api.field_behavior) = IMMUTABLE
300-
];
301-
302-
// Optional. Immutable. The config for the Vertex AI Search.
303-
VertexAiSearchConfig vertex_ai_search_config = 10 [
304-
(google.api.field_behavior) = OPTIONAL,
305-
(google.api.field_behavior) = IMMUTABLE
306-
];
307-
}
308-
// Output only. The number of RagFiles in the RagCorpus.
309-
int32 rag_files_count = 11;
309+
// Output only. Number of RagFiles in the RagCorpus.
310+
int32 rag_files_count = 11 [(google.api.field_behavior) = OUTPUT_ONLY];
310311
}
311312

312313
// A RagFile contains user data for chunking, embedding and indexing.
@@ -386,6 +387,25 @@ message RagFile {
386387
FileStatus file_status = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
387388
}
388389

390+
// A RagChunk includes the content of a chunk of a RagFile, and associated
391+
// metadata.
392+
message RagChunk {
393+
// Represents where the chunk starts and ends in the document.
394+
message PageSpan {
395+
// Page where chunk starts in the document. Inclusive. 1-indexed.
396+
int32 first_page = 1;
397+
398+
// Page where chunk ends in the document. Inclusive. 1-indexed.
399+
int32 last_page = 2;
400+
}
401+
402+
// The content of the chunk.
403+
string text = 1;
404+
405+
// If populated, represents where the chunk starts and ends in the document.
406+
optional PageSpan page_span = 2;
407+
}
408+
389409
// Specifies the size and overlap of chunks for RagFiles.
390410
message RagFileChunkingConfig {
391411
// Specifies the fixed length chunking config.
@@ -445,7 +465,8 @@ message RagFileParsingConfig {
445465
// Specifies the advanced parsing for RagFiles.
446466
message LlmParser {
447467
// The name of a LLM model used for parsing.
448-
// Format: `gemini-1.5-pro-002`
468+
// Format:
469+
// * `projects/{project_id}/locations/{location}/publishers/{publisher}/models/{model}`
449470
string model_name = 1;
450471

451472
// The maximum number of requests the job is allowed to make to the

google/cloud/aiplatform/v1beta1/vertex_rag_service.proto

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ import "google/api/field_behavior.proto";
2222
import "google/api/resource.proto";
2323
import "google/cloud/aiplatform/v1beta1/content.proto";
2424
import "google/cloud/aiplatform/v1beta1/tool.proto";
25+
import "google/cloud/aiplatform/v1beta1/vertex_rag_data.proto";
2526

2627
option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
2728
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
@@ -194,6 +195,9 @@ message RagContexts {
194195
// less relevant the context is to the query. The range is [0, 2], while 0
195196
// means the most relevant and 2 means the least relevant.
196197
optional double score = 6;
198+
199+
// Context of the retrieved chunk.
200+
RagChunk chunk = 7;
197201
}
198202

199203
// All its contexts.
@@ -323,6 +327,9 @@ message Fact {
323327
// relevant the fact is to the query. The range is [0, 2], while 0 means the
324328
// most relevant and 2 means the least relevant.
325329
optional double score = 6;
330+
331+
// If present, chunk properties.
332+
optional RagChunk chunk = 7;
326333
}
327334

328335
// Claim that is extracted from the input text and facts that support it.

0 commit comments

Comments
 (0)