Skip to content

Commit 4cdc2aa

Browse files
Google APIs authored and copybara-github committed
feat: Introduce RagFileMetadataConfig for importing metadata to Rag
PiperOrigin-RevId: 770274285
1 parent 111b738 commit 4cdc2aa

1 file changed

Lines changed: 61 additions & 0 deletions

File tree

google/cloud/aiplatform/v1beta1/vertex_rag_data.proto

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -462,6 +462,10 @@ message RagFile {
462462

463463
// Output only. State of the RagFile.
464464
FileStatus file_status = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
465+
466+
// Output only. The metadata for metadata search. The contents will be
467+
// in JSON format.
468+
string user_metadata = 15 [(google.api.field_behavior) = OUTPUT_ONLY];
465469
}
466470

467471
// A RagChunk includes the content of a chunk of a RagFile, and associated
@@ -592,13 +596,66 @@ message RagFileParsingConfig {
592596
bool use_advanced_pdf_parsing = 2 [deprecated = true];
593597
}
594598

599+
// Metadata config for RagFile.
600+
message RagFileMetadataConfig {
601+
// Specifies the metadata schema source.
602+
oneof metadata_schema_source {
603+
// Google Cloud Storage location. Supports importing individual files as
604+
// well as entire Google Cloud Storage directories. Sample formats:
605+
// - `gs://bucket_name/my_directory/object_name/metadata_schema.json`
606+
// - `gs://bucket_name/my_directory`
607+
// If providing a directory, the metadata schema will be read from
608+
// the files that end with "metadata_schema.json" in the directory.
609+
GcsSource gcs_metadata_schema_source = 1;
610+
611+
// Google Drive location. Supports importing individual files as
612+
// well as Google Drive folders.
613+
// If providing a folder, the metadata schema will be read from
614+
// the files that end with "metadata_schema.json" in the folder.
615+
GoogleDriveSource google_drive_metadata_schema_source = 2;
616+
617+
// Inline metadata schema source. Must be a JSON string.
618+
string inline_metadata_schema_source = 3;
619+
}
620+
621+
// Specifies the metadata source.
622+
oneof metadata_source {
623+
// Google Cloud Storage location. Supports importing individual files as
624+
// well as entire Google Cloud Storage directories. Sample formats:
625+
// - `gs://bucket_name/my_directory/object_name/metadata.json`
626+
// - `gs://bucket_name/my_directory`
627+
// If providing a directory, the metadata will be read from
628+
// the files that end with "metadata.json" in the directory.
629+
GcsSource gcs_metadata_source = 4;
630+
631+
// Google Drive location. Supports importing individual files as
632+
// well as Google Drive folders.
633+
// If providing a folder, the metadata will be read from
634+
// the files that end with "metadata.json" in the folder.
635+
GoogleDriveSource google_drive_metadata_source = 5;
636+
637+
// Inline metadata source. Must be a JSON string.
638+
string inline_metadata_source = 6;
639+
}
640+
}
641+
595642
// Config for uploading RagFile.
596643
message UploadRagFileConfig {
597644
// Specifies the size and overlap of chunks after uploading RagFile.
598645
RagFileChunkingConfig rag_file_chunking_config = 1 [deprecated = true];
599646

600647
// Specifies the transformation config for RagFiles.
601648
RagFileTransformationConfig rag_file_transformation_config = 3;
649+
650+
// Specifies the metadata config for RagFiles.
651+
// This includes paths for the metadata schema and the metadata.
652+
// Alternatively, inline metadata schema and metadata can be provided.
653+
RagFileMetadataConfig rag_file_metadata_config = 4;
654+
655+
// Optional. Specifies the parsing config for RagFiles.
656+
// RAG will use the default parser if this field is not set.
657+
RagFileParsingConfig rag_file_parsing_config = 5
658+
[(google.api.field_behavior) = OPTIONAL];
602659
}
603660

604661
// Config for importing RagFiles.
@@ -668,6 +725,10 @@ message ImportRagFilesConfig {
668725
RagFileParsingConfig rag_file_parsing_config = 8
669726
[(google.api.field_behavior) = OPTIONAL];
670727

728+
// Specifies the metadata config for RagFiles.
729+
// This includes paths for the metadata schema and the metadata.
730+
RagFileMetadataConfig rag_file_metadata_config = 17;
731+
671732
// Optional. The max number of queries per minute that this job is allowed to
672733
// make to the embedding model specified on the corpus. This value is specific
673734
// to this job and not shared across other import jobs. Consult the Quotas

0 commit comments

Comments
 (0)