Skip to content

Commit 1a30906

Browse files
Google APIscopybara-github
authored andcommitted
feat: RagMetadata and RagDataSchema concepts and Batch API definitions added for Preview
docs: Updated comments for `rag_file_metadata_config` across ImportRagFilesRequest and UploadRagFileRequest PiperOrigin-RevId: 881618214
1 parent d90d23f commit 1a30906

File tree

2 files changed

+619
-3
lines changed

2 files changed

+619
-3
lines changed

google/cloud/aiplatform/v1beta1/vertex_rag_data.proto

Lines changed: 177 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -668,10 +668,12 @@ message UploadRagFileConfig {
668668
// Specifies the transformation config for RagFiles.
669669
RagFileTransformationConfig rag_file_transformation_config = 3;
670670

671-
// Specifies the metadata config for RagFiles.
671+
// Optional. Specifies the metadata config for RagFiles.
672672
// Including paths for metadata schema and metadata.
673673
// Alternatively, inline metadata schema and metadata can be provided.
674-
RagFileMetadataConfig rag_file_metadata_config = 4;
674+
// Deprecated: Not in use.
675+
RagFileMetadataConfig rag_file_metadata_config = 4
676+
[deprecated = true, (google.api.field_behavior) = OPTIONAL];
675677

676678
// Optional. Specifies the parsing config for RagFiles.
677679
// RAG will use the default parser if this field is not set.
@@ -748,7 +750,8 @@ message ImportRagFilesConfig {
748750

749751
// Specifies the metadata config for RagFiles.
750752
// Including paths for metadata schema and metadata.
751-
RagFileMetadataConfig rag_file_metadata_config = 17;
753+
// Deprecated: Not in use.
754+
RagFileMetadataConfig rag_file_metadata_config = 17 [deprecated = true];
752755

753756
// Optional. The max number of queries per minute that this job is allowed to
754757
// make to the embedding model specified on the corpus. This value is specific
@@ -875,3 +878,174 @@ message RagEngineConfig {
875878
// The config of the RagManagedDb used by RagEngine.
876879
RagManagedDbConfig rag_managed_db_config = 2;
877880
}
881+
882+
// Describes the schema for one key of user-specified metadata.
message RagDataSchema {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/RagDataSchema"
    pattern: "projects/{project}/locations/{location}/ragCorpora/{rag_corpus}/ragDataSchemas/{rag_data_schema}"
    plural: "ragDataSchemas"
    singular: "ragDataSchema"
  };

  // Identifier. The resource name of this data schema, formatted as:
  // `projects/{project_number}/locations/{location}/ragCorpora/{rag_corpus}/ragDataSchemas/{rag_data_schema}`
  // The `{rag_data_schema}` segment should be identical to the `key` field
  // of this message.
  string name = 1 [(google.api.field_behavior) = IDENTIFIER];

  // Required. The key this data schema applies to. It should match the key of
  // the user-specified metadata and be unique within the corpus. At most 63
  // characters are allowed; valid characters are lowercase letters (`a-z`),
  // digits (`0-9`) and hyphens (`-`). The first character must be a letter,
  // and the last character may be a letter or a number.
  string key = 2 [(google.api.field_behavior) = REQUIRED];

  // The schema details associated with `key`.
  RagMetadataSchemaDetails schema_details = 3;
}
906+
907+
// Details of a data schema: the data type and data structure that correspond
// to a key of user-specified metadata.
message RagMetadataSchemaDetails {
  // Configuration that applies when the data type is `LIST`.
  message ListConfig {
    // Schema describing the data type of the values held in the list.
    RagMetadataSchemaDetails value_schema = 1;
  }

  // How the metadata value of the `key` is searched.
  message SearchStrategy {
    // The kinds of search strategy that can be applied to the metadata key.
    enum SearchStrategyType {
      // No search strategy type was specified.
      SEARCH_STRATEGY_TYPE_UNSPECIFIED = 0;

      // Metadata values of the `key` will not be searchable.
      NO_SEARCH = 1;

      // A search with `key` only matches when the value is exactly the
      // metadata value that was ingested.
      EXACT_SEARCH = 2;
    }

    // The search strategy type applied to the metadata key.
    optional SearchStrategyType search_strategy_type = 1;
  }

  // The data type of a metadata value.
  enum DataType {
    // No data type was specified.
    DATA_TYPE_UNSPECIFIED = 0;

    // Integer type.
    INTEGER = 1;

    // Float type.
    FLOAT = 2;

    // String type.
    STRING = 3;

    // Date-time type. Supported formats:
    //   %Y-%m-%dT%H:%M:%E*S%E*z (absl::RFC3339_full)
    //   %Y-%m-%dT%H:%M:%E*S
    //   %Y-%m-%dT%H:%M%E*z
    //   %Y-%m-%dT%H:%M
    //   %Y-%m-%dT%H%E*z
    //   %Y-%m-%dT%H
    //   %Y-%m-%d%E*z
    //   %Y-%m-%d
    //   %Y-%m
    //   %Y
    DATETIME = 4;

    // Boolean type.
    BOOLEAN = 5;

    // List type.
    // - Every element of the list must have exactly the same data schema;
    //   otherwise, the elements are invalid arguments.
    // - An element cannot itself be a list (no list of lists).
    LIST = 6;
  }

  // The granularity of the metadata governed by this data schema.
  enum Granularity {
    // No granularity was specified.
    GRANULARITY_UNSPECIFIED = 0;

    // Granularity at the RagFile level.
    GRANULARITY_FILE_LEVEL = 1;
  }

  // The data type of the metadata.
  optional DataType type = 1;

  // Configuration for the `LIST` data type.
  ListConfig list_config = 2;

  // The granularity associated with this metadata schema.
  optional Granularity granularity = 3;

  // The search strategy for the metadata value of the `key`.
  optional SearchStrategy search_strategy = 4;
}
993+
994+
// User-provided metadata attached to a RagFile.
message RagMetadata {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/RagMetadata"
    pattern: "projects/{project}/locations/{location}/ragCorpora/{rag_corpus}/ragFiles/{rag_file}/ragMetadata/{rag_metadata}"
    plural: "ragMetadata"
    singular: "ragMetadata"
  };

  // Identifier. The resource name of this RagMetadata, formatted as:
  // `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}/ragFiles/{rag_file}/ragMetadata/{rag_metadata}`
  string name = 1 [(google.api.field_behavior) = IDENTIFIER];

  // The metadata supplied by the user.
  UserSpecifiedMetadata user_specified_metadata = 2;
}
1011+
1012+
// A single key/value pair of metadata supplied by the user.
message UserSpecifiedMetadata {
  // Required. The metadata key. The key, along with its type, must be defined
  // through CreateRagDataSchema.
  string key = 1 [(google.api.field_behavior) = REQUIRED];

  // The metadata value. It must be convertible to the type that the data
  // schema declares for `key`.
  MetadataValue value = 2;
}
1022+
1023+
// A metadata value. Covers every type that a data schema can declare.
message MetadataValue {
  // The metadata value; exactly one of the typed members below is set.
  oneof value {
    // The value for metadata of integer type.
    int64 int_value = 1;

    // The value for metadata of float type.
    float float_value = 2;

    // The value for metadata of string type.
    string str_value = 3;

    // The value for metadata of date-time type.
    string datetime_value = 4;

    // The value for metadata of boolean type.
    bool bool_value = 5;

    // The value for metadata of list type.
    MetadataList list_value = 6;
  }
}
1046+
1047+
// Represents a list of metadata values.
message MetadataList {
  // The elements of a metadata value whose data type is `LIST`.
  repeated MetadataValue values = 1;
}

0 commit comments

Comments
 (0)