@@ -22,11 +22,13 @@ import "google/api/field_behavior.proto";
2222import "google/api/resource.proto" ;
2323import "google/cloud/aiplatform/v1beta1/annotation.proto" ;
2424import "google/cloud/aiplatform/v1beta1/annotation_spec.proto" ;
25+ import "google/cloud/aiplatform/v1beta1/content.proto" ;
2526import "google/cloud/aiplatform/v1beta1/data_item.proto" ;
2627import "google/cloud/aiplatform/v1beta1/dataset.proto" ;
2728import "google/cloud/aiplatform/v1beta1/dataset_version.proto" ;
2829import "google/cloud/aiplatform/v1beta1/operation.proto" ;
2930import "google/cloud/aiplatform/v1beta1/saved_query.proto" ;
31+ import "google/cloud/aiplatform/v1beta1/tool.proto" ;
3032import "google/longrunning/operations.proto" ;
3133import "google/protobuf/empty.proto" ;
3234import "google/protobuf/field_mask.proto" ;
@@ -264,14 +266,40 @@ service DatasetService {
264266 option (google.api.method_signature ) = "name" ;
265267 }
266268
267- // Lists Annotations belongs to a dataitem
269+ // Lists the Annotations that belong to a DataItem.
268270 rpc ListAnnotations (ListAnnotationsRequest )
269271 returns (ListAnnotationsResponse ) {
270272 option (google.api.http ) = {
271273 get : "/v1beta1/{parent=projects/*/locations/*/datasets/*/dataItems/*}/annotations"
272274 };
273275 option (google.api.method_signature ) = "parent" ;
274276 }
277+
278+ // Assesses the state or validity of a dataset for a given use case. Returns a
279+ // long-running operation whose response type is AssessDataResponse.
280+ rpc AssessData (AssessDataRequest ) returns (google.longrunning.Operation ) {
281+ option (google.api.http ) = {
282+ post : "/v1beta1/{name=projects/*/locations/*/datasets/*}:assess"
283+ body : "*"
284+ };
285+ option (google.longrunning.operation_info ) = {
286+ response_type : "AssessDataResponse"
287+ metadata_type : "AssessDataOperationMetadata"
288+ };
289+ }
290+
291+ // Assembles each row of a multimodal dataset and writes the result into a BigQuery
292+ // table. Returns a long-running operation whose response type is AssembleDataResponse.
293+ rpc AssembleData (AssembleDataRequest ) returns (google.longrunning.Operation ) {
294+ option (google.api.http ) = {
295+ post : "/v1beta1/{name=projects/*/locations/*/datasets/*}:assemble"
296+ body : "*"
297+ };
298+ option (google.longrunning.operation_info ) = {
299+ response_type : "AssembleDataResponse"
300+ metadata_type : "AssembleDataOperationMetadata"
301+ };
302+ }
275303}
276304
277305// Request message for
@@ -888,3 +916,224 @@ message ListAnnotationsResponse {
888916 // The standard List next-page token.
889917 string next_page_token = 2 ;
890918}
919+
920+ // Request message for
921+ // [DatasetService.AssessData][google.cloud.aiplatform.v1beta1.DatasetService.AssessData].
922+ // Used only for MULTIMODAL datasets.
923+ message AssessDataRequest {
924+ // Configuration for the tuning validation assessment.
925+ message TuningValidationAssessmentConfig {
926+ // How the dataset is used in supervised fine-tuning (training or validation).
927+ enum DatasetUsage {
928+ // Default value. Should not be used.
929+ DATASET_USAGE_UNSPECIFIED = 0 ;
930+
931+ // Supervised fine-tuning training dataset.
932+ SFT_TRAINING = 1 ;
933+
934+ // Supervised fine-tuning validation dataset.
935+ SFT_VALIDATION = 2 ;
936+ }
937+
938+ // Required. The name of the model used for tuning.
939+ string model_name = 1 [(google.api.field_behavior ) = REQUIRED ];
940+
941+ // Required. How the dataset is used (training or validation).
942+ DatasetUsage dataset_usage = 2 [(google.api.field_behavior ) = REQUIRED ];
943+ }
944+
945+ // Configuration for the tuning resource usage assessment.
946+ message TuningResourceUsageAssessmentConfig {
947+ // Required. The name of the model used for tuning.
948+ string model_name = 1 [(google.api.field_behavior ) = REQUIRED ];
949+ }
950+
951+ // The assessment to perform. At most one of these configs can be set.
952+ oneof assessment_config {
953+ // Optional. Configuration for the tuning validation assessment.
954+ TuningValidationAssessmentConfig tuning_validation_assessment_config = 2
955+ [(google.api.field_behavior ) = OPTIONAL ];
956+
957+ // Optional. Configuration for the tuning resource usage assessment.
958+ TuningResourceUsageAssessmentConfig
959+ tuning_resource_usage_assessment_config = 3
960+ [(google.api.field_behavior) = OPTIONAL];
961+ }
962+
963+ // The read config for the dataset.
964+ oneof read_config {
965+ // Optional. Config for assembling templates with a Gemini API structure to
966+ // assess assembled data.
967+ GeminiTemplateConfig gemini_template_config = 4
968+ [(google.api.field_behavior ) = OPTIONAL ];
969+ }
970+
971+ // Required. The name of the Dataset resource. Used only for MULTIMODAL
972+ // datasets. Format:
973+ // `projects/{project}/locations/{location}/datasets/{dataset}`
974+ string name = 1 [
975+ (google.api.field_behavior ) = REQUIRED ,
976+ (google.api.resource_reference ) = {
977+ type : "aiplatform.googleapis.com/Dataset"
978+ }
979+ ];
980+ }
981+
982+ // Response message for
983+ // [DatasetService.AssessData][google.cloud.aiplatform.v1beta1.DatasetService.AssessData].
984+ message AssessDataResponse {
985+ // The result of the tuning validation assessment.
986+ message TuningValidationAssessmentResult {
987+ // Optional. A list containing the first validation errors.
988+ repeated string errors = 1 [(google.api.field_behavior ) = OPTIONAL ];
989+ }
990+
991+ // The result of the tuning resource usage assessment.
992+ message TuningResourceUsageAssessmentResult {
993+ // Number of tokens in the tuning dataset.
994+ int64 token_count = 1 ;
995+
996+ // Number of billable characters in the tuning dataset.
997+ int64 billable_character_count = 2 ;
998+ }
999+
1000+ // The assessment result. At most one of these results is set.
1001+ oneof assessment_result {
1002+ // Optional. The result of the tuning validation assessment.
1003+ TuningValidationAssessmentResult tuning_validation_assessment_result = 1
1004+ [(google.api.field_behavior ) = OPTIONAL ];
1005+
1006+ // Optional. The result of the tuning resource usage assessment.
1007+ TuningResourceUsageAssessmentResult
1008+ tuning_resource_usage_assessment_result = 2
1009+ [(google.api.field_behavior) = OPTIONAL];
1010+ }
1011+ }
1012+
1013+ // Metadata of the long-running operation returned by
1014+ // [DatasetService.AssessData][google.cloud.aiplatform.v1beta1.DatasetService.AssessData].
1015+ message AssessDataOperationMetadata {
1016+ // The common part of the operation metadata.
1017+ GenericOperationMetadata generic_metadata = 1 ;
1018+ }
1019+
1020+ // Template configuration to create Gemini examples from a multimodal dataset.
1021+ message GeminiTemplateConfig {
1022+ // Required. The template used to assemble the request that downstream
1023+ // applications will use.
1024+ GeminiExample gemini_example = 1 [(google.api.field_behavior ) = REQUIRED ];
1025+
1026+ // Required. Maps each template parameter to a column in the dataset table.
1027+ map <string , string > field_mapping = 2
1028+ [(google.api.field_behavior ) = REQUIRED ];
1029+ }
1030+
1031+ // Format for Gemini examples used for Vertex Multimodal datasets.
1032+ message GeminiExample {
1033+ // Optional. The fully qualified name of the publisher model or tuned model
1034+ // endpoint to use.
1035+ //
1036+ // Publisher model format:
1037+ // `projects/{project}/locations/{location}/publishers/*/models/*`
1038+ //
1039+ // Tuned model endpoint format:
1040+ // `projects/{project}/locations/{location}/endpoints/{endpoint}`
1041+ string model = 1 [
1042+ (google.api.field_behavior ) = OPTIONAL ,
1043+ (google.api.resource_reference ) = {
1044+ type : "aiplatform.googleapis.com/Endpoint"
1045+ }
1046+ ];
1047+
1048+ // Required. The content of the current conversation with the model.
1049+ //
1050+ // For single-turn queries, this is a single instance. For multi-turn
1051+ // queries, this is a repeated field that contains the conversation history
1052+ // and the latest request.
1053+ repeated Content contents = 2 [(google.api.field_behavior ) = REQUIRED ];
1054+
1055+ // Optional. The user-provided system instructions for the model.
1056+ // Note: only text should be used in parts, and the content of each part will
1057+ // be in a separate paragraph.
1058+ optional Content system_instruction = 8
1059+ [(google.api.field_behavior ) = OPTIONAL ];
1060+
1061+ // Optional. The name of the cached content used as context to serve the
1062+ // prediction. Note: only used in explicit caching, where users can have
1063+ // control over caching (e.g. what content to cache) and enjoy guaranteed cost
1064+ // savings. Format:
1065+ // `projects/{project}/locations/{location}/cachedContents/{cachedContent}`
1066+ string cached_content = 9 [
1067+ (google.api.field_behavior ) = OPTIONAL ,
1068+ (google.api.resource_reference ) = {
1069+ type : "aiplatform.googleapis.com/CachedContent"
1070+ }
1071+ ];
1072+
1073+ // Optional. A list of `Tools` the model may use to generate the next
1074+ // response.
1075+ //
1076+ // A `Tool` is a piece of code that enables the system to interact with
1077+ // external systems to perform an action, or set of actions, outside of the
1078+ // knowledge and scope of the model.
1079+ repeated Tool tools = 6 [(google.api.field_behavior ) = OPTIONAL ];
1080+
1081+ // Optional. Tool config. This config is shared for all tools provided in the
1082+ // request.
1083+ ToolConfig tool_config = 7 [(google.api.field_behavior ) = OPTIONAL ];
1084+
1085+ // Optional. The labels with user-defined metadata for the request. It is used
1086+ // for billing and reporting only.
1087+ //
1088+ // Label keys and values can be no longer than 63 characters
1089+ // (Unicode codepoints) and can only contain lowercase letters, numeric
1090+ // characters, underscores, and dashes. International characters are
1091+ // allowed. Label values are optional. Label keys must start with a letter.
1092+ map <string , string > labels = 10 [(google.api.field_behavior ) = OPTIONAL ];
1093+
1094+ // Optional. Per-request settings for blocking unsafe content.
1095+ // Enforced on GenerateContentResponse.candidates.
1096+ repeated SafetySetting safety_settings = 3
1097+ [(google.api.field_behavior ) = OPTIONAL ];
1098+
1099+ // Optional. Generation config.
1100+ GenerationConfig generation_config = 4
1101+ [(google.api.field_behavior ) = OPTIONAL ];
1102+ }
1103+
1104+ // Request message for
1105+ // [DatasetService.AssembleData][google.cloud.aiplatform.v1beta1.DatasetService.AssembleData].
1106+ // Used only for MULTIMODAL datasets.
1107+ message AssembleDataRequest {
1108+ // The read config for the dataset. At most one config can be set.
1109+ oneof read_config {
1110+ // Optional. Config for assembling templates with a Gemini API structure.
1111+ GeminiTemplateConfig gemini_template_config = 2
1112+ [(google.api.field_behavior ) = OPTIONAL ];
1113+ }
1114+
1115+ // Required. The name of the Dataset resource. Used only for MULTIMODAL
1116+ // datasets. Format:
1117+ // `projects/{project}/locations/{location}/datasets/{dataset}`
1118+ string name = 1 [
1119+ (google.api.field_behavior ) = REQUIRED ,
1120+ (google.api.resource_reference ) = {
1121+ type : "aiplatform.googleapis.com/Dataset"
1122+ }
1123+ ];
1124+ }
1125+
1126+ // Response message for
1127+ // [DatasetService.AssembleData][google.cloud.aiplatform.v1beta1.DatasetService.AssembleData].
1128+ message AssembleDataResponse {
1129+ // Path of the destination BigQuery table, which contains the assembled data
1130+ // as a single column.
1131+ string bigquery_destination = 1 ;
1132+ }
1133+
1134+ // Metadata of the long-running operation returned by
1135+ // [DatasetService.AssembleData][google.cloud.aiplatform.v1beta1.DatasetService.AssembleData].
1136+ message AssembleDataOperationMetadata {
1137+ // The common part of the operation metadata.
1138+ GenericOperationMetadata generic_metadata = 1 ;
1139+ }
0 commit comments