Skip to content

Commit 9af2768

Browse files
Google APIs authored and
copybara-github committed
feat: Add AssessData and AssembleData RPCs to DatasetService
PiperOrigin-RevId: 738721843
1 parent 985a0b9 commit 9af2768

1 file changed

Lines changed: 250 additions & 1 deletion

File tree

google/cloud/aiplatform/v1beta1/dataset_service.proto

Lines changed: 250 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,13 @@ import "google/api/field_behavior.proto";
2222
import "google/api/resource.proto";
2323
import "google/cloud/aiplatform/v1beta1/annotation.proto";
2424
import "google/cloud/aiplatform/v1beta1/annotation_spec.proto";
25+
import "google/cloud/aiplatform/v1beta1/content.proto";
2526
import "google/cloud/aiplatform/v1beta1/data_item.proto";
2627
import "google/cloud/aiplatform/v1beta1/dataset.proto";
2728
import "google/cloud/aiplatform/v1beta1/dataset_version.proto";
2829
import "google/cloud/aiplatform/v1beta1/operation.proto";
2930
import "google/cloud/aiplatform/v1beta1/saved_query.proto";
31+
import "google/cloud/aiplatform/v1beta1/tool.proto";
3032
import "google/longrunning/operations.proto";
3133
import "google/protobuf/empty.proto";
3234
import "google/protobuf/field_mask.proto";
@@ -264,14 +266,40 @@ service DatasetService {
264266
option (google.api.method_signature) = "name";
265267
}
266268

267-
// Lists Annotations belongs to a dataitem
269+
// Lists Annotations belonging to a DataItem.
268270
rpc ListAnnotations(ListAnnotationsRequest)
269271
returns (ListAnnotationsResponse) {
270272
option (google.api.http) = {
271273
get: "/v1beta1/{parent=projects/*/locations/*/datasets/*/dataItems/*}/annotations"
272274
};
273275
option (google.api.method_signature) = "parent";
274276
}
277+
278+
// Assesses the state or validity of the dataset with respect to a given use
279+
// case.
280+
rpc AssessData(AssessDataRequest) returns (google.longrunning.Operation) {
281+
option (google.api.http) = {
282+
post: "/v1beta1/{name=projects/*/locations/*/datasets/*}:assess"
283+
body: "*"
284+
};
285+
option (google.longrunning.operation_info) = {
286+
response_type: "AssessDataResponse"
287+
metadata_type: "AssessDataOperationMetadata"
288+
};
289+
}
290+
291+
// Assembles each row of a multimodal dataset and writes the result into a
292+
// BigQuery table.
293+
rpc AssembleData(AssembleDataRequest) returns (google.longrunning.Operation) {
294+
option (google.api.http) = {
295+
post: "/v1beta1/{name=projects/*/locations/*/datasets/*}:assemble"
296+
body: "*"
297+
};
298+
option (google.longrunning.operation_info) = {
299+
response_type: "AssembleDataResponse"
300+
metadata_type: "AssembleDataOperationMetadata"
301+
};
302+
}
275303
}
276304

277305
// Request message for
@@ -888,3 +916,224 @@ message ListAnnotationsResponse {
888916
// The standard List next-page token.
889917
string next_page_token = 2;
890918
}
919+
920+
// Request message for
921+
// [DatasetService.AssessData][google.cloud.aiplatform.v1beta1.DatasetService.AssessData].
922+
// Used only for MULTIMODAL datasets.
923+
message AssessDataRequest {
924+
// Configuration for the tuning validation assessment.
925+
message TuningValidationAssessmentConfig {
926+
// The dataset usage (e.g. training/validation).
927+
enum DatasetUsage {
928+
// Default value. Should not be used.
929+
DATASET_USAGE_UNSPECIFIED = 0;
930+
931+
// Supervised fine-tuning training dataset.
932+
SFT_TRAINING = 1;
933+
934+
// Supervised fine-tuning validation dataset.
935+
SFT_VALIDATION = 2;
936+
}
937+
938+
// Required. The name of the model used for tuning.
939+
string model_name = 1 [(google.api.field_behavior) = REQUIRED];
940+
941+
// Required. The dataset usage (e.g. training/validation).
942+
DatasetUsage dataset_usage = 2 [(google.api.field_behavior) = REQUIRED];
943+
}
944+
945+
// Configuration for the tuning resource usage assessment.
946+
message TuningResourceUsageAssessmentConfig {
947+
// Required. The name of the model used for tuning.
948+
string model_name = 1 [(google.api.field_behavior) = REQUIRED];
949+
}
950+
951+
// The assessment type.
952+
oneof assessment_config {
953+
// Optional. Configuration for the tuning validation assessment.
954+
TuningValidationAssessmentConfig tuning_validation_assessment_config = 2
955+
[(google.api.field_behavior) = OPTIONAL];
956+
957+
// Optional. Configuration for the tuning resource usage assessment.
958+
TuningResourceUsageAssessmentConfig
959+
tuning_resource_usage_assessment_config = 3
960+
[(google.api.field_behavior) = OPTIONAL];
961+
}
962+
963+
// The read config for the dataset.
964+
oneof read_config {
965+
// Optional. Config for assembling templates with a Gemini API structure to
966+
// assess assembled data.
967+
GeminiTemplateConfig gemini_template_config = 4
968+
[(google.api.field_behavior) = OPTIONAL];
969+
}
970+
971+
// Required. The name of the Dataset resource. Used only for MULTIMODAL
972+
// datasets. Format:
973+
// `projects/{project}/locations/{location}/datasets/{dataset}`
974+
string name = 1 [
975+
(google.api.field_behavior) = REQUIRED,
976+
(google.api.resource_reference) = {
977+
type: "aiplatform.googleapis.com/Dataset"
978+
}
979+
];
980+
}
981+
982+
// Response message for
983+
// [DatasetService.AssessData][google.cloud.aiplatform.v1beta1.DatasetService.AssessData].
984+
message AssessDataResponse {
985+
// The result of the tuning validation assessment.
986+
message TuningValidationAssessmentResult {
987+
// Optional. A list containing the first validation errors.
988+
repeated string errors = 1 [(google.api.field_behavior) = OPTIONAL];
989+
}
990+
991+
// The result of the tuning resource usage assessment.
992+
message TuningResourceUsageAssessmentResult {
993+
// Number of tokens in the tuning dataset.
994+
int64 token_count = 1;
995+
996+
// Number of billable characters in the tuning dataset.
997+
int64 billable_character_count = 2;
998+
}
999+
1000+
// The assessment result.
1001+
oneof assessment_result {
1002+
// Optional. The result of the tuning validation assessment.
1003+
TuningValidationAssessmentResult tuning_validation_assessment_result = 1
1004+
[(google.api.field_behavior) = OPTIONAL];
1005+
1006+
// Optional. The result of the tuning resource usage assessment.
1007+
TuningResourceUsageAssessmentResult
1008+
tuning_resource_usage_assessment_result = 2
1009+
[(google.api.field_behavior) = OPTIONAL];
1010+
}
1011+
}
1012+
1013+
// Runtime operation information for
1014+
// [DatasetService.AssessData][google.cloud.aiplatform.v1beta1.DatasetService.AssessData].
1015+
message AssessDataOperationMetadata {
1016+
// The common part of the operation metadata.
1017+
GenericOperationMetadata generic_metadata = 1;
1018+
}
1019+
1020+
// Template configuration to create Gemini examples from a multimodal dataset.
1021+
message GeminiTemplateConfig {
1022+
// Required. The template that will be used for assembling the request to use
1023+
// for downstream applications.
1024+
GeminiExample gemini_example = 1 [(google.api.field_behavior) = REQUIRED];
1025+
1026+
// Required. Map of template params to the columns in the dataset table.
1027+
map<string, string> field_mapping = 2
1028+
[(google.api.field_behavior) = REQUIRED];
1029+
}
1030+
1031+
// Format for Gemini examples used for Vertex Multimodal datasets.
1032+
message GeminiExample {
1033+
// Optional. The fully qualified name of the publisher model or tuned model
1034+
// endpoint to use.
1035+
//
1036+
// Publisher model format:
1037+
// `projects/{project}/locations/{location}/publishers/*/models/*`
1038+
//
1039+
// Tuned model endpoint format:
1040+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
1041+
string model = 1 [
1042+
(google.api.field_behavior) = OPTIONAL,
1043+
(google.api.resource_reference) = {
1044+
type: "aiplatform.googleapis.com/Endpoint"
1045+
}
1046+
];
1047+
1048+
// Required. The content of the current conversation with the model.
1049+
//
1050+
// For single-turn queries, this is a single instance. For multi-turn
1051+
// queries, this is a repeated field that contains conversation history +
1052+
// latest request.
1053+
repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];
1054+
1055+
// Optional. The user provided system instructions for the model.
1056+
// Note: only text should be used in parts and content in each part will be
1057+
// in a separate paragraph.
1058+
optional Content system_instruction = 8
1059+
[(google.api.field_behavior) = OPTIONAL];
1060+
1061+
// Optional. The name of the cached content used as context to serve the
1062+
// prediction. Note: only used in explicit caching, where users can have
1063+
// control over caching (e.g. what content to cache) and enjoy guaranteed cost
1064+
// savings. Format:
1065+
// `projects/{project}/locations/{location}/cachedContents/{cachedContent}`
1066+
string cached_content = 9 [
1067+
(google.api.field_behavior) = OPTIONAL,
1068+
(google.api.resource_reference) = {
1069+
type: "aiplatform.googleapis.com/CachedContent"
1070+
}
1071+
];
1072+
1073+
// Optional. A list of `Tools` the model may use to generate the next
1074+
// response.
1075+
//
1076+
// A `Tool` is a piece of code that enables the system to interact with
1077+
// external systems to perform an action, or set of actions, outside of
1078+
// knowledge and scope of the model.
1079+
repeated Tool tools = 6 [(google.api.field_behavior) = OPTIONAL];
1080+
1081+
// Optional. Tool config. This config is shared for all tools provided in the
1082+
// request.
1083+
ToolConfig tool_config = 7 [(google.api.field_behavior) = OPTIONAL];
1084+
1085+
// Optional. The labels with user-defined metadata for the request. It is used
1086+
// for billing and reporting only.
1087+
//
1088+
// Label keys and values can be no longer than 63 characters
1089+
// (Unicode codepoints) and can only contain lowercase letters, numeric
1090+
// characters, underscores, and dashes. International characters are
1091+
// allowed. Label values are optional. Label keys must start with a letter.
1092+
map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];
1093+
1094+
// Optional. Per request settings for blocking unsafe content.
1095+
// Enforced on GenerateContentResponse.candidates.
1096+
repeated SafetySetting safety_settings = 3
1097+
[(google.api.field_behavior) = OPTIONAL];
1098+
1099+
// Optional. Generation config.
1100+
GenerationConfig generation_config = 4
1101+
[(google.api.field_behavior) = OPTIONAL];
1102+
}
1103+
1104+
// Request message for
1105+
// [DatasetService.AssembleData][google.cloud.aiplatform.v1beta1.DatasetService.AssembleData].
1106+
// Used only for MULTIMODAL datasets.
1107+
message AssembleDataRequest {
1108+
// The read config for the dataset.
1109+
oneof read_config {
1110+
// Optional. Config for assembling templates with a Gemini API structure.
1111+
GeminiTemplateConfig gemini_template_config = 2
1112+
[(google.api.field_behavior) = OPTIONAL];
1113+
}
1114+
1115+
// Required. The name of the Dataset resource (used only for MULTIMODAL
1116+
// datasets). Format:
1117+
// `projects/{project}/locations/{location}/datasets/{dataset}`
1118+
string name = 1 [
1119+
(google.api.field_behavior) = REQUIRED,
1120+
(google.api.resource_reference) = {
1121+
type: "aiplatform.googleapis.com/Dataset"
1122+
}
1123+
];
1124+
}
1125+
1126+
// Response message for
1127+
// [DatasetService.AssembleData][google.cloud.aiplatform.v1beta1.DatasetService.AssembleData].
1128+
message AssembleDataResponse {
1129+
// Destination BigQuery table path containing the assembled data as a single
1130+
// column.
1131+
string bigquery_destination = 1;
1132+
}
1133+
1134+
// Runtime operation information for
1135+
// [DatasetService.AssembleData][google.cloud.aiplatform.v1beta1.DatasetService.AssembleData].
1136+
message AssembleDataOperationMetadata {
1137+
// The common part of the operation metadata.
1138+
GenericOperationMetadata generic_metadata = 1;
1139+
}

0 commit comments

Comments
 (0)