@@ -60,6 +60,33 @@ enum PairwiseChoice {
6060 TIE = 3 ;
6161}
6262
// Configuration for the autorater. Applies to both EvaluateInstances and
// EvaluateDataset.
message AutoraterConfig {
  // Optional. How many samples to draw for each instance in the dataset.
  // Defaults to 4 when not specified. The minimum value is 1 and the maximum
  // value is 32.
  optional int32 sampling_count = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Whether to flip the candidate and baseline responses.
  // Only applicable to the pairwise metric. If enabled, also provide
  // PairwiseMetricSpec.candidate_response_field_name and
  // PairwiseMetricSpec.baseline_response_field_name. While
  // PairwiseMetricSpec.metric_prompt_template is rendered, the candidate and
  // baseline fields are flipped for half of the samples to reduce bias.
  optional bool flip_enabled = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Fully qualified name of the publisher model or tuned autorater
  // endpoint to use.
  //
  // Publisher model format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  //
  // Tuned model endpoint format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string autorater_model = 3
      [(google.api.field_behavior) = OPTIONAL];
}
89+
6390// Request message for EvaluationService.EvaluateInstances.
6491message EvaluateInstancesRequest {
6592 // Instances and specs for evaluation
@@ -140,6 +167,13 @@ message EvaluateInstancesRequest {
140167 // Input for tool parameter key value match metric.
141168 ToolParameterKVMatchInput tool_parameter_kv_match_input = 22 ;
142169
170+ // Translation metrics.
171+ // Input for Comet metric.
172+ CometInput comet_input = 31 ;
173+
174+ // Input for Metricx metric.
175+ MetricxInput metricx_input = 32 ;
176+
143177 // Input for trajectory exact match metric.
144178 TrajectoryExactMatchInput trajectory_exact_match_input = 33 ;
145179
@@ -167,6 +201,10 @@ message EvaluateInstancesRequest {
167201 type : "locations.googleapis.com/Location"
168202 }
169203 ];
204+
205+ // Optional. Autorater config used for evaluation.
206+ AutoraterConfig autorater_config = 30
207+ [(google.api.field_behavior ) = OPTIONAL ];
170208}
171209
172210// Response message for EvaluationService.EvaluateInstances.
@@ -254,6 +292,13 @@ message EvaluateInstancesResponse {
254292 // Results for tool parameter key value match metric.
255293 ToolParameterKVMatchResults tool_parameter_kv_match_results = 21 ;
256294
295+ // Translation metrics.
296+ // Result for Comet metric.
297+ CometResult comet_result = 29 ;
298+
299+ // Result for Metricx metric.
300+ MetricxResult metricx_result = 30 ;
301+
257302 // Result for trajectory exact match metric.
258303 TrajectoryExactMatchResults trajectory_exact_match_results = 31 ;
259304
@@ -1032,6 +1077,10 @@ message PointwiseMetricSpec {
10321077 // Required. Metric prompt template for pointwise metric.
10331078 optional string metric_prompt_template = 1
10341079 [(google.api.field_behavior ) = REQUIRED ];
1080+
1081+ // Optional. System instructions for pointwise metric.
1082+ optional string system_instruction = 2
1083+ [(google.api.field_behavior ) = OPTIONAL ];
10351084}
10361085
10371086// Spec for pointwise metric result.
@@ -1069,6 +1118,18 @@ message PairwiseMetricSpec {
10691118 // Required. Metric prompt template for pairwise metric.
10701119 optional string metric_prompt_template = 1
10711120 [(google.api.field_behavior ) = REQUIRED ];
1121+
1122+ // Optional. The field name of the candidate response.
1123+ string candidate_response_field_name = 2
1124+ [(google.api.field_behavior ) = OPTIONAL ];
1125+
1126+ // Optional. The field name of the baseline response.
1127+ string baseline_response_field_name = 3
1128+ [(google.api.field_behavior ) = OPTIONAL ];
1129+
1130+ // Optional. System instructions for pairwise metric.
1131+ optional string system_instruction = 4
1132+ [(google.api.field_behavior ) = OPTIONAL ];
10721133}
10731134
10741135// Spec for pairwise metric result.
@@ -1228,6 +1289,116 @@ message ToolParameterKVMatchMetricValue {
12281289 optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
12291290}
12301291
// Input payload for the Comet metric: a metric spec plus the instance to
// evaluate.
message CometInput {
  // Required. Spec for the Comet metric.
  CometSpec metric_spec = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The Comet instance.
  CometInstance instance = 2
      [(google.api.field_behavior) = REQUIRED];
}
1300+
// Spec for the Comet metric.
message CometSpec {
  // Available Comet versions.
  enum CometVersion {
    // The Comet version is unspecified.
    COMET_VERSION_UNSPECIFIED = 0;

    // Comet 22 for translation + source + reference
    // (source-reference-combined).
    COMET_22_SRC_REF = 2;
  }

  // Required. The Comet version to use for evaluation.
  optional CometVersion version = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Source language, in BCP-47 format.
  string source_language = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Target language, in BCP-47 format. Applies to both the
  // prediction and the reference.
  string target_language = 3
      [(google.api.field_behavior) = OPTIONAL];
}
1323+
// A single instance for the Comet metric. Which fields are used for evaluation
// depends on the Comet version.
message CometInstance {
  // Required. Output of the evaluated model.
  optional string prediction = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Ground truth that the prediction is compared against.
  optional string reference = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Source text in the original language.
  optional string source = 3
      [(google.api.field_behavior) = OPTIONAL];
}
1336+
// Result for the Comet metric: the Comet score calculated for the given
// instance using the version specified in the spec.
message CometResult {
  // Output only. Comet score. The range depends on the version.
  optional float score = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
1343+
// Input payload for the MetricX metric: a metric spec plus the instance to
// evaluate.
message MetricxInput {
  // Required. Spec for the MetricX metric.
  MetricxSpec metric_spec = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The MetricX instance.
  MetricxInstance instance = 2
      [(google.api.field_behavior) = REQUIRED];
}
1352+
// Spec for the MetricX metric.
message MetricxSpec {
  // Available MetricX versions.
  enum MetricxVersion {
    // The MetricX version is unspecified.
    METRICX_VERSION_UNSPECIFIED = 0;

    // MetricX 2024 (2.6) for translation + reference (reference-based).
    METRICX_24_REF = 1;

    // MetricX 2024 (2.6) for translation + source (QE).
    METRICX_24_SRC = 2;

    // MetricX 2024 (2.6) for translation + source + reference
    // (source-reference-combined).
    METRICX_24_SRC_REF = 3;
  }

  // Required. The MetricX version to use for evaluation.
  optional MetricxVersion version = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Source language, in BCP-47 format.
  string source_language = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Target language, in BCP-47 format. Applies to both the
  // prediction and the reference.
  string target_language = 3
      [(google.api.field_behavior) = OPTIONAL];
}
1381+
// A single instance for the MetricX metric. Which fields are used for
// evaluation depends on the MetricX version.
message MetricxInstance {
  // Required. Output of the evaluated model.
  optional string prediction = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Ground truth that the prediction is compared against.
  optional string reference = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Source text in the original language.
  optional string source = 3
      [(google.api.field_behavior) = OPTIONAL];
}
1394+
// Result for the MetricX metric: the MetricX score calculated for the given
// instance using the version specified in the spec.
message MetricxResult {
  // Output only. MetricX score. The range depends on the version.
  optional float score = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
1401+
12311402// Instances and metric spec for TrajectoryExactMatch metric.
12321403message TrajectoryExactMatchInput {
12331404 // Required. Spec for TrajectoryExactMatch metric.
0 commit comments