feat: Add Aggregation Output in EvaluateDataset Get Operation Response

Google APIs · copybara-github · commit 0cdaee64d46d · 2025-07-09T16:18:16.000-07:00
docs: Add constraints for AggregationMetric enum and default value for flip_enabled field in AutoraterConfig

PiperOrigin-RevId: 781252306
diff --git a/google/cloud/aiplatform/v1beta1/evaluation_service.proto b/google/cloud/aiplatform/v1beta1/evaluation_service.proto
@@ -85,6 +85,11 @@ message EvaluateDatasetOperationMetadata {
 
 // Response in LRO for EvaluationService.EvaluateDataset.
 message EvaluateDatasetResponse {
+  // Output only. Aggregation statistics derived from results of
+  // EvaluationService.EvaluateDataset.
+  AggregationOutput aggregation_output = 1
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
   // Output only. Output info for EvaluationService.EvaluateDataset.
   OutputInfo output_info = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
 }
@@ -99,6 +104,39 @@ message OutputInfo {
   }
 }
 
+// The aggregation result for the entire dataset and all metrics.
+message AggregationOutput {
+  // The dataset used for evaluation and aggregation.
+  EvaluationDataset dataset = 1;
+
+  // One AggregationResult per metric.
+  repeated AggregationResult aggregation_results = 2;
+}
+
+// The aggregation result for a single metric.
+message AggregationResult {
+  // The aggregation result.
+  oneof aggregation_result {
+    // Result for pointwise metric.
+    PointwiseMetricResult pointwise_metric_result = 5;
+
+    // Result for pairwise metric.
+    PairwiseMetricResult pairwise_metric_result = 6;
+
+    // Result for exact match metric.
+    ExactMatchMetricValue exact_match_metric_value = 7;
+
+    // Result for BLEU metric.
+    BleuMetricValue bleu_metric_value = 8;
+
+    // Result for ROUGE metric.
+    RougeMetricValue rouge_metric_value = 9;
+  }
+
+  // Aggregation metric.
+  Metric.AggregationMetric aggregation_metric = 4;
+}
+
 // Request message for EvaluationService.EvaluateDataset.
 message EvaluateDatasetRequest {
   // Required. The resource name of the Location to evaluate the dataset.
@@ -141,34 +179,34 @@ message Metric {
     // Unspecified aggregation metric.
     AGGREGATION_METRIC_UNSPECIFIED = 0;
 
-    // Average aggregation metric.
+    // Average aggregation metric. Not supported for pairwise metric.
     AVERAGE = 1;
 
     // Mode aggregation metric.
     MODE = 2;
 
-    // Standard deviation aggregation metric.
+    // Standard deviation aggregation metric. Not supported for pairwise metric.
     STANDARD_DEVIATION = 3;
 
-    // Variance aggregation metric.
+    // Variance aggregation metric. Not supported for pairwise metric.
     VARIANCE = 4;
 
-    // Minimum aggregation metric.
+    // Minimum aggregation metric. Not supported for pairwise metric.
     MINIMUM = 5;
 
-    // Maximum aggregation metric.
+    // Maximum aggregation metric. Not supported for pairwise metric.
     MAXIMUM = 6;
 
-    // Median aggregation metric.
+    // Median aggregation metric. Not supported for pairwise metric.
     MEDIAN = 7;
 
-    // 90th percentile aggregation metric.
+    // 90th percentile aggregation metric. Not supported for pairwise metric.
     PERCENTILE_P90 = 8;
 
-    // 95th percentile aggregation metric.
+    // 95th percentile aggregation metric. Not supported for pairwise metric.
     PERCENTILE_P95 = 9;
 
-    // 99th percentile aggregation metric.
+    // 99th percentile aggregation metric. Not supported for pairwise metric.
     PERCENTILE_P99 = 10;
   }
 
@@ -216,9 +254,9 @@ message AutoraterConfig {
   // is 32.
   optional int32 sampling_count = 1 [(google.api.field_behavior) = OPTIONAL];
 
-  // Optional. Whether to flip the candidate and baseline responses.
-  // This is only applicable to the pairwise metric. If enabled, also provide
-  // PairwiseMetricSpec.candidate_response_field_name and
+  // Optional. Default is true. Whether to flip the candidate and baseline
+  // responses. This is only applicable to the pairwise metric. If enabled, also
+  // provide PairwiseMetricSpec.candidate_response_field_name and
   // PairwiseMetricSpec.baseline_response_field_name. When rendering
   // PairwiseMetricSpec.metric_prompt_template, the candidate and baseline
   // fields will be flipped for half of the samples to reduce bias.