Skip to content

Commit 0cdaee6

Browse files
Google APIs and copybara-github
authored and committed
feat: Add Aggregation Output in EvaluateDataset Get Operation Response
docs: Add constraints for AggregationMetric enum and default value for flip_enabled field in AutoraterConfig
PiperOrigin-RevId: 781252306
1 parent 302273a commit 0cdaee6

File tree

1 file changed

+50
-12
lines changed

1 file changed

+50
-12
lines changed

google/cloud/aiplatform/v1beta1/evaluation_service.proto

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ message EvaluateDatasetOperationMetadata {
8585

8686
// Response returned in the long-running operation (LRO) for
// EvaluationService.EvaluateDataset.
8787
message EvaluateDatasetResponse {
// NOTE(review): field number 2 is not used in this message as shown; confirm
// whether it should be declared `reserved`.
88+
// Output only. Aggregation statistics derived from results of
89+
// EvaluationService.EvaluateDataset.
90+
AggregationOutput aggregation_output = 1
91+
[(google.api.field_behavior) = OUTPUT_ONLY];
92+
8893
// Output only. Output info for EvaluationService.EvaluateDataset.
8994
OutputInfo output_info = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
9095
}
@@ -99,6 +104,39 @@ message OutputInfo {
99104
}
100105
}
101106

107+
// The aggregation results for the entire dataset, across all metrics.
108+
message AggregationOutput {
109+
// The dataset used for evaluation and aggregation.
110+
EvaluationDataset dataset = 1;
111+
112+
// The aggregation results; one AggregationResult per metric.
113+
repeated AggregationResult aggregation_results = 2;
114+
}
115+
116+
// The aggregation result for a single metric.
117+
message AggregationResult {
118+
// The aggregation result. At most one of the following members is set.
119+
oneof aggregation_result {
120+
// Result for a pointwise metric.
121+
PointwiseMetricResult pointwise_metric_result = 5;
122+
123+
// Result for a pairwise metric.
124+
PairwiseMetricResult pairwise_metric_result = 6;
125+
126+
// Result for the exact match metric.
127+
ExactMatchMetricValue exact_match_metric_value = 7;
128+
129+
// Result for the BLEU metric.
130+
BleuMetricValue bleu_metric_value = 8;
131+
132+
// Result for the ROUGE metric.
133+
RougeMetricValue rouge_metric_value = 9;
134+
}
135+
136+
// The aggregation metric (a Metric.AggregationMetric value) for this result.
137+
Metric.AggregationMetric aggregation_metric = 4;
138+
}
139+
102140
// Request message for EvaluationService.EvaluateDataset.
103141
message EvaluateDatasetRequest {
104142
// Required. The resource name of the Location to evaluate the dataset.
@@ -141,34 +179,34 @@ message Metric {
141179
// Unspecified aggregation metric.
142180
AGGREGATION_METRIC_UNSPECIFIED = 0;
143181

144-
// Average aggregation metric.
182+
// Average aggregation metric. Not supported for pairwise metric.
145183
AVERAGE = 1;
146184

147185
// Mode aggregation metric.
148186
MODE = 2;
149187

150-
// Standard deviation aggregation metric.
188+
// Standard deviation aggregation metric. Not supported for pairwise metric.
151189
STANDARD_DEVIATION = 3;
152190

153-
// Variance aggregation metric.
191+
// Variance aggregation metric. Not supported for pairwise metric.
154192
VARIANCE = 4;
155193

156-
// Minimum aggregation metric.
194+
// Minimum aggregation metric. Not supported for pairwise metric.
157195
MINIMUM = 5;
158196

159-
// Maximum aggregation metric.
197+
// Maximum aggregation metric. Not supported for pairwise metric.
160198
MAXIMUM = 6;
161199

162-
// Median aggregation metric.
200+
// Median aggregation metric. Not supported for pairwise metric.
163201
MEDIAN = 7;
164202

165-
// 90th percentile aggregation metric.
203+
// 90th percentile aggregation metric. Not supported for pairwise metric.
166204
PERCENTILE_P90 = 8;
167205

168-
// 95th percentile aggregation metric.
206+
// 95th percentile aggregation metric. Not supported for pairwise metric.
169207
PERCENTILE_P95 = 9;
170208

171-
// 99th percentile aggregation metric.
209+
// 99th percentile aggregation metric. Not supported for pairwise metric.
172210
PERCENTILE_P99 = 10;
173211
}
174212

@@ -216,9 +254,9 @@ message AutoraterConfig {
216254
// is 32.
217255
optional int32 sampling_count = 1 [(google.api.field_behavior) = OPTIONAL];
218256

219-
// Optional. Whether to flip the candidate and baseline responses.
220-
// This is only applicable to the pairwise metric. If enabled, also provide
221-
// PairwiseMetricSpec.candidate_response_field_name and
257+
// Optional. Default is true. Whether to flip the candidate and baseline
258+
// responses. This is only applicable to the pairwise metric. If enabled, also
259+
// provide PairwiseMetricSpec.candidate_response_field_name and
222260
// PairwiseMetricSpec.baseline_response_field_name. When rendering
223261
// PairwiseMetricSpec.metric_prompt_template, the candidate and baseline
224262
// fields will be flipped for half of the samples to reduce bias.

0 commit comments

Comments
 (0)