@@ -121,6 +121,12 @@ message EvaluateInstancesRequest {
121121 // metric.
122122 QuestionAnsweringCorrectnessInput question_answering_correctness_input = 18 ;
123123
124+ // Input for pointwise metric.
125+ PointwiseMetricInput pointwise_metric_input = 28 ;
126+
127+ // Input for pairwise metric.
128+ PairwiseMetricInput pairwise_metric_input = 29 ;
129+
124130 // Tool call metric instances.
125131 // Input for tool call valid metric.
126132 ToolCallValidInput tool_call_valid_input = 19 ;
@@ -210,6 +216,13 @@ message EvaluateInstancesResponse {
210216 QuestionAnsweringCorrectnessResult question_answering_correctness_result =
211217 17;
212218
219+ // Generic metrics.
220+ // Result for pointwise metric.
221+ PointwiseMetricResult pointwise_metric_result = 27 ;
222+
223+ // Result for pairwise metric.
224+ PairwiseMetricResult pairwise_metric_result = 28 ;
225+
213226 // Tool call metrics.
214227 // Results for tool call valid metric.
215228 ToolCallValidResults tool_call_valid_results = 18 ;
@@ -957,6 +970,81 @@ message QuestionAnsweringCorrectnessResult {
957970 optional float confidence = 3 [(google.api.field_behavior ) = OUTPUT_ONLY ];
958971}
959972
973+ // Input for pointwise metric: a metric spec paired with one instance.
974+ message PointwiseMetricInput {
975+ // Required. Spec for pointwise metric (see PointwiseMetricSpec).
976+ PointwiseMetricSpec metric_spec = 1 [(google.api.field_behavior ) = REQUIRED ];
977+
978+ // Required. Pointwise metric instance (see PointwiseMetricInstance).
979+ PointwiseMetricInstance instance = 2 [(google.api.field_behavior ) = REQUIRED ];
980+ }
981+
982+ // Pointwise metric instance. Usually one instance corresponds to one row in an
983+ // evaluation dataset.
984+ message PointwiseMetricInstance {
985+ // Instance for pointwise metric. At most one member of this oneof may be set.
986+ oneof instance {
987+ // Instance specified as a JSON string. String key-value pairs are expected
988+ // in the json_instance to render
989+ // PointwiseMetricSpec.metric_prompt_template.
990+ string json_instance = 1 ;
991+ }
992+ }
993+
994+ // Spec for pointwise metric. Holds the metric prompt template.
995+ message PointwiseMetricSpec {
996+ // Required. Metric prompt template for pointwise metric.
997+ optional string metric_prompt_template = 1
998+ [(google.api.field_behavior ) = REQUIRED ];
999+ }
1000+
1001+ // Result for pointwise metric.
1002+ message PointwiseMetricResult {
1003+ // Output only. Pointwise metric score.
1004+ optional float score = 1 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1005+
1006+ // Output only. Explanation for pointwise metric score.
1007+ string explanation = 2 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1008+ }
1009+
1010+ // Input for pairwise metric: a metric spec paired with one instance.
1011+ message PairwiseMetricInput {
1012+ // Required. Spec for pairwise metric (see PairwiseMetricSpec).
1013+ PairwiseMetricSpec metric_spec = 1 [(google.api.field_behavior ) = REQUIRED ];
1014+
1015+ // Required. Pairwise metric instance (see PairwiseMetricInstance).
1016+ PairwiseMetricInstance instance = 2 [(google.api.field_behavior ) = REQUIRED ];
1017+ }
1018+
1019+ // Pairwise metric instance. Usually one instance corresponds to one row in an
1020+ // evaluation dataset.
1021+ message PairwiseMetricInstance {
1022+ // Instance for pairwise metric. At most one member of this oneof may be set.
1023+ oneof instance {
1024+ // Instance specified as a JSON string. String key-value pairs are expected
1025+ // in the json_instance to render
1026+ // PairwiseMetricSpec.metric_prompt_template.
1027+ string json_instance = 1 ;
1028+ }
1029+ }
1030+
1031+ // Spec for pairwise metric. Holds the metric prompt template.
1032+ message PairwiseMetricSpec {
1033+ // Required. Metric prompt template for pairwise metric.
1034+ optional string metric_prompt_template = 1
1035+ [(google.api.field_behavior ) = REQUIRED ];
1036+ }
1037+
1038+ // Result for pairwise metric.
1039+ message PairwiseMetricResult {
1040+ // Output only. Pairwise metric choice.
1041+ PairwiseChoice pairwise_choice = 1
1042+ [(google.api.field_behavior ) = OUTPUT_ONLY ];
1043+
1044+ // Output only. Explanation for pairwise metric choice.
1045+ string explanation = 2 [(google.api.field_behavior ) = OUTPUT_ONLY ];
1046+ }
1047+
9601048// Input for tool call valid metric.
9611049message ToolCallValidInput {
9621050 // Required. Spec for tool call valid metric.
0 commit comments