Skip to content

Commit 0ef98bc

Browse files
Google APIs and copybara-github
authored and committed
feat: Added ranking_expression_backend and rank_signals fields related to the Custom Ranking feature
docs: A comment for field `ranking_expression` in messages `.google.cloud.discoveryengine.v1alpha.SearchRequest` and `.google.cloud.discoveryengine.v1beta.SearchRequest` is changed to support the Custom Ranking use case

PiperOrigin-RevId: 805306845
1 parent 6d1dca2 commit 0ef98bc

3 files changed

Lines changed: 408 additions & 8 deletions

File tree

google/cloud/discoveryengine/v1/search_service.proto

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,115 @@ message SearchRequest {
10691069
// Optional. The specification for returning the relevance score.
10701070
RelevanceScoreSpec relevance_score_spec = 52
10711071
[(google.api.field_behavior) = OPTIONAL];
1072+
1073+
// The ranking expression controls the customized ranking on retrieval
1074+
// documents. This overrides
1075+
// [ServingConfig.ranking_expression][google.cloud.discoveryengine.v1.ServingConfig.ranking_expression].
1076+
// The syntax and supported features depend on the
1077+
// `ranking_expression_backend` value. If `ranking_expression_backend` is not
1078+
// provided, it defaults to `RANK_BY_EMBEDDING`.
1079+
//
1080+
// If
1081+
// [ranking_expression_backend][google.cloud.discoveryengine.v1.SearchRequest.ranking_expression_backend]
1082+
// is not provided or set to `RANK_BY_EMBEDDING`, it should be a single
1083+
// function or multiple functions that are joined by "+".
1084+
//
1085+
// * ranking_expression = function, { " + ", function };
1086+
//
1087+
// Supported functions:
1088+
//
1089+
// * double * relevance_score
1090+
// * double * dotProduct(embedding_field_path)
1091+
//
1092+
// Function variables:
1093+
//
1094+
// * `relevance_score`: pre-defined keyword, used to measure relevance
1095+
// between query and document.
1096+
// * `embedding_field_path`: the document embedding field
1097+
// used with query embedding vector.
1098+
// * `dotProduct`: embedding function between `embedding_field_path` and
1099+
// query embedding vector.
1100+
//
1101+
// Example ranking expression:
1102+
//
1103+
// If document has an embedding field doc_embedding, the ranking expression
1104+
// could be `0.5 * relevance_score + 0.3 * dotProduct(doc_embedding)`.
1105+
//
1106+
// If
1107+
// [ranking_expression_backend][google.cloud.discoveryengine.v1.SearchRequest.ranking_expression_backend]
1108+
// is set to `RANK_BY_FORMULA`, the following expression types (and
1109+
// combinations of those chained using `+` or `*` operators) are
1110+
// supported:
1111+
//
1112+
// * `double`
1113+
// * `signal`
1114+
// * `log(signal)`
1115+
// * `exp(signal)`
1116+
// * `rr(signal, double > 0)` -- reciprocal rank transformation with second
1117+
// argument being a denominator constant.
1118+
// * `is_nan(signal)` -- returns 0 if signal is NaN, 1 otherwise.
1119+
// * `fill_nan(signal1, signal2 | double)` -- if signal1 is NaN, returns
1120+
// signal2 | double, else returns signal1.
1121+
//
1122+
// Here are a few examples of ranking formulas that use the supported
1123+
// ranking expression types:
1124+
//
1125+
// - `0.2 * semantic_similarity_score + 0.8 * log(keyword_similarity_score)`
1126+
// -- mostly rank by the logarithm of `keyword_similarity_score` with slight
1127+
// `semantic_similarity_score` adjustment.
1128+
// - `0.2 * exp(fill_nan(semantic_similarity_score, 0)) + 0.3 *
1129+
// is_nan(keyword_similarity_score)` -- rank by the exponent of
1130+
// `semantic_similarity_score` filling the value with 0 if it's NaN, also
1131+
// add constant 0.3 adjustment to the final score if
1132+
// `keyword_similarity_score` is NaN.
1133+
// - `0.2 * rr(semantic_similarity_score, 16) + 0.8 *
1134+
// rr(keyword_similarity_score, 16)` -- mostly rank by the reciprocal rank
1135+
// of `keyword_similarity_score` with slight adjustment of reciprocal rank
1136+
// of `semantic_similarity_score`.
1137+
//
1138+
// The following signals are supported:
1139+
//
1140+
// * `semantic_similarity_score`: semantic similarity adjustment that is
1141+
// calculated using the embeddings generated by a proprietary Google model.
1142+
// This score determines how semantically similar a search query is to a
1143+
// document.
1144+
// * `keyword_similarity_score`: keyword match adjustment uses the Best
1145+
// Match 25 (BM25) ranking function. This score is calculated using a
1146+
// probabilistic model to estimate the probability that a document is
1147+
// relevant to a given query.
1148+
// * `relevance_score`: semantic relevance adjustment that uses a
1149+
// proprietary Google model to determine the meaning and intent behind a
1150+
// user's query in context with the content in the documents.
1151+
// * `pctr_rank`: predicted conversion rate adjustment as a rank. Uses
1152+
// predicted click-through rate (pCTR) to gauge the relevance and
1153+
// attractiveness of a search result from a user's perspective. A higher
1154+
// pCTR suggests that the result is more likely to satisfy the user's query
1155+
// and intent, making it a valuable signal for ranking.
1156+
// * `freshness_rank`: freshness adjustment as a rank
1157+
// * `document_age`: The time in hours elapsed since the document was last
1158+
// updated, a floating-point number (e.g., 0.25 means 15 minutes).
1159+
// * `topicality_rank`: topicality adjustment as a rank. Uses proprietary
1160+
// Google model to determine the keyword-based overlap between the query and
1161+
// the document.
1162+
// * `base_rank`: the default rank of the result
1163+
string ranking_expression = 26;
1164+
1165+
// The backend to use for the ranking expression evaluation.
1166+
enum RankingExpressionBackend {
1167+
reserved 1, 2;
1168+
1169+
// Default option for unspecified/unknown values.
1170+
RANKING_EXPRESSION_BACKEND_UNSPECIFIED = 0;
1171+
// Ranking by custom embedding model, the default way to evaluate the
1172+
// ranking expression.
1173+
RANK_BY_EMBEDDING = 3;
1174+
// Ranking by custom formula.
1175+
RANK_BY_FORMULA = 4;
1176+
}
1177+
1178+
// The backend to use for the ranking expression evaluation.
1179+
RankingExpressionBackend ranking_expression_backend = 53
1180+
[(google.api.field_behavior) = OPTIONAL];
10721181
}
10731182

10741183
// Response message for
@@ -1094,6 +1203,49 @@ message SearchResponse {
10941203
// Output only. Google provided available scores.
10951204
map<string, DoubleList> model_scores = 4
10961205
[(google.api.field_behavior) = OUTPUT_ONLY];
1206+
1207+
// A set of ranking signals.
1208+
message RankSignals {
1209+
reserved 5;
1210+
1211+
// Keyword matching adjustment.
1212+
optional float keyword_similarity_score = 1
1213+
[(google.api.field_behavior) = OPTIONAL];
1214+
// Semantic relevance adjustment.
1215+
optional float relevance_score = 2
1216+
[(google.api.field_behavior) = OPTIONAL];
1217+
// Semantic similarity adjustment.
1218+
optional float semantic_similarity_score = 3
1219+
[(google.api.field_behavior) = OPTIONAL];
1220+
// Predicted conversion rate adjustment as a rank.
1221+
optional float pctr_rank = 4 [(google.api.field_behavior) = OPTIONAL];
1222+
// Topicality adjustment as a rank.
1223+
optional float topicality_rank = 6
1224+
[(google.api.field_behavior) = OPTIONAL];
1225+
// Age of the document in hours.
1226+
optional float document_age = 7 [(google.api.field_behavior) = OPTIONAL];
1227+
// Combined custom boosts for a doc.
1228+
optional float boosting_factor = 8
1229+
[(google.api.field_behavior) = OPTIONAL];
1230+
1231+
// The default rank of the result.
1232+
float default_rank = 32 [(google.api.field_behavior) = OPTIONAL];
1233+
1234+
// Custom clearbox signal represented by name and value pair.
1235+
message CustomSignal {
1236+
// Name of the signal.
1237+
string name = 1 [(google.api.field_behavior) = OPTIONAL];
1238+
// Float value representing the ranking signal (e.g. 1.25 for BM25).
1239+
float value = 2 [(google.api.field_behavior) = OPTIONAL];
1240+
}
1241+
1242+
// A list of custom clearbox signals.
1243+
repeated CustomSignal custom_signals = 33
1244+
[(google.api.field_behavior) = OPTIONAL];
1245+
}
1246+
1247+
// A set of ranking signals associated with the result.
1248+
RankSignals rank_signals = 7 [(google.api.field_behavior) = OPTIONAL];
10971249
}
10981250

10991251
// A facet result.

google/cloud/discoveryengine/v1alpha/search_service.proto

Lines changed: 128 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -966,8 +966,14 @@ message SearchRequest {
966966
// The ranking expression controls the customized ranking on retrieval
967967
// documents. This overrides
968968
// [ServingConfig.ranking_expression][google.cloud.discoveryengine.v1alpha.ServingConfig.ranking_expression].
969-
// The ranking expression is a single function or multiple functions that are
970-
// joined by "+".
969+
// The syntax and supported features depend on the
970+
// `ranking_expression_backend` value. If `ranking_expression_backend` is not
971+
// provided, it defaults to `RANK_BY_EMBEDDING`.
972+
//
973+
// If
974+
// [ranking_expression_backend][google.cloud.discoveryengine.v1alpha.SearchRequest.ranking_expression_backend]
975+
// is not provided or set to `RANK_BY_EMBEDDING`, it should be a single
976+
// function or multiple functions that are joined by "+".
971977
//
972978
// * ranking_expression = function, { " + ", function };
973979
//
@@ -982,15 +988,90 @@ message SearchRequest {
982988
// between query and document.
983989
// * `embedding_field_path`: the document embedding field
984990
// used with query embedding vector.
985-
// * `dotProduct`: embedding function between embedding_field_path and query
986-
// embedding vector.
991+
// * `dotProduct`: embedding function between `embedding_field_path` and
992+
// query embedding vector.
987993
//
988994
// Example ranking expression:
989995
//
990996
// If document has an embedding field doc_embedding, the ranking expression
991997
// could be `0.5 * relevance_score + 0.3 * dotProduct(doc_embedding)`.
998+
//
999+
// If
1000+
// [ranking_expression_backend][google.cloud.discoveryengine.v1alpha.SearchRequest.ranking_expression_backend]
1001+
// is set to `RANK_BY_FORMULA`, the following expression types (and
1002+
// combinations of those chained using `+` or `*` operators) are
1003+
// supported:
1004+
//
1005+
// * `double`
1006+
// * `signal`
1007+
// * `log(signal)`
1008+
// * `exp(signal)`
1009+
// * `rr(signal, double > 0)` -- reciprocal rank transformation with second
1010+
// argument being a denominator constant.
1011+
// * `is_nan(signal)` -- returns 0 if signal is NaN, 1 otherwise.
1012+
// * `fill_nan(signal1, signal2 | double)` -- if signal1 is NaN, returns
1013+
// signal2 | double, else returns signal1.
1014+
//
1015+
// Here are a few examples of ranking formulas that use the supported
1016+
// ranking expression types:
1017+
//
1018+
// - `0.2 * semantic_similarity_score + 0.8 * log(keyword_similarity_score)`
1019+
// -- mostly rank by the logarithm of `keyword_similarity_score` with slight
1020+
// `semantic_similarity_score` adjustment.
1021+
// - `0.2 * exp(fill_nan(semantic_similarity_score, 0)) + 0.3 *
1022+
// is_nan(keyword_similarity_score)` -- rank by the exponent of
1023+
// `semantic_similarity_score` filling the value with 0 if it's NaN, also
1024+
// add constant 0.3 adjustment to the final score if
1025+
// `keyword_similarity_score` is NaN.
1026+
// - `0.2 * rr(semantic_similarity_score, 16) + 0.8 *
1027+
// rr(keyword_similarity_score, 16)` -- mostly rank by the reciprocal rank
1028+
// of `keyword_similarity_score` with slight adjustment of reciprocal rank
1029+
// of `semantic_similarity_score`.
1030+
//
1031+
// The following signals are supported:
1032+
//
1033+
// * `semantic_similarity_score`: semantic similarity adjustment that is
1034+
// calculated using the embeddings generated by a proprietary Google model.
1035+
// This score determines how semantically similar a search query is to a
1036+
// document.
1037+
// * `keyword_similarity_score`: keyword match adjustment uses the Best
1038+
// Match 25 (BM25) ranking function. This score is calculated using a
1039+
// probabilistic model to estimate the probability that a document is
1040+
// relevant to a given query.
1041+
// * `relevance_score`: semantic relevance adjustment that uses a
1042+
// proprietary Google model to determine the meaning and intent behind a
1043+
// user's query in context with the content in the documents.
1044+
// * `pctr_rank`: predicted conversion rate adjustment as a rank. Uses
1045+
// predicted click-through rate (pCTR) to gauge the relevance and
1046+
// attractiveness of a search result from a user's perspective. A higher
1047+
// pCTR suggests that the result is more likely to satisfy the user's query
1048+
// and intent, making it a valuable signal for ranking.
1049+
// * `freshness_rank`: freshness adjustment as a rank
1050+
// * `document_age`: The time in hours elapsed since the document was last
1051+
// updated, a floating-point number (e.g., 0.25 means 15 minutes).
1052+
// * `topicality_rank`: topicality adjustment as a rank. Uses proprietary
1053+
// Google model to determine the keyword-based overlap between the query and
1054+
// the document.
1055+
// * `base_rank`: the default rank of the result
9921056
string ranking_expression = 26;
9931057

1058+
// The backend to use for the ranking expression evaluation.
1059+
enum RankingExpressionBackend {
1060+
reserved 1, 2;
1061+
1062+
// Default option for unspecified/unknown values.
1063+
RANKING_EXPRESSION_BACKEND_UNSPECIFIED = 0;
1064+
// Ranking by custom embedding model, the default way to evaluate the
1065+
// ranking expression.
1066+
RANK_BY_EMBEDDING = 3;
1067+
// Ranking by custom formula.
1068+
RANK_BY_FORMULA = 4;
1069+
}
1070+
1071+
// The backend to use for the ranking expression evaluation.
1072+
RankingExpressionBackend ranking_expression_backend = 53
1073+
[(google.api.field_behavior) = OPTIONAL];
1074+
9941075
// Whether to turn on safe search. This is only supported for
9951076
// website search.
9961077
bool safe_search = 20;
@@ -1088,6 +1169,49 @@ message SearchResponse {
10881169

10891170
// Google provided available scores.
10901171
map<string, DoubleList> model_scores = 4;
1172+
1173+
// A set of ranking signals.
1174+
message RankSignals {
1175+
reserved 5;
1176+
1177+
// Keyword matching adjustment.
1178+
optional float keyword_similarity_score = 1
1179+
[(google.api.field_behavior) = OPTIONAL];
1180+
// Semantic relevance adjustment.
1181+
optional float relevance_score = 2
1182+
[(google.api.field_behavior) = OPTIONAL];
1183+
// Semantic similarity adjustment.
1184+
optional float semantic_similarity_score = 3
1185+
[(google.api.field_behavior) = OPTIONAL];
1186+
// Predicted conversion rate adjustment as a rank.
1187+
optional float pctr_rank = 4 [(google.api.field_behavior) = OPTIONAL];
1188+
// Topicality adjustment as a rank.
1189+
optional float topicality_rank = 6
1190+
[(google.api.field_behavior) = OPTIONAL];
1191+
// Age of the document in hours.
1192+
optional float document_age = 7 [(google.api.field_behavior) = OPTIONAL];
1193+
// Combined custom boosts for a doc.
1194+
optional float boosting_factor = 8
1195+
[(google.api.field_behavior) = OPTIONAL];
1196+
1197+
// The default rank of the result.
1198+
float default_rank = 32 [(google.api.field_behavior) = OPTIONAL];
1199+
1200+
// Custom clearbox signal represented by name and value pair.
1201+
message CustomSignal {
1202+
// Name of the signal.
1203+
string name = 1 [(google.api.field_behavior) = OPTIONAL];
1204+
// Float value representing the ranking signal (e.g. 1.25 for BM25).
1205+
float value = 2 [(google.api.field_behavior) = OPTIONAL];
1206+
}
1207+
1208+
// A list of custom clearbox signals.
1209+
repeated CustomSignal custom_signals = 33
1210+
[(google.api.field_behavior) = OPTIONAL];
1211+
}
1212+
1213+
// A set of ranking signals associated with the result.
1214+
RankSignals rank_signals = 7 [(google.api.field_behavior) = OPTIONAL];
10911215
}
10921216

10931217
// A facet result.

0 commit comments

Comments
 (0)