feat: Added ranking_expression_backend and rank_signals fields related to the Custom Ranking feature

Google APIs · copybara-github · commit 0ef98bcac446 · 2025-09-10T04:44:15.000-07:00
docs: A comment for field `ranking_expression` in messages `.google.cloud.discoveryengine.v1alpha.SearchRequest` and `.google.cloud.discoveryengine.v1beta.SearchRequest` is changed to support the Custom Ranking use case

PiperOrigin-RevId: 805306845
diff --git a/google/cloud/discoveryengine/v1/search_service.proto b/google/cloud/discoveryengine/v1/search_service.proto
@@ -1069,6 +1069,115 @@ message SearchRequest {
   // Optional. The specification for returning the relevance score.
   RelevanceScoreSpec relevance_score_spec = 52
       [(google.api.field_behavior) = OPTIONAL];
+
+  // The ranking expression controls the customized ranking on retrieval
+  // documents. This overrides
+  // [ServingConfig.ranking_expression][google.cloud.discoveryengine.v1.ServingConfig.ranking_expression].
+  // The syntax and supported features depend on the
+  // `ranking_expression_backend` value. If `ranking_expression_backend` is not
+  // provided, it defaults to `RANK_BY_EMBEDDING`.
+  //
+  // If
+  // [ranking_expression_backend][google.cloud.discoveryengine.v1.SearchRequest.ranking_expression_backend]
+  // is not provided or set to `RANK_BY_EMBEDDING`, it should be a single
+  // function or multiple functions that are joined by "+".
+  //
+  //   * ranking_expression = function, { " + ", function };
+  //
+  // Supported functions:
+  //
+  //   * double * relevance_score
+  //   * double * dotProduct(embedding_field_path)
+  //
+  // Function variables:
+  //
+  //   * `relevance_score`: pre-defined keyword, used to measure relevance
+  //   between query and document.
+  //   * `embedding_field_path`: the document embedding field
+  //   used with query embedding vector.
+  //   * `dotProduct`: embedding function between `embedding_field_path` and
+  //   query embedding vector.
+  //
+  //  Example ranking expression:
+  //
+  //    If document has an embedding field doc_embedding, the ranking expression
+  //    could be `0.5 * relevance_score + 0.3 * dotProduct(doc_embedding)`.
+  //
+  // If
+  // [ranking_expression_backend][google.cloud.discoveryengine.v1.SearchRequest.ranking_expression_backend]
+  // is set to `RANK_BY_FORMULA`, the following expression types (and
+  // combinations of those chained using `+` or `*` operators) are
+  // supported:
+  //
+  //   * `double`
+  //   * `signal`
+  //   * `log(signal)`
+  //   * `exp(signal)`
+  //   * `rr(signal, double > 0)`  -- reciprocal rank transformation with second
+  //   argument being a denominator constant.
+  //   * `is_nan(signal)` -- returns 1 if signal is NaN, 0 otherwise.
+  //   * `fill_nan(signal1, signal2 | double)` -- if signal1 is NaN, returns
+  //   signal2 | double, else returns signal1.
+  //
+  //   Here are a few examples of ranking formulas that use the supported
+  //   ranking expression types:
+  //
+  //   - `0.2 * semantic_similarity_score + 0.8 * log(keyword_similarity_score)`
+  //   -- mostly rank by the logarithm of `keyword_similarity_score` with slight
+  //   `semantic_similarity_score` adjustment.
+  //   - `0.2 * exp(fill_nan(semantic_similarity_score, 0)) + 0.3 *
+  //   is_nan(keyword_similarity_score)` -- rank by the exponent of
+  //   `semantic_similarity_score` filling the value with 0 if it's NaN, also
+  //   add constant 0.3 adjustment to the final score if
+  //   `keyword_similarity_score` is NaN.
+  //   - `0.2 * rr(semantic_similarity_score, 16) + 0.8 *
+  //   rr(keyword_similarity_score, 16)` -- mostly rank by the reciprocal rank
+  //   of `keyword_similarity_score` with slight adjustment of reciprocal rank
+  //   of `semantic_similarity_score`.
+  //
+  // The following signals are supported:
+  //
+  //   * `semantic_similarity_score`: semantic similarity adjustment that is
+  //   calculated using the embeddings generated by a proprietary Google model.
+  //   This score determines how semantically similar a search query is to a
+  //   document.
+  //   * `keyword_similarity_score`: keyword match adjustment uses the Best
+  //   Match 25 (BM25) ranking function. This score is calculated using a
+  //   probabilistic model to estimate the probability that a document is
+  //   relevant to a given query.
+  //   * `relevance_score`: semantic relevance adjustment that uses a
+  //   proprietary Google model to determine the meaning and intent behind a
+  //   user's query in context with the content in the documents.
+  //   * `pctr_rank`: predicted conversion rate adjustment as a rank. Uses
+  //   predicted click-through rate (pCTR) to gauge the relevance and
+  //   attractiveness of a search result from a user's perspective. A higher
+  //   pCTR suggests that the result is more likely to satisfy the user's query
+  //   and intent, making it a valuable signal for ranking.
+  //   * `freshness_rank`: freshness adjustment as a rank
+  //   * `document_age`: The time in hours elapsed since the document was last
+  //   updated, a floating-point number (e.g., 0.25 means 15 minutes).
+  //   * `topicality_rank`: topicality adjustment as a rank. Uses proprietary
+  //   Google model to determine the keyword-based overlap between the query and
+  //   the document.
+  //   * `base_rank`: the default rank of the result
+  string ranking_expression = 26;
+
+  // The backend to use for the ranking expression evaluation.
+  enum RankingExpressionBackend {
+    reserved 1, 2;
+
+    // Default option for unspecified/unknown values.
+    RANKING_EXPRESSION_BACKEND_UNSPECIFIED = 0;
+    // Ranking by custom embedding model, the default way to evaluate the
+    // ranking expression.
+    RANK_BY_EMBEDDING = 3;
+    // Ranking by custom formula.
+    RANK_BY_FORMULA = 4;
+  }
+
+  // The backend to use for the ranking expression evaluation.
+  RankingExpressionBackend ranking_expression_backend = 53
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Response message for
@@ -1094,6 +1203,49 @@ message SearchResponse {
     // Output only. Google provided available scores.
     map<string, DoubleList> model_scores = 4
         [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // A set of ranking signals.
+    message RankSignals {
+      reserved 5;
+
+      // Keyword matching adjustment.
+      optional float keyword_similarity_score = 1
+          [(google.api.field_behavior) = OPTIONAL];
+      // Semantic relevance adjustment.
+      optional float relevance_score = 2
+          [(google.api.field_behavior) = OPTIONAL];
+      // Semantic similarity adjustment.
+      optional float semantic_similarity_score = 3
+          [(google.api.field_behavior) = OPTIONAL];
+      // Predicted conversion rate adjustment as a rank.
+      optional float pctr_rank = 4 [(google.api.field_behavior) = OPTIONAL];
+      // Topicality adjustment as a rank.
+      optional float topicality_rank = 6
+          [(google.api.field_behavior) = OPTIONAL];
+      // Age of the document in hours.
+      optional float document_age = 7 [(google.api.field_behavior) = OPTIONAL];
+      // Combined custom boosts for a doc.
+      optional float boosting_factor = 8
+          [(google.api.field_behavior) = OPTIONAL];
+
+      // The default rank of the result.
+      float default_rank = 32 [(google.api.field_behavior) = OPTIONAL];
+
+      // Custom clearbox signal represented by name and value pair.
+      message CustomSignal {
+        // Name of the signal.
+        string name = 1 [(google.api.field_behavior) = OPTIONAL];
+        // Float value representing the ranking signal (e.g. 1.25 for BM25).
+        float value = 2 [(google.api.field_behavior) = OPTIONAL];
+      }
+
+      // A list of custom clearbox signals.
+      repeated CustomSignal custom_signals = 33
+          [(google.api.field_behavior) = OPTIONAL];
+    }
+
+    // A set of ranking signals associated with the result.
+    RankSignals rank_signals = 7 [(google.api.field_behavior) = OPTIONAL];
   }
 
   // A facet result.
diff --git a/google/cloud/discoveryengine/v1alpha/search_service.proto b/google/cloud/discoveryengine/v1alpha/search_service.proto
@@ -966,8 +966,14 @@ message SearchRequest {
   // The ranking expression controls the customized ranking on retrieval
   // documents. This overrides
   // [ServingConfig.ranking_expression][google.cloud.discoveryengine.v1alpha.ServingConfig.ranking_expression].
-  // The ranking expression is a single function or multiple functions that are
-  // joined by "+".
+  // The syntax and supported features depend on the
+  // `ranking_expression_backend` value. If `ranking_expression_backend` is not
+  // provided, it defaults to `RANK_BY_EMBEDDING`.
+  //
+  // If
+  // [ranking_expression_backend][google.cloud.discoveryengine.v1alpha.SearchRequest.ranking_expression_backend]
+  // is not provided or set to `RANK_BY_EMBEDDING`, it should be a single
+  // function or multiple functions that are joined by "+".
   //
   //   * ranking_expression = function, { " + ", function };
   //
@@ -982,15 +988,90 @@ message SearchRequest {
   //   between query and document.
   //   * `embedding_field_path`: the document embedding field
   //   used with query embedding vector.
-  //   * `dotProduct`: embedding function between embedding_field_path and query
-  //   embedding vector.
+  //   * `dotProduct`: embedding function between `embedding_field_path` and
+  //   query embedding vector.
   //
   //  Example ranking expression:
   //
   //    If document has an embedding field doc_embedding, the ranking expression
   //    could be `0.5 * relevance_score + 0.3 * dotProduct(doc_embedding)`.
+  //
+  // If
+  // [ranking_expression_backend][google.cloud.discoveryengine.v1alpha.SearchRequest.ranking_expression_backend]
+  // is set to `RANK_BY_FORMULA`, the following expression types (and
+  // combinations of those chained using `+` or `*` operators) are
+  // supported:
+  //
+  //   * `double`
+  //   * `signal`
+  //   * `log(signal)`
+  //   * `exp(signal)`
+  //   * `rr(signal, double > 0)`  -- reciprocal rank transformation with second
+  //   argument being a denominator constant.
+  //   * `is_nan(signal)` -- returns 1 if signal is NaN, 0 otherwise.
+  //   * `fill_nan(signal1, signal2 | double)` -- if signal1 is NaN, returns
+  //   signal2 | double, else returns signal1.
+  //
+  //   Here are a few examples of ranking formulas that use the supported
+  //   ranking expression types:
+  //
+  //   - `0.2 * semantic_similarity_score + 0.8 * log(keyword_similarity_score)`
+  //   -- mostly rank by the logarithm of `keyword_similarity_score` with slight
+  //   `semantic_similarity_score` adjustment.
+  //   - `0.2 * exp(fill_nan(semantic_similarity_score, 0)) + 0.3 *
+  //   is_nan(keyword_similarity_score)` -- rank by the exponent of
+  //   `semantic_similarity_score` filling the value with 0 if it's NaN, also
+  //   add constant 0.3 adjustment to the final score if
+  //   `keyword_similarity_score` is NaN.
+  //   - `0.2 * rr(semantic_similarity_score, 16) + 0.8 *
+  //   rr(keyword_similarity_score, 16)` -- mostly rank by the reciprocal rank
+  //   of `keyword_similarity_score` with slight adjustment of reciprocal rank
+  //   of `semantic_similarity_score`.
+  //
+  // The following signals are supported:
+  //
+  //   * `semantic_similarity_score`: semantic similarity adjustment that is
+  //   calculated using the embeddings generated by a proprietary Google model.
+  //   This score determines how semantically similar a search query is to a
+  //   document.
+  //   * `keyword_similarity_score`: keyword match adjustment uses the Best
+  //   Match 25 (BM25) ranking function. This score is calculated using a
+  //   probabilistic model to estimate the probability that a document is
+  //   relevant to a given query.
+  //   * `relevance_score`: semantic relevance adjustment that uses a
+  //   proprietary Google model to determine the meaning and intent behind a
+  //   user's query in context with the content in the documents.
+  //   * `pctr_rank`: predicted conversion rate adjustment as a rank. Uses
+  //   predicted click-through rate (pCTR) to gauge the relevance and
+  //   attractiveness of a search result from a user's perspective. A higher
+  //   pCTR suggests that the result is more likely to satisfy the user's query
+  //   and intent, making it a valuable signal for ranking.
+  //   * `freshness_rank`: freshness adjustment as a rank
+  //   * `document_age`: The time in hours elapsed since the document was last
+  //   updated, a floating-point number (e.g., 0.25 means 15 minutes).
+  //   * `topicality_rank`: topicality adjustment as a rank. Uses proprietary
+  //   Google model to determine the keyword-based overlap between the query and
+  //   the document.
+  //   * `base_rank`: the default rank of the result
   string ranking_expression = 26;
 
+  // The backend to use for the ranking expression evaluation.
+  enum RankingExpressionBackend {
+    reserved 1, 2;
+
+    // Default option for unspecified/unknown values.
+    RANKING_EXPRESSION_BACKEND_UNSPECIFIED = 0;
+    // Ranking by custom embedding model, the default way to evaluate the
+    // ranking expression.
+    RANK_BY_EMBEDDING = 3;
+    // Ranking by custom formula.
+    RANK_BY_FORMULA = 4;
+  }
+
+  // The backend to use for the ranking expression evaluation.
+  RankingExpressionBackend ranking_expression_backend = 53
+      [(google.api.field_behavior) = OPTIONAL];
+
   // Whether to turn on safe search. This is only supported for
   // website search.
   bool safe_search = 20;
@@ -1088,6 +1169,49 @@ message SearchResponse {
 
     // Google provided available scores.
     map<string, DoubleList> model_scores = 4;
+
+    // A set of ranking signals.
+    message RankSignals {
+      reserved 5;
+
+      // Keyword matching adjustment.
+      optional float keyword_similarity_score = 1
+          [(google.api.field_behavior) = OPTIONAL];
+      // Semantic relevance adjustment.
+      optional float relevance_score = 2
+          [(google.api.field_behavior) = OPTIONAL];
+      // Semantic similarity adjustment.
+      optional float semantic_similarity_score = 3
+          [(google.api.field_behavior) = OPTIONAL];
+      // Predicted conversion rate adjustment as a rank.
+      optional float pctr_rank = 4 [(google.api.field_behavior) = OPTIONAL];
+      // Topicality adjustment as a rank.
+      optional float topicality_rank = 6
+          [(google.api.field_behavior) = OPTIONAL];
+      // Age of the document in hours.
+      optional float document_age = 7 [(google.api.field_behavior) = OPTIONAL];
+      // Combined custom boosts for a doc.
+      optional float boosting_factor = 8
+          [(google.api.field_behavior) = OPTIONAL];
+
+      // The default rank of the result.
+      float default_rank = 32 [(google.api.field_behavior) = OPTIONAL];
+
+      // Custom clearbox signal represented by name and value pair.
+      message CustomSignal {
+        // Name of the signal.
+        string name = 1 [(google.api.field_behavior) = OPTIONAL];
+        // Float value representing the ranking signal (e.g. 1.25 for BM25).
+        float value = 2 [(google.api.field_behavior) = OPTIONAL];
+      }
+
+      // A list of custom clearbox signals.
+      repeated CustomSignal custom_signals = 33
+          [(google.api.field_behavior) = OPTIONAL];
+    }
+
+    // A set of ranking signals associated with the result.
+    RankSignals rank_signals = 7 [(google.api.field_behavior) = OPTIONAL];
   }
 
   // A facet result.
diff --git a/google/cloud/discoveryengine/v1beta/search_service.proto b/google/cloud/discoveryengine/v1beta/search_service.proto