
Commit 0830bd9

Google APIs authored and copybara-github committed
feat: Add EmbedContent method v1
PiperOrigin-RevId: 823219007
1 parent 6e4f2a5 commit 0830bd9

File tree

3 files changed: +186 -0 lines changed

google/cloud/aiplatform/v1/BUILD.bazel

Lines changed: 1 addition & 0 deletions
@@ -144,6 +144,7 @@ proto_library(
         "tuning_job.proto",
         "types.proto",
         "unmanaged_container_model.proto",
+        "usage_metadata.proto",
         "user_action_reference.proto",
         "value.proto",
         "vertex_rag_data.proto",

google/cloud/aiplatform/v1/prediction_service.proto

Lines changed: 89 additions & 0 deletions
@@ -25,6 +25,7 @@ import "google/cloud/aiplatform/v1/content.proto";
 import "google/cloud/aiplatform/v1/explanation.proto";
 import "google/cloud/aiplatform/v1/tool.proto";
 import "google/cloud/aiplatform/v1/types.proto";
+import "google/cloud/aiplatform/v1/usage_metadata.proto";
 import "google/protobuf/struct.proto";
 import "google/protobuf/timestamp.proto";

@@ -208,6 +209,15 @@ service PredictionService {
     };
     option (google.api.method_signature) = "model,contents";
   }
+
+  // Embed content with multimodal inputs.
+  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
+    option (google.api.http) = {
+      post: "/v1/{model=projects/*/locations/*/publishers/*/models/*}:embedContent"
+      body: "*"
+    };
+    option (google.api.method_signature) = "model,content";
+  }
 }

 // Request message for
@@ -850,3 +860,82 @@ message GenerateContentResponse {
   // Usage metadata about the response(s).
   UsageMetadata usage_metadata = 4;
 }
+
+// Request message for
+// [PredictionService.EmbedContent][google.cloud.aiplatform.v1.PredictionService.EmbedContent].
+message EmbedContentRequest {
+  // Represents a downstream task the embeddings will be used for.
+  enum EmbeddingTaskType {
+    // Unset value, which will default to one of the other enum values.
+    UNSPECIFIED = 0;
+
+    // Specifies the given text is a query in a search/retrieval setting.
+    RETRIEVAL_QUERY = 2;
+
+    // Specifies the given text is a document from the corpus being searched.
+    RETRIEVAL_DOCUMENT = 3;
+
+    // Specifies the given text will be used for STS.
+    SEMANTIC_SIMILARITY = 4;
+
+    // Specifies that the given text will be classified.
+    CLASSIFICATION = 5;
+
+    // Specifies that the embeddings will be used for clustering.
+    CLUSTERING = 6;
+
+    // Specifies that the embeddings will be used for question answering.
+    QUESTION_ANSWERING = 7;
+
+    // Specifies that the embeddings will be used for fact verification.
+    FACT_VERIFICATION = 8;
+
+    // Specifies that the embeddings will be used for code retrieval.
+    CODE_RETRIEVAL_QUERY = 9;
+  }
+
+  // Required. The name of the publisher model requested to serve the
+  // prediction. Format:
+  // `projects/{project}/locations/{location}/publishers/*/models/*`
+  optional string model = 1 [(google.api.resource_reference) = {
+    type: "aiplatform.googleapis.com/Endpoint"
+  }];
+
+  // Required. Input content to be embedded. Required.
+  optional Content content = 2;
+
+  // Optional. An optional title for the text.
+  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The task type of the embedding.
+  optional EmbeddingTaskType task_type = 5
+      [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Optional reduced dimension for the output embedding. If set,
+  // excessive values in the output embedding are truncated from the end.
+  optional int32 output_dimensionality = 6
+      [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Whether to silently truncate the input content if it's longer
+  // than the maximum sequence length.
+  optional bool auto_truncate = 7 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Response message for
+// [PredictionService.EmbedContent][google.cloud.aiplatform.v1.PredictionService.EmbedContent].
+message EmbedContentResponse {
+  // A list of floats representing an embedding.
+  message Embedding {
+    // Embedding vector values.
+    repeated float values = 1;
+  }
+
+  // The embedding generated from the input content.
+  Embedding embedding = 1;
+
+  // Metadata about the response(s).
+  UsageMetadata usage_metadata = 2;
+
+  // Whether the input content was truncated before generating the embedding.
+  bool truncated = 4;
+}
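For orientation, a minimal client-side sketch of calling the new :embedContent REST binding defined above follows. It is not part of the commit: the project, location, and model names are placeholders, it assumes Application Default Credentials are configured and that the target publisher model supports embedContent, and the body fields follow the proto3 JSON mapping of EmbedContentRequest (content, taskType, outputDimensionality, autoTruncate).

# Minimal sketch (not from the commit): call the new :embedContent REST binding.
# Assumes Application Default Credentials; project, location, and model below
# are placeholders.
import google.auth
from google.auth.transport.requests import AuthorizedSession

credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
session = AuthorizedSession(credentials)

location = "us-central1"  # placeholder
model = (  # placeholder resource name in the format required by the request
    "projects/my-project/locations/us-central1/"
    "publishers/google/models/my-embedding-model"
)

# Request body uses the proto3 JSON field names of EmbedContentRequest.
body = {
    "content": {"parts": [{"text": "What is Vertex AI?"}]},
    "taskType": "RETRIEVAL_QUERY",
    "outputDimensionality": 256,
    "autoTruncate": True,
}

response = session.post(
    f"https://{location}-aiplatform.googleapis.com/v1/{model}:embedContent",
    json=body,
)
response.raise_for_status()
values = response.json()["embedding"]["values"]
print(f"embedding length: {len(values)}")

Because the method_signature is "model,content", generated client libraries can be expected to add a convenience overload that takes just those two arguments.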
google/cloud/aiplatform/v1/usage_metadata.proto

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.aiplatform.v1;
+
+import "google/api/field_behavior.proto";
+import "google/cloud/aiplatform/v1/content.proto";
+
+option csharp_namespace = "Google.Cloud.AIPlatform.V1";
+option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
+option java_multiple_files = true;
+option java_outer_classname = "UsageMetadataProto";
+option java_package = "com.google.cloud.aiplatform.v1";
+option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
+option ruby_package = "Google::Cloud::AIPlatform::V1";
+
+// Usage metadata about the content generation request and response.
+// This message provides a detailed breakdown of token usage and other
+// relevant metrics.
+message UsageMetadata {
+  // The type of traffic that this request was processed with, indicating which
+  // quota gets consumed.
+  enum TrafficType {
+    // Unspecified request traffic type.
+    TRAFFIC_TYPE_UNSPECIFIED = 0;
+
+    // Type for Pay-As-You-Go traffic.
+    ON_DEMAND = 1;
+
+    // Type for Provisioned Throughput traffic.
+    PROVISIONED_THROUGHPUT = 2;
+  }
+
+  // The total number of tokens in the prompt. This includes any text, images,
+  // or other media provided in the request. When `cached_content` is set,
+  // this also includes the number of tokens in the cached content.
+  int32 prompt_token_count = 1;
+
+  // The total number of tokens in the generated candidates.
+  int32 candidates_token_count = 2;
+
+  // The total number of tokens for the entire request. This is the sum of
+  // `prompt_token_count`, `candidates_token_count`,
+  // `tool_use_prompt_token_count`, and `thoughts_token_count`.
+  int32 total_token_count = 3;
+
+  // Output only. The number of tokens in the results from tool executions,
+  // which are provided back to the model as input, if applicable.
+  int32 tool_use_prompt_token_count = 13
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. The number of tokens that were part of the model's generated
+  // "thoughts" output, if applicable.
+  int32 thoughts_token_count = 14 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. The number of tokens in the cached content that was used for
+  // this request.
+  int32 cached_content_token_count = 5
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. A detailed breakdown of the token count for each modality in
+  // the prompt.
+  repeated ModalityTokenCount prompt_tokens_details = 9
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. A detailed breakdown of the token count for each modality in
+  // the cached content.
+  repeated ModalityTokenCount cache_tokens_details = 10
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. A detailed breakdown of the token count for each modality in
+  // the generated candidates.
+  repeated ModalityTokenCount candidates_tokens_details = 11
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. A detailed breakdown by modality of the token counts from the
+  // results of tool executions, which are provided back to the model as input.
+  repeated ModalityTokenCount tool_use_prompt_tokens_details = 12
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. The traffic type for this request.
+  TrafficType traffic_type = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
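As a quick illustration of how the token-count fields relate (not part of the commit), the sketch below checks the documented relationship that total_token_count is the sum of the prompt, candidates, tool-use prompt, and thoughts token counts. It assumes the response has already been parsed from its proto3 JSON form into a Python dict; the helper name summarize_usage is purely illustrative.

# Illustrative helper (not from the commit) for a UsageMetadata message already
# parsed from proto3 JSON into a dict; summarize_usage is a hypothetical name.
def summarize_usage(usage: dict) -> str:
    prompt = usage.get("promptTokenCount", 0)
    candidates = usage.get("candidatesTokenCount", 0)
    tool_use = usage.get("toolUsePromptTokenCount", 0)
    thoughts = usage.get("thoughtsTokenCount", 0)
    total = usage.get("totalTokenCount", 0)

    # Per the field comments above, total_token_count is the sum of the
    # prompt, candidates, tool-use prompt, and thoughts token counts.
    consistent = total == prompt + candidates + tool_use + thoughts
    traffic = usage.get("trafficType", "TRAFFIC_TYPE_UNSPECIFIED")
    return (
        f"{total} tokens total ({prompt} prompt, {candidates} candidates), "
        f"traffic type {traffic}, counts consistent: {consistent}"
    )

print(summarize_usage({
    "promptTokenCount": 12,
    "candidatesTokenCount": 0,
    "totalTokenCount": 12,
    "trafficType": "ON_DEMAND",
}))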
