
Commit 0830bd9

Google APIs authored and copybara-github committed
feat: Add EmbedContent method v1
PiperOrigin-RevId: 823219007
1 parent 6e4f2a5 commit 0830bd9

File tree

3 files changed: +186 -0 lines changed

google/cloud/aiplatform/v1/BUILD.bazel

Lines changed: 1 addition & 0 deletions
@@ -144,6 +144,7 @@ proto_library(
         "tuning_job.proto",
         "types.proto",
         "unmanaged_container_model.proto",
+        "usage_metadata.proto",
         "user_action_reference.proto",
         "value.proto",
         "vertex_rag_data.proto",

google/cloud/aiplatform/v1/prediction_service.proto

Lines changed: 89 additions & 0 deletions
@@ -25,6 +25,7 @@ import "google/cloud/aiplatform/v1/content.proto";
 import "google/cloud/aiplatform/v1/explanation.proto";
 import "google/cloud/aiplatform/v1/tool.proto";
 import "google/cloud/aiplatform/v1/types.proto";
+import "google/cloud/aiplatform/v1/usage_metadata.proto";
 import "google/protobuf/struct.proto";
 import "google/protobuf/timestamp.proto";

@@ -208,6 +209,15 @@ service PredictionService {
     };
     option (google.api.method_signature) = "model,contents";
   }
+
+  // Embed content with multimodal inputs.
+  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
+    option (google.api.http) = {
+      post: "/v1/{model=projects/*/locations/*/publishers/*/models/*}:embedContent"
+      body: "*"
+    };
+    option (google.api.method_signature) = "model,content";
+  }
 }

 // Request message for
@@ -850,3 +860,82 @@ message GenerateContentResponse {
   // Usage metadata about the response(s).
   UsageMetadata usage_metadata = 4;
 }
+
+// Request message for
+// [PredictionService.EmbedContent][google.cloud.aiplatform.v1.PredictionService.EmbedContent].
+message EmbedContentRequest {
+  // Represents a downstream task the embeddings will be used for.
+  enum EmbeddingTaskType {
+    // Unset value, which will default to one of the other enum values.
+    UNSPECIFIED = 0;
+
+    // Specifies the given text is a query in a search/retrieval setting.
+    RETRIEVAL_QUERY = 2;
+
+    // Specifies the given text is a document from the corpus being searched.
+    RETRIEVAL_DOCUMENT = 3;
+
+    // Specifies the given text will be used for STS.
+    SEMANTIC_SIMILARITY = 4;
+
+    // Specifies that the given text will be classified.
+    CLASSIFICATION = 5;
+
+    // Specifies that the embeddings will be used for clustering.
+    CLUSTERING = 6;
+
+    // Specifies that the embeddings will be used for question answering.
+    QUESTION_ANSWERING = 7;
+
+    // Specifies that the embeddings will be used for fact verification.
+    FACT_VERIFICATION = 8;
+
+    // Specifies that the embeddings will be used for code retrieval.
+    CODE_RETRIEVAL_QUERY = 9;
+  }
+
+  // Required. The name of the publisher model requested to serve the
+  // prediction. Format:
+  // `projects/{project}/locations/{location}/publishers/*/models/*`
+  optional string model = 1 [(google.api.resource_reference) = {
+    type: "aiplatform.googleapis.com/Endpoint"
+  }];
+
+  // Required. Input content to be embedded. Required.
+  optional Content content = 2;
+
+  // Optional. An optional title for the text.
+  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. The task type of the embedding.
+  optional EmbeddingTaskType task_type = 5
+      [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Optional reduced dimension for the output embedding. If set,
+  // excessive values in the output embedding are truncated from the end.
+  optional int32 output_dimensionality = 6
+      [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Whether to silently truncate the input content if it's longer
+  // than the maximum sequence length.
+  optional bool auto_truncate = 7 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Response message for
+// [PredictionService.EmbedContent][google.cloud.aiplatform.v1.PredictionService.EmbedContent].
+message EmbedContentResponse {
+  // A list of floats representing an embedding.
+  message Embedding {
+    // Embedding vector values.
+    repeated float values = 1;
+  }
+
+  // The embedding generated from the input content.
+  Embedding embedding = 1;
+
+  // Metadata about the response(s).
+  UsageMetadata usage_metadata = 2;
+
+  // Whether the input content was truncated before generating the embedding.
+  bool truncated = 4;
+}
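For orientation, a minimal client-side sketch of calling the new :embedContent REST binding defined above follows. It is not part of the commit: the project, location, and model names are placeholders, it assumes Application Default Credentials are configured and that the target publisher model supports embedContent, and the body fields follow the proto3 JSON mapping of EmbedContentRequest (content, taskType, outputDimensionality, autoTruncate).

# Minimal sketch (not from the commit): call the new :embedContent REST binding.
# Assumes Application Default Credentials; project, location, and model below
# are placeholders.
import google.auth
from google.auth.transport.requests import AuthorizedSession

credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
session = AuthorizedSession(credentials)

location = "us-central1"  # placeholder
model = (  # placeholder resource name in the format required by the request
    "projects/my-project/locations/us-central1/"
    "publishers/google/models/my-embedding-model"
)

# Request body uses the proto3 JSON field names of EmbedContentRequest.
body = {
    "content": {"parts": [{"text": "What is Vertex AI?"}]},
    "taskType": "RETRIEVAL_QUERY",
    "outputDimensionality": 256,
    "autoTruncate": True,
}

response = session.post(
    f"https://{location}-aiplatform.googleapis.com/v1/{model}:embedContent",
    json=body,
)
response.raise_for_status()
values = response.json()["embedding"]["values"]
print(f"embedding length: {len(values)}")

Because the method_signature is "model,content", generated client libraries can be expected to add a convenience overload that takes just those two arguments.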
google/cloud/aiplatform/v1/usage_metadata.proto

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.aiplatform.v1;
+
+import "google/api/field_behavior.proto";
+import "google/cloud/aiplatform/v1/content.proto";
+
+option csharp_namespace = "Google.Cloud.AIPlatform.V1";
+option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
+option java_multiple_files = true;
+option java_outer_classname = "UsageMetadataProto";
+option java_package = "com.google.cloud.aiplatform.v1";
+option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
+option ruby_package = "Google::Cloud::AIPlatform::V1";
+
+// Usage metadata about the content generation request and response.
+// This message provides a detailed breakdown of token usage and other
+// relevant metrics.
+message UsageMetadata {
+  // The type of traffic that this request was processed with, indicating which
+  // quota gets consumed.
+  enum TrafficType {
+    // Unspecified request traffic type.
+    TRAFFIC_TYPE_UNSPECIFIED = 0;
+
+    // Type for Pay-As-You-Go traffic.
+    ON_DEMAND = 1;
+
+    // Type for Provisioned Throughput traffic.
+    PROVISIONED_THROUGHPUT = 2;
+  }
+
+  // The total number of tokens in the prompt. This includes any text, images,
+  // or other media provided in the request. When `cached_content` is set,
+  // this also includes the number of tokens in the cached content.
+  int32 prompt_token_count = 1;
+
+  // The total number of tokens in the generated candidates.
+  int32 candidates_token_count = 2;
+
+  // The total number of tokens for the entire request. This is the sum of
+  // `prompt_token_count`, `candidates_token_count`,
+  // `tool_use_prompt_token_count`, and `thoughts_token_count`.
+  int32 total_token_count = 3;
+
+  // Output only. The number of tokens in the results from tool executions,
+  // which are provided back to the model as input, if applicable.
+  int32 tool_use_prompt_token_count = 13
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. The number of tokens that were part of the model's generated
+  // "thoughts" output, if applicable.
+  int32 thoughts_token_count = 14 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. The number of tokens in the cached content that was used for
+  // this request.
+  int32 cached_content_token_count = 5
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. A detailed breakdown of the token count for each modality in
+  // the prompt.
+  repeated ModalityTokenCount prompt_tokens_details = 9
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. A detailed breakdown of the token count for each modality in
+  // the cached content.
+  repeated ModalityTokenCount cache_tokens_details = 10
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. A detailed breakdown of the token count for each modality in
+  // the generated candidates.
+  repeated ModalityTokenCount candidates_tokens_details = 11
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. A detailed breakdown by modality of the token counts from the
+  // results of tool executions, which are provided back to the model as input.
+  repeated ModalityTokenCount tool_use_prompt_tokens_details = 12
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. The traffic type for this request.
+  TrafficType traffic_type = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
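As a quick illustration of how the token-count fields relate (not part of the commit), the sketch below checks the documented relationship that total_token_count is the sum of the prompt, candidates, tool-use prompt, and thoughts token counts. It assumes the response has already been parsed from its proto3 JSON form into a Python dict; the helper name summarize_usage is purely illustrative.

# Illustrative helper (not from the commit) for a UsageMetadata message already
# parsed from proto3 JSON into a dict; summarize_usage is a hypothetical name.
def summarize_usage(usage: dict) -> str:
    prompt = usage.get("promptTokenCount", 0)
    candidates = usage.get("candidatesTokenCount", 0)
    tool_use = usage.get("toolUsePromptTokenCount", 0)
    thoughts = usage.get("thoughtsTokenCount", 0)
    total = usage.get("totalTokenCount", 0)

    # Per the field comments above, total_token_count is the sum of the
    # prompt, candidates, tool-use prompt, and thoughts token counts.
    consistent = total == prompt + candidates + tool_use + thoughts
    traffic = usage.get("trafficType", "TRAFFIC_TYPE_UNSPECIFIED")
    return (
        f"{total} tokens total ({prompt} prompt, {candidates} candidates), "
        f"traffic type {traffic}, counts consistent: {consistent}"
    )

print(summarize_usage({
    "promptTokenCount": 12,
    "candidatesTokenCount": 0,
    "totalTokenCount": 12,
    "trafficType": "ON_DEMAND",
}))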
