Skip to content

Commit 51b75ed

Browse files
Google APIs and copybara-github
authored and committed
feat: add Context Cache to v1
PiperOrigin-RevId: 714842212
1 parent ab068fb commit 51b75ed

5 files changed

Lines changed: 330 additions & 0 deletions

File tree

google/cloud/aiplatform/v1/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ proto_library(
3434
"api_auth.proto",
3535
"artifact.proto",
3636
"batch_prediction_job.proto",
37+
"cached_content.proto",
3738
"completion_stats.proto",
3839
"content.proto",
3940
"context.proto",
@@ -72,6 +73,7 @@ proto_library(
7273
"featurestore_monitoring.proto",
7374
"featurestore_online_service.proto",
7475
"featurestore_service.proto",
76+
"gen_ai_cache_service.proto",
7577
"genai_tuning_service.proto",
7678
"hyperparameter_tuning_job.proto",
7779
"index.proto",

google/cloud/aiplatform/v1/aiplatform_v1.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ apis:
1313
- name: google.cloud.aiplatform.v1.FeatureRegistryService
1414
- name: google.cloud.aiplatform.v1.FeaturestoreOnlineServingService
1515
- name: google.cloud.aiplatform.v1.FeaturestoreService
16+
- name: google.cloud.aiplatform.v1.GenAiCacheService
1617
- name: google.cloud.aiplatform.v1.GenAiTuningService
1718
- name: google.cloud.aiplatform.v1.IndexEndpointService
1819
- name: google.cloud.aiplatform.v1.IndexService
@@ -749,6 +750,10 @@ authentication:
749750
oauth:
750751
canonical_scopes: |-
751752
https://www.googleapis.com/auth/cloud-platform
753+
- selector: 'google.cloud.aiplatform.v1.GenAiCacheService.*'
754+
oauth:
755+
canonical_scopes: |-
756+
https://www.googleapis.com/auth/cloud-platform
752757
- selector: 'google.cloud.aiplatform.v1.GenAiTuningService.*'
753758
oauth:
754759
canonical_scopes: |-
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
// Copyright 2024 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
syntax = "proto3";
16+
17+
package google.cloud.aiplatform.v1;
18+
19+
import "google/api/field_behavior.proto";
20+
import "google/api/resource.proto";
21+
import "google/cloud/aiplatform/v1/content.proto";
22+
import "google/cloud/aiplatform/v1/tool.proto";
23+
import "google/protobuf/duration.proto";
24+
import "google/protobuf/timestamp.proto";
25+
26+
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
27+
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
28+
option java_multiple_files = true;
29+
option java_outer_classname = "CachedContentProto";
30+
option java_package = "com.google.cloud.aiplatform.v1";
31+
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
32+
option ruby_package = "Google::Cloud::AIPlatform::V1";
33+
34+
// A resource used in LLM queries that lets users explicitly specify what to
35+
// cache and how to cache it.
36+
message CachedContent {
37+
option (google.api.resource) = {
38+
type: "aiplatform.googleapis.com/CachedContent"
39+
pattern: "projects/{project}/locations/{location}/cachedContents/{cached_content}"
40+
plural: "cachedContents"
41+
singular: "cachedContent"
42+
};
43+
44+
// Metadata on the usage of the cached content.
45+
message UsageMetadata {
46+
// Total number of tokens that the cached content consumes.
47+
int32 total_token_count = 1;
48+
49+
// Number of text characters.
50+
int32 text_count = 2;
51+
52+
// Number of images.
53+
int32 image_count = 3;
54+
55+
// Duration of video, in seconds.
56+
int32 video_duration_seconds = 4;
57+
58+
// Duration of audio, in seconds.
59+
int32 audio_duration_seconds = 5;
60+
}
61+
62+
// Expiration time of the cached content; at most one member may be set.
63+
oneof expiration {
64+
// Timestamp of when this resource is considered expired.
65+
// This is *always* provided on output, regardless of what was sent
66+
// on input.
67+
google.protobuf.Timestamp expire_time = 9;
68+
69+
// Input only. The TTL for this resource. The expiration time is computed
70+
// as `now + TTL`.
71+
google.protobuf.Duration ttl = 10
72+
[(google.api.field_behavior) = INPUT_ONLY];
73+
}
74+
75+
// Immutable. Identifier. The server-generated resource name of the cached
76+
// content. Format:
77+
// `projects/{project}/locations/{location}/cachedContents/{cached_content}`
78+
string name = 1 [
79+
(google.api.field_behavior) = IDENTIFIER,
80+
(google.api.field_behavior) = IMMUTABLE
81+
];
82+
83+
// Optional. Immutable. The user-generated meaningful display name of the
84+
// cached content.
85+
string display_name = 11 [
86+
(google.api.field_behavior) = OPTIONAL,
87+
(google.api.field_behavior) = IMMUTABLE
88+
];
89+
90+
// Immutable. The name of the publisher model to use for cached content.
91+
// Format:
92+
// `projects/{project}/locations/{location}/publishers/{publisher}/models/{model}`
93+
string model = 2 [(google.api.field_behavior) = IMMUTABLE];
94+
95+
// Optional. Input only. Immutable. Developer-set system instruction.
96+
// Currently, text only.
97+
Content system_instruction = 3 [
98+
(google.api.field_behavior) = OPTIONAL,
99+
(google.api.field_behavior) = IMMUTABLE,
100+
(google.api.field_behavior) = INPUT_ONLY
101+
];
102+
103+
// Optional. Input only. Immutable. The content to cache.
104+
repeated Content contents = 4 [
105+
(google.api.field_behavior) = OPTIONAL,
106+
(google.api.field_behavior) = IMMUTABLE,
107+
(google.api.field_behavior) = INPUT_ONLY
108+
];
109+
110+
// Optional. Input only. Immutable. A list of `Tools` the model may use to
111+
// generate the next response.
112+
repeated Tool tools = 5 [
113+
(google.api.field_behavior) = OPTIONAL,
114+
(google.api.field_behavior) = IMMUTABLE,
115+
(google.api.field_behavior) = INPUT_ONLY
116+
];
117+
118+
// Optional. Input only. Immutable. Tool config. This config is shared for all
119+
// tools.
120+
ToolConfig tool_config = 6 [
121+
(google.api.field_behavior) = OPTIONAL,
122+
(google.api.field_behavior) = IMMUTABLE,
123+
(google.api.field_behavior) = INPUT_ONLY
124+
];
125+
126+
// Output only. Creation time of the cache entry.
127+
google.protobuf.Timestamp create_time = 7
128+
[(google.api.field_behavior) = OUTPUT_ONLY];
129+
130+
// Output only. When the cache entry was last updated, in UTC time.
131+
google.protobuf.Timestamp update_time = 8
132+
[(google.api.field_behavior) = OUTPUT_ONLY];
133+
134+
// Output only. Metadata on the usage of the cached content.
135+
UsageMetadata usage_metadata = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
136+
}
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
// Copyright 2024 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
syntax = "proto3";
16+
17+
package google.cloud.aiplatform.v1;
18+
19+
import "google/api/annotations.proto";
20+
import "google/api/client.proto";
21+
import "google/api/field_behavior.proto";
22+
import "google/api/resource.proto";
23+
import "google/cloud/aiplatform/v1/cached_content.proto";
24+
import "google/protobuf/empty.proto";
25+
import "google/protobuf/field_mask.proto";
26+
27+
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
28+
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
29+
option java_multiple_files = true;
30+
option java_outer_classname = "GenAiCacheServiceProto";
31+
option java_package = "com.google.cloud.aiplatform.v1";
32+
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
33+
option ruby_package = "Google::Cloud::AIPlatform::V1";
34+
35+
// Service for managing Vertex AI's CachedContent resource.
36+
service GenAiCacheService {
37+
option (google.api.default_host) = "aiplatform.googleapis.com";
38+
option (google.api.oauth_scopes) =
39+
"https://www.googleapis.com/auth/cloud-platform";
40+
41+
// Creates cached content. This call initializes the cached content in the
42+
// data storage, and users need to pay for the cache data storage.
43+
rpc CreateCachedContent(CreateCachedContentRequest) returns (CachedContent) {
44+
option (google.api.http) = {
45+
post: "/v1/{parent=projects/*/locations/*}/cachedContents"
46+
body: "cached_content"
47+
};
48+
option (google.api.method_signature) = "parent,cached_content";
49+
}
50+
51+
// Gets cached content configurations.
52+
rpc GetCachedContent(GetCachedContentRequest) returns (CachedContent) {
53+
option (google.api.http) = {
54+
get: "/v1/{name=projects/*/locations/*/cachedContents/*}"
55+
};
56+
option (google.api.method_signature) = "name";
57+
}
58+
59+
// Updates cached content configurations.
60+
rpc UpdateCachedContent(UpdateCachedContentRequest) returns (CachedContent) {
61+
option (google.api.http) = {
62+
patch: "/v1/{cached_content.name=projects/*/locations/*/cachedContents/*}"
63+
body: "cached_content"
64+
};
65+
option (google.api.method_signature) = "cached_content,update_mask";
66+
}
67+
68+
// Deletes cached content.
69+
rpc DeleteCachedContent(DeleteCachedContentRequest)
70+
returns (google.protobuf.Empty) {
71+
option (google.api.http) = {
72+
delete: "/v1/{name=projects/*/locations/*/cachedContents/*}"
73+
};
74+
option (google.api.method_signature) = "name";
75+
}
76+
77+
// Lists cached contents in a project location.
78+
rpc ListCachedContents(ListCachedContentsRequest)
79+
returns (ListCachedContentsResponse) {
80+
option (google.api.http) = {
81+
get: "/v1/{parent=projects/*/locations/*}/cachedContents"
82+
};
83+
option (google.api.method_signature) = "parent";
84+
}
85+
}
86+
87+
// Request message for
88+
// [GenAiCacheService.CreateCachedContent][google.cloud.aiplatform.v1.GenAiCacheService.CreateCachedContent].
89+
message CreateCachedContentRequest {
90+
// Required. The parent resource where the cached content will be created.
91+
string parent = 1 [
92+
(google.api.field_behavior) = REQUIRED,
93+
(google.api.resource_reference) = {
94+
child_type: "aiplatform.googleapis.com/CachedContent"
95+
}
96+
];
97+
98+
// Required. The cached content to create.
99+
CachedContent cached_content = 2 [(google.api.field_behavior) = REQUIRED];
100+
}
101+
102+
// Request message for
103+
// [GenAiCacheService.GetCachedContent][google.cloud.aiplatform.v1.GenAiCacheService.GetCachedContent].
104+
message GetCachedContentRequest {
105+
// Required. The resource name referring to the cached content to get.
106+
string name = 1 [
107+
(google.api.field_behavior) = REQUIRED,
108+
(google.api.resource_reference) = {
109+
type: "aiplatform.googleapis.com/CachedContent"
110+
}
111+
];
112+
}
113+
114+
// Request message for
115+
// [GenAiCacheService.UpdateCachedContent][google.cloud.aiplatform.v1.GenAiCacheService.UpdateCachedContent].
116+
// Only `expire_time` or `ttl` can be updated.
117+
message UpdateCachedContentRequest {
118+
// Required. The cached content to update.
119+
CachedContent cached_content = 1 [(google.api.field_behavior) = REQUIRED];
120+
121+
// Required. The list of fields to update.
122+
google.protobuf.FieldMask update_mask = 2
123+
[(google.api.field_behavior) = REQUIRED];
124+
}
125+
126+
// Request message for
127+
// [GenAiCacheService.DeleteCachedContent][google.cloud.aiplatform.v1.GenAiCacheService.DeleteCachedContent].
128+
message DeleteCachedContentRequest {
129+
// Required. The resource name referring to the cached content to delete.
130+
string name = 1 [
131+
(google.api.field_behavior) = REQUIRED,
132+
(google.api.resource_reference) = {
133+
type: "aiplatform.googleapis.com/CachedContent"
134+
}
135+
];
136+
}
137+
138+
// Request message for listing CachedContents under a parent.
139+
message ListCachedContentsRequest {
140+
// Required. The parent, which owns this collection of cached contents.
141+
string parent = 1 [
142+
(google.api.field_behavior) = REQUIRED,
143+
(google.api.resource_reference) = {
144+
child_type: "aiplatform.googleapis.com/CachedContent"
145+
}
146+
];
147+
148+
// Optional. The maximum number of cached contents to return. The service may
149+
// return fewer than this value. If unspecified, a default number of items
150+
// (below the maximum) will be returned. The maximum value is 1000; values
151+
// above 1000 will be coerced to 1000.
152+
int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];
153+
154+
// Optional. A page token, received from a previous `ListCachedContents` call.
155+
// Provide this to retrieve the subsequent page.
156+
//
157+
// When paginating, all other parameters provided to `ListCachedContents` must
158+
// match the call that provided the page token.
159+
string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
160+
}
161+
162+
// Response with a list of CachedContents and a token for the next page.
163+
message ListCachedContentsResponse {
164+
// List of cached contents.
165+
repeated CachedContent cached_contents = 1;
166+
167+
// A token, which can be sent as `page_token` to retrieve the next page.
168+
// If this field is omitted, there are no subsequent pages.
169+
string next_page_token = 2;
170+
}

google/cloud/aiplatform/v1/prediction_service.proto

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,18 @@ message GenerateContentRequest {
695695
optional Content system_instruction = 8
696696
[(google.api.field_behavior) = OPTIONAL];
697697

698+
// Optional. The name of the cached content used as context to serve the
699+
// prediction. Note: only used in explicit caching, where users can have
700+
// control over caching (e.g. what content to cache) and enjoy guaranteed cost
701+
// savings. Format:
702+
// `projects/{project}/locations/{location}/cachedContents/{cachedContent}`
703+
string cached_content = 9 [
704+
(google.api.field_behavior) = OPTIONAL,
705+
(google.api.resource_reference) = {
706+
type: "aiplatform.googleapis.com/CachedContent"
707+
}
708+
];
709+
698710
// Optional. A list of `Tools` the model may use to generate the next
699711
// response.
700712
//
@@ -772,6 +784,11 @@ message GenerateContentResponse {
772784

773785
// Total token count for prompt and response candidates.
774786
int32 total_token_count = 3;
787+
788+
// Output only. Number of tokens in the cached part in the input (the cached
789+
// content).
790+
int32 cached_content_token_count = 5
791+
[(google.api.field_behavior) = OUTPUT_ONLY];
775792
}
776793

777794
// Output only. Generated candidates.

0 commit comments

Comments
 (0)