feat: add Context Cache to v1

Google APIs · copybara-github · commit 51b75ed8bf6d · 2025-01-12T23:33:06.000-08:00
PiperOrigin-RevId: 714842212
diff --git a/google/cloud/aiplatform/v1/BUILD.bazel b/google/cloud/aiplatform/v1/BUILD.bazel
@@ -34,6 +34,7 @@ proto_library(
         "api_auth.proto",
         "artifact.proto",
         "batch_prediction_job.proto",
+        "cached_content.proto",
         "completion_stats.proto",
         "content.proto",
         "context.proto",
@@ -72,6 +73,7 @@ proto_library(
         "featurestore_monitoring.proto",
         "featurestore_online_service.proto",
         "featurestore_service.proto",
+        "gen_ai_cache_service.proto",
         "genai_tuning_service.proto",
         "hyperparameter_tuning_job.proto",
         "index.proto",
diff --git a/google/cloud/aiplatform/v1/aiplatform_v1.yaml b/google/cloud/aiplatform/v1/aiplatform_v1.yaml
@@ -13,6 +13,7 @@ apis:
 - name: google.cloud.aiplatform.v1.FeatureRegistryService
 - name: google.cloud.aiplatform.v1.FeaturestoreOnlineServingService
 - name: google.cloud.aiplatform.v1.FeaturestoreService
+- name: google.cloud.aiplatform.v1.GenAiCacheService
 - name: google.cloud.aiplatform.v1.GenAiTuningService
 - name: google.cloud.aiplatform.v1.IndexEndpointService
 - name: google.cloud.aiplatform.v1.IndexService
@@ -749,6 +750,10 @@ authentication:
     oauth:
       canonical_scopes: |-
         https://www.googleapis.com/auth/cloud-platform
+  - selector: 'google.cloud.aiplatform.v1.GenAiCacheService.*'
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform
   - selector: 'google.cloud.aiplatform.v1.GenAiTuningService.*'
     oauth:
       canonical_scopes: |-
diff --git a/google/cloud/aiplatform/v1/cached_content.proto b/google/cloud/aiplatform/v1/cached_content.proto
@@ -0,0 +1,136 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.aiplatform.v1;
+
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+import "google/cloud/aiplatform/v1/content.proto";
+import "google/cloud/aiplatform/v1/tool.proto";
+import "google/protobuf/duration.proto";
+import "google/protobuf/timestamp.proto";
+
+option csharp_namespace = "Google.Cloud.AIPlatform.V1";
+option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
+option java_multiple_files = true;
+option java_outer_classname = "CachedContentProto";
+option java_package = "com.google.cloud.aiplatform.v1";
+option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
+option ruby_package = "Google::Cloud::AIPlatform::V1";
+
+// A resource used in LLM queries for users to explicitly specify what to cache
+// and how to cache.
+message CachedContent {
+  option (google.api.resource) = {
+    type: "aiplatform.googleapis.com/CachedContent"
+    pattern: "projects/{project}/locations/{location}/cachedContents/{cached_content}"
+    plural: "cachedContents"
+    singular: "cachedContent"
+  };
+
+  // Metadata on the usage of the cached content.
+  message UsageMetadata {
+    // Total number of tokens that the cached content consumes.
+    int32 total_token_count = 1;
+
+    // Number of text characters.
+    int32 text_count = 2;
+
+    // Number of images.
+    int32 image_count = 3;
+
+    // Duration of video in seconds.
+    int32 video_duration_seconds = 4;
+
+    // Duration of audio in seconds.
+    int32 audio_duration_seconds = 5;
+  }
+
+  // Expiration time of the cached content.
+  oneof expiration {
+    // Timestamp of when this resource is considered expired.
+    // This is *always* provided on output, regardless of what was sent
+    // on input.
+    google.protobuf.Timestamp expire_time = 9;
+
+    // Input only. The TTL for this resource. The expiration time is computed:
+    // now + TTL.
+    google.protobuf.Duration ttl = 10
+        [(google.api.field_behavior) = INPUT_ONLY];
+  }
+
+  // Immutable. Identifier. The server-generated resource name of the cached
+  // content. Format:
+  // `projects/{project}/locations/{location}/cachedContents/{cached_content}`
+  string name = 1 [
+    (google.api.field_behavior) = IDENTIFIER,
+    (google.api.field_behavior) = IMMUTABLE
+  ];
+
+  // Optional. Immutable. The user-generated meaningful display name of the
+  // cached content.
+  string display_name = 11 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE
+  ];
+
+  // Immutable. The name of the publisher model to use for cached content.
+  // Format:
+  // projects/{project}/locations/{location}/publishers/{publisher}/models/{model}
+  string model = 2 [(google.api.field_behavior) = IMMUTABLE];
+
+  // Optional. Input only. Immutable. Developer-set system instruction.
+  // Currently, text only.
+  Content system_instruction = 3 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. The content to cache.
+  repeated Content contents = 4 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. A list of `Tools` the model may use to
+  // generate the next response.
+  repeated Tool tools = 5 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Optional. Input only. Immutable. Tool config. This config is shared for all
+  // tools.
+  ToolConfig tool_config = 6 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.field_behavior) = IMMUTABLE,
+    (google.api.field_behavior) = INPUT_ONLY
+  ];
+
+  // Output only. Creation time of the cache entry.
+  google.protobuf.Timestamp create_time = 7
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. When the cache entry was last updated in UTC time.
+  google.protobuf.Timestamp update_time = 8
+      [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Metadata on the usage of the cached content.
+  UsageMetadata usage_metadata = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
diff --git a/google/cloud/aiplatform/v1/gen_ai_cache_service.proto b/google/cloud/aiplatform/v1/gen_ai_cache_service.proto
@@ -0,0 +1,170 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.aiplatform.v1;
+
+import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+import "google/cloud/aiplatform/v1/cached_content.proto";
+import "google/protobuf/empty.proto";
+import "google/protobuf/field_mask.proto";
+
+option csharp_namespace = "Google.Cloud.AIPlatform.V1";
+option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
+option java_multiple_files = true;
+option java_outer_classname = "GenAiCacheServiceProto";
+option java_package = "com.google.cloud.aiplatform.v1";
+option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
+option ruby_package = "Google::Cloud::AIPlatform::V1";
+
+// Service for managing Vertex AI's CachedContent resource.
+service GenAiCacheService {
+  option (google.api.default_host) = "aiplatform.googleapis.com";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
+
+  // Creates cached content. This call will initialize the cached content in
+  // the data storage, and users need to pay for the cache data storage.
+  rpc CreateCachedContent(CreateCachedContentRequest) returns (CachedContent) {
+    option (google.api.http) = {
+      post: "/v1/{parent=projects/*/locations/*}/cachedContents"
+      body: "cached_content"
+    };
+    option (google.api.method_signature) = "parent,cached_content";
+  }
+
+  // Gets cached content configurations.
+  rpc GetCachedContent(GetCachedContentRequest) returns (CachedContent) {
+    option (google.api.http) = {
+      get: "/v1/{name=projects/*/locations/*/cachedContents/*}"
+    };
+    option (google.api.method_signature) = "name";
+  }
+
+  // Updates cached content configurations.
+  rpc UpdateCachedContent(UpdateCachedContentRequest) returns (CachedContent) {
+    option (google.api.http) = {
+      patch: "/v1/{cached_content.name=projects/*/locations/*/cachedContents/*}"
+      body: "cached_content"
+    };
+    option (google.api.method_signature) = "cached_content,update_mask";
+  }
+
+  // Deletes cached content.
+  rpc DeleteCachedContent(DeleteCachedContentRequest)
+      returns (google.protobuf.Empty) {
+    option (google.api.http) = {
+      delete: "/v1/{name=projects/*/locations/*/cachedContents/*}"
+    };
+    option (google.api.method_signature) = "name";
+  }
+
+  // Lists cached contents in a project.
+  rpc ListCachedContents(ListCachedContentsRequest)
+      returns (ListCachedContentsResponse) {
+    option (google.api.http) = {
+      get: "/v1/{parent=projects/*/locations/*}/cachedContents"
+    };
+    option (google.api.method_signature) = "parent";
+  }
+}
+
+// Request message for
+// [GenAiCacheService.CreateCachedContent][google.cloud.aiplatform.v1.GenAiCacheService.CreateCachedContent].
+message CreateCachedContentRequest {
+  // Required. The parent resource where the cached content will be created.
+  string parent = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      child_type: "aiplatform.googleapis.com/CachedContent"
+    }
+  ];
+
+  // Required. The cached content to create.
+  CachedContent cached_content = 2 [(google.api.field_behavior) = REQUIRED];
+}
+
+// Request message for
+// [GenAiCacheService.GetCachedContent][google.cloud.aiplatform.v1.GenAiCacheService.GetCachedContent].
+message GetCachedContentRequest {
+  // Required. The resource name referring to the cached content.
+  string name = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "aiplatform.googleapis.com/CachedContent"
+    }
+  ];
+}
+
+// Request message for
+// [GenAiCacheService.UpdateCachedContent][google.cloud.aiplatform.v1.GenAiCacheService.UpdateCachedContent].
+// Only expire_time or ttl can be updated.
+message UpdateCachedContentRequest {
+  // Required. The cached content to update.
+  CachedContent cached_content = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The list of fields to update.
+  google.protobuf.FieldMask update_mask = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Request message for
+// [GenAiCacheService.DeleteCachedContent][google.cloud.aiplatform.v1.GenAiCacheService.DeleteCachedContent].
+message DeleteCachedContentRequest {
+  // Required. The resource name referring to the cached content.
+  string name = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "aiplatform.googleapis.com/CachedContent"
+    }
+  ];
+}
+
+// Request to list CachedContents.
+message ListCachedContentsRequest {
+  // Required. The parent, which owns this collection of cached contents.
+  string parent = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      child_type: "aiplatform.googleapis.com/CachedContent"
+    }
+  ];
+
+  // Optional. The maximum number of cached contents to return. The service may
+  // return fewer than this value. If unspecified, a default number of items
+  // (under the maximum) will be returned. The maximum value is 1000; values
+  // above 1000 will be coerced to 1000.
+  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. A page token, received from a previous `ListCachedContents` call.
+  // Provide this to retrieve the subsequent page.
+  //
+  // When paginating, all other parameters provided to `ListCachedContents` must
+  // match the call that provided the page token.
+  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Response with a list of CachedContents.
+message ListCachedContentsResponse {
+  // List of cached contents.
+  repeated CachedContent cached_contents = 1;
+
+  // A token, which can be sent as `page_token` to retrieve the next page.
+  // If this field is omitted, there are no subsequent pages.
+  string next_page_token = 2;
+}
diff --git a/google/cloud/aiplatform/v1/prediction_service.proto b/google/cloud/aiplatform/v1/prediction_service.proto
@@ -695,6 +695,18 @@ message GenerateContentRequest {
   optional Content system_instruction = 8
       [(google.api.field_behavior) = OPTIONAL];
 
+  // Optional. The name of the cached content used as context to serve the
+  // prediction. Note: only used in explicit caching, where users can have
+  // control over caching (e.g. what content to cache) and enjoy guaranteed cost
+  // savings. Format:
+  // `projects/{project}/locations/{location}/cachedContents/{cached_content}`
+  string cached_content = 9 [
+    (google.api.field_behavior) = OPTIONAL,
+    (google.api.resource_reference) = {
+      type: "aiplatform.googleapis.com/CachedContent"
+    }
+  ];
+
   // Optional. A list of `Tools` the model may use to generate the next
   // response.
   //
@@ -772,6 +784,11 @@ message GenerateContentResponse {
 
     // Total token count for prompt and response candidates.
     int32 total_token_count = 3;
+
+    // Output only. Number of tokens in the cached part in the input (the cached
+    // content).
+    int32 cached_content_token_count = 5
+        [(google.api.field_behavior) = OUTPUT_ONLY];
   }
 
   // Output only. Generated candidates.