feat: add per-modality token count breakdowns for GenAI APIs

Google APIs · copybara-github · commit 0e56139efe20 · 2025-01-24T11:10:02.000-08:00
PiperOrigin-RevId: 719368111
diff --git a/google/cloud/aiplatform/v1beta1/content.proto b/google/cloud/aiplatform/v1beta1/content.proto
@@ -52,6 +52,27 @@ enum HarmCategory {
   HARM_CATEGORY_CIVIC_INTEGRITY = 5;
 }
 
+// Modality of a content Part, e.g. text, image, or audio.
+enum Modality {
+  // Default value; no modality was specified.
+  MODALITY_UNSPECIFIED = 0;
+
+  // Plain text.
+  TEXT = 1;
+
+  // Image.
+  IMAGE = 2;
+
+  // Video.
+  VIDEO = 3;
+
+  // Audio.
+  AUDIO = 4;
+
+  // Document, e.g. PDF.
+  DOCUMENT = 5;
+}
+
 // The base structured datatype containing multi-part content of a message.
 //
 // A `Content` includes a `role` field designating the producer of the `Content`
@@ -671,3 +692,12 @@ message RetrievalMetadata {
   float google_search_dynamic_retrieval_score = 2
       [(google.api.field_behavior) = OPTIONAL];
 }
+
+// Token count attributed to a single modality.
+message ModalityTokenCount {
+  // The modality that `token_count` applies to.
+  Modality modality = 1;
+
+  // Number of tokens counted for `modality`.
+  int32 token_count = 2;
+}
diff --git a/google/cloud/aiplatform/v1beta1/prediction_service.proto b/google/cloud/aiplatform/v1beta1/prediction_service.proto
@@ -730,6 +730,10 @@ message CountTokensResponse {
   // The total number of billable characters counted across all instances from
   // the request.
   int32 total_billable_characters = 2;
+
+  // Output only. Token counts for the request input, broken down by modality.
+  repeated ModalityTokenCount prompt_tokens_details = 3
+      [(google.api.field_behavior) = OUTPUT_ONLY];
 }
 
 // Request message for [PredictionService.GenerateContent].
@@ -851,6 +855,19 @@ message GenerateContentResponse {
     // content).
     int32 cached_content_token_count = 5
         [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // Output only. Token counts for the request input, broken down by modality.
+    repeated ModalityTokenCount prompt_tokens_details = 9
+        [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // Output only. Token counts for the cached content in the request input,
+    // broken down by modality.
+    repeated ModalityTokenCount cache_tokens_details = 10
+        [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // Output only. Token counts for the returned response, broken down by modality.
+    repeated ModalityTokenCount candidates_tokens_details = 11
+        [(google.api.field_behavior) = OUTPUT_ONLY];
   }
 
   // Output only. Generated candidates.