@@ -27,6 +27,7 @@ option go_package = "cloud.google.com/go/texttospeech/apiv1beta1/texttospeechpb;
2727option java_multiple_files = true ;
2828option java_outer_classname = "TextToSpeechProto" ;
2929option java_package = "com.google.cloud.texttospeech.v1beta1" ;
30+ option objc_class_prefix = "CTTS" ;
3031option php_namespace = "Google\\Cloud\\TextToSpeech\\V1beta1" ;
3132option ruby_package = "Google::Cloud::TextToSpeech::V1beta1" ;
3233option (google.api.resource_definition ) = {
@@ -58,6 +59,11 @@ service TextToSpeech {
5859 };
5960 option (google.api.method_signature ) = "input,voice,audio_config" ;
6061 }
62+
  // Performs bidirectional streaming speech synthesis: the client streams text
  // input and receives synthesized audio while still sending further text.
  // The first `StreamingSynthesizeRequest` must contain only a
  // `streaming_config`; every subsequent request must contain only `input`.
  rpc StreamingSynthesize(stream StreamingSynthesizeRequest)
      returns (stream StreamingSynthesizeResponse) {}
6167}
6268
6369// Gender of the voice as described in
@@ -206,8 +212,9 @@ message VoiceSelectionParams {
206212 // Bokmal) instead of "no" (Norwegian)".
207213 string language_code = 1 [(google.api.field_behavior ) = REQUIRED ];
208214
209- // The name of the voice. If not set, the service will choose a
210- // voice based on the other parameters such as language_code and gender.
215+ // The name of the voice. If both the name and the gender are not set,
216+ // the service will choose a voice based on the other parameters such as
217+ // language_code.
211218 string name = 2 ;
212219
213220 // The preferred gender of the voice. If not set, the service will
@@ -334,3 +341,48 @@ message Timepoint {
334341 // Time offset in seconds from the start of the synthesized audio.
335342 double time_seconds = 3 ;
336343}
344+
// Provides configuration information for the StreamingSynthesize request.
// Sent only in the first `StreamingSynthesizeRequest` of a streaming call.
message StreamingSynthesizeConfig {
  // Required. The desired voice of the synthesized audio.
  VoiceSelectionParams voice = 1 [(google.api.field_behavior) = REQUIRED];
}
350+
// Input to be synthesized.
message StreamingSynthesisInput {
  // The source of the text to synthesize. Modeled as a oneof so additional
  // input formats can be added later without breaking the wire contract.
  oneof input_source {
    // The raw text to be synthesized. It is recommended that each input
    // contains complete, terminating sentences, as this will likely result in
    // better prosody in the output audio. That being said, users are free to
    // input text however they please.
    string text = 1;
  }
}
361+
// Request message for the `StreamingSynthesize` method. Multiple
// `StreamingSynthesizeRequest` messages are sent in one call.
// The first message must contain a `streaming_config` that
// fully specifies the request configuration and must not contain `input`. All
// subsequent messages must only have `input` set.
message StreamingSynthesizeRequest {
  // The request to be sent, either a StreamingSynthesizeConfig or
  // StreamingSynthesisInput. Exactly one is set per message.
  oneof streaming_request {
    // StreamingSynthesizeConfig to be used in this streaming attempt. Only
    // specified in the first message sent in a `StreamingSynthesize` call.
    StreamingSynthesizeConfig streaming_config = 1;

    // Input to synthesize. Specified in all messages but the first in a
    // `StreamingSynthesize` call.
    StreamingSynthesisInput input = 2;
  }
}
380+
// `StreamingSynthesizeResponse` is the only message returned to the
// client by the `StreamingSynthesize` method. A series of zero or more
// `StreamingSynthesizeResponse` messages are streamed back to the client.
message StreamingSynthesizeResponse {
  // The audio data bytes encoded as specified in the request. This is
  // headerless LINEAR16 audio with a sample rate of 24000.
  bytes audio_content = 1;
}
0 commit comments