Skip to content

Commit 195c051

Browse files
Google APIs and copybara-github
authored and committed
feat: A new method StreamingSynthesize is added to service TextToSpeech
docs: A comment for field `name` in message `.google.cloud.texttospeech.v1beta1.VoiceSelectionParams` is changed
PiperOrigin-RevId: 662978887
1 parent 569fc73 commit 195c051

2 files changed

Lines changed: 55 additions & 7 deletions

File tree

google/cloud/texttospeech/v1beta1/cloud_tts.proto

Lines changed: 54 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ option go_package = "cloud.google.com/go/texttospeech/apiv1beta1/texttospeechpb;
2727
option java_multiple_files = true;
2828
option java_outer_classname = "TextToSpeechProto";
2929
option java_package = "com.google.cloud.texttospeech.v1beta1";
30+
option objc_class_prefix = "CTTS";
3031
option php_namespace = "Google\\Cloud\\TextToSpeech\\V1beta1";
3132
option ruby_package = "Google::Cloud::TextToSpeech::V1beta1";
3233
option (google.api.resource_definition) = {
@@ -58,6 +59,11 @@ service TextToSpeech {
5859
};
5960
option (google.api.method_signature) = "input,voice,audio_config";
6061
}
62+
63+
// Performs bidirectional streaming speech synthesis: receive audio while
64+
// sending text.
65+
rpc StreamingSynthesize(stream StreamingSynthesizeRequest)
66+
returns (stream StreamingSynthesizeResponse) {}
6167
}
6268

6369
// Gender of the voice as described in
@@ -206,8 +212,9 @@ message VoiceSelectionParams {
206212
// Bokmal) instead of "no" (Norwegian)".
207213
string language_code = 1 [(google.api.field_behavior) = REQUIRED];
208214

209-
// The name of the voice. If not set, the service will choose a
210-
// voice based on the other parameters such as language_code and gender.
215+
// The name of the voice. If neither the name nor the gender is set,
216+
// the service will choose a voice based on the other parameters such as
217+
// language_code.
211218
string name = 2;
212219

213220
// The preferred gender of the voice. If not set, the service will
@@ -334,3 +341,48 @@ message Timepoint {
334341
// Time offset in seconds from the start of the synthesized audio.
335342
double time_seconds = 3;
336343
}
344+
345+
// Provides configuration information for the `StreamingSynthesize` request.
// Sent as `streaming_config` in the first `StreamingSynthesizeRequest` of a
// call.
346+
message StreamingSynthesizeConfig {
347+
// Required. The desired voice of the synthesized audio.
348+
VoiceSelectionParams voice = 1 [(google.api.field_behavior) = REQUIRED];
349+
}
350+
351+
// Input to be synthesized. Carried in the `input` field of a
// `StreamingSynthesizeRequest`.
352+
message StreamingSynthesisInput {
353+
// The source of the input. Raw text is the only source defined here.
oneof input_source {
354+
// The raw text to be synthesized. It is recommended that each input
355+
// contain complete, terminating sentences, as this will likely result in
356+
// better prosody in the output audio. That being said, users are free to
357+
// input text however they please.
358+
string text = 1;
359+
}
360+
}
361+
362+
// Request message for the `StreamingSynthesize` method. Multiple
363+
// `StreamingSynthesizeRequest` messages are sent in one call.
364+
// The first message must contain a `streaming_config` that
365+
// fully specifies the request configuration and must not contain `input`. All
366+
// subsequent messages must have only `input` set.
367+
message StreamingSynthesizeRequest {
368+
// The request to be sent, either a StreamingSynthesizeConfig or
369+
// StreamingSynthesisInput.
370+
oneof streaming_request {
371+
// StreamingSynthesizeConfig to be used in this streaming attempt. Only
372+
// specified in the first message sent in a `StreamingSynthesize` call.
373+
StreamingSynthesizeConfig streaming_config = 1;
374+
375+
// Input to synthesize. Specified in every message after the first in a
376+
// `StreamingSynthesize` call.
377+
StreamingSynthesisInput input = 2;
378+
}
379+
}
380+
381+
// `StreamingSynthesizeResponse` is the only message returned to the
382+
// client by the `StreamingSynthesize` method. A series of zero or more
383+
// `StreamingSynthesizeResponse` messages is streamed back to the client.
384+
message StreamingSynthesizeResponse {
385+
// The audio data bytes encoded as specified in the request. This is
386+
// headerless LINEAR16 audio with a sample rate of 24000 Hz.
387+
bytes audio_content = 1;
388+
}

google/cloud/texttospeech/v1beta1/texttospeech_v1beta1.yaml

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -29,11 +29,7 @@ http:
2929

3030
authentication:
3131
rules:
32-
- selector: google.cloud.texttospeech.v1beta1.TextToSpeech.ListVoices
33-
oauth:
34-
canonical_scopes: |-
35-
https://www.googleapis.com/auth/cloud-platform
36-
- selector: google.cloud.texttospeech.v1beta1.TextToSpeech.SynthesizeSpeech
32+
- selector: 'google.cloud.texttospeech.v1beta1.TextToSpeech.*'
3733
oauth:
3834
canonical_scopes: |-
3935
https://www.googleapis.com/auth/cloud-platform

0 commit comments

Comments
 (0)