Skip to content

Commit 569fc73

Browse files
Google APIs authored and copybara-github committed
feat: A new method StreamingSynthesize is added to service TextToSpeech
docs: A comment for field `name` in message `.google.cloud.texttospeech.v1.VoiceSelectionParams` is changed

PiperOrigin-RevId: 662708702
1 parent 83e5198 commit 569fc73

2 files changed

Lines changed: 55 additions & 7 deletions

File tree

google/cloud/texttospeech/v1/cloud_tts.proto

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ option go_package = "cloud.google.com/go/texttospeech/apiv1/texttospeechpb;textt
2727
option java_multiple_files = true;
2828
option java_outer_classname = "TextToSpeechProto";
2929
option java_package = "com.google.cloud.texttospeech.v1";
30+
option objc_class_prefix = "CTTS";
3031
option php_namespace = "Google\\Cloud\\TextToSpeech\\V1";
3132
option ruby_package = "Google::Cloud::TextToSpeech::V1";
3233
option (google.api.resource_definition) = {
@@ -58,6 +59,11 @@ service TextToSpeech {
5859
};
5960
option (google.api.method_signature) = "input,voice,audio_config";
6061
}
62+
63+
// Performs bidirectional streaming speech synthesis: receive audio while
64+
// sending text.
65+
rpc StreamingSynthesize(stream StreamingSynthesizeRequest)
66+
returns (stream StreamingSynthesizeResponse) {}
6167
}
6268

6369
// Gender of the voice as described in
@@ -191,8 +197,9 @@ message VoiceSelectionParams {
191197
// Bokmal) instead of "no" (Norwegian).
192198
string language_code = 1 [(google.api.field_behavior) = REQUIRED];
193199

194-
// The name of the voice. If not set, the service will choose a
195-
// voice based on the other parameters such as language_code and gender.
200+
// The name of the voice. If neither the name nor the gender is set,
201+
// the service will choose a voice based on the other parameters such as
202+
// language_code.
196203
string name = 2;
197204

198205
// The preferred gender of the voice. If not set, the service will
@@ -302,3 +309,48 @@ message SynthesizeSpeechResponse {
302309
// whereas JSON representations use base64.
303310
bytes audio_content = 1;
304311
}
312+
313+
// Provides configuration information for the StreamingSynthesize request.
314+
message StreamingSynthesizeConfig {
315+
// Required. The desired voice of the synthesized audio.
316+
VoiceSelectionParams voice = 1 [(google.api.field_behavior) = REQUIRED];
317+
}
318+
319+
// Input to be synthesized.
320+
message StreamingSynthesisInput {
321+
oneof input_source {
322+
// The raw text to be synthesized. It is recommended that each input
323+
// contains complete, terminating sentences, as this will likely result in
324+
// better prosody in the output audio. That being said, users are free to
325+
// input text however they please.
326+
string text = 1;
327+
}
328+
}
329+
330+
// Request message for the `StreamingSynthesize` method. Multiple
331+
// `StreamingSynthesizeRequest` messages are sent in one call.
332+
// The first message must contain a `streaming_config` that
333+
// fully specifies the request configuration and must not contain `input`. All
334+
// subsequent messages must only have `input` set.
335+
message StreamingSynthesizeRequest {
336+
// The request to be sent, either a StreamingSynthesizeConfig or
337+
// StreamingSynthesisInput.
338+
oneof streaming_request {
339+
// StreamingSynthesizeConfig to be used in this streaming attempt. Only
340+
// specified in the first message sent in a `StreamingSynthesize` call.
341+
StreamingSynthesizeConfig streaming_config = 1;
342+
343+
// Input to synthesize. Specified in all messages but the first in a
344+
// `StreamingSynthesize` call.
345+
StreamingSynthesisInput input = 2;
346+
}
347+
}
348+
349+
// `StreamingSynthesizeResponse` is the only message returned to the
350+
// client by the `StreamingSynthesize` method. A series of zero or more
351+
// `StreamingSynthesizeResponse` messages is streamed back to the client.
352+
message StreamingSynthesizeResponse {
353+
// The audio data bytes encoded as specified in the request. This is
354+
// headerless LINEAR16 audio with a sample rate of 24000 Hz.
355+
bytes audio_content = 1;
356+
}

google/cloud/texttospeech/v1/texttospeech_v1.yaml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,7 @@ http:
2929

3030
authentication:
3131
rules:
32-
- selector: google.cloud.texttospeech.v1.TextToSpeech.ListVoices
33-
oauth:
34-
canonical_scopes: |-
35-
https://www.googleapis.com/auth/cloud-platform
36-
- selector: google.cloud.texttospeech.v1.TextToSpeech.SynthesizeSpeech
32+
- selector: 'google.cloud.texttospeech.v1.TextToSpeech.*'
3733
oauth:
3834
canonical_scopes: |-
3935
https://www.googleapis.com/auth/cloud-platform

0 commit comments

Comments
 (0)