Skip to content

Commit b2cd70b

Browse files
Google APIs and copybara-github
authored and committed
feat: add Gemini TTS Multispeaker API fields
PiperOrigin-RevId: 809186275
1 parent e8ed6b4 commit b2cd70b

2 files changed

Lines changed: 32 additions & 4 deletions

File tree

google/cloud/texttospeech/v1/cloud_tts.proto

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,28 @@ message MultiSpeakerMarkup {
255255
repeated Turn turns = 1 [(google.api.field_behavior) = REQUIRED];
256256
}
257257

258+
// Configuration for a single speaker in a Gemini TTS multi-speaker setup.
259+
// Two such configurations together enable dialogue between two speakers.
260+
message MultispeakerPrebuiltVoice {
261+
// Required. The speaker alias of the voice. This is the user-chosen speaker
262+
// name that is used in the multi-speaker text input, such as "Speaker1".
263+
string speaker_alias = 1 [(google.api.field_behavior) = REQUIRED];
264+
265+
// Required. The speaker ID of the voice. See
266+
// https://cloud.google.com/text-to-speech/docs/gemini-tts#voice_options
267+
// for available values.
268+
string speaker_id = 2 [(google.api.field_behavior) = REQUIRED];
269+
}
270+
271+
// Configuration for a multi-speaker text-to-speech setup. Enables the use of
272+
// two distinct voices in a single synthesis request.
273+
message MultiSpeakerVoiceConfig {
274+
// Required. A list of configurations for the voices of the speakers. Exactly
275+
// two speaker voice configurations must be provided.
276+
repeated MultispeakerPrebuiltVoice speaker_voice_configs = 2
277+
[(google.api.field_behavior) = REQUIRED];
278+
}
279+
258280
// Contains text input to be synthesized. Either `text` or `ssml` must be
259281
// supplied. Supplying both or neither returns
260282
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. The
@@ -341,6 +363,12 @@ message VoiceSelectionParams {
341363
// Optional. The name of the model. If set, the service will choose the model
342364
// matching the specified configuration.
343365
string model_name = 6 [(google.api.field_behavior) = OPTIONAL];
366+
367+
// Optional. The configuration for a Gemini multi-speaker text-to-speech
368+
// setup. Enables the use of two distinct voices in a single synthesis
369+
// request.
370+
MultiSpeakerVoiceConfig multi_speaker_voice_config = 7
371+
[(google.api.field_behavior) = OPTIONAL];
344372
}
345373

346374
// Description of audio data to be synthesized.
@@ -498,6 +526,10 @@ message StreamingSynthesisInput {
498526
// Markup for HD voices specifically. This field may not be used with any
499527
// other voices.
500528
string markup = 5;
529+
530+
// Multi-speaker markup for Gemini TTS. This field may not
531+
// be used with any other voices.
532+
MultiSpeakerMarkup multi_speaker_markup = 7;
501533
}
502534

503535
// This is system instruction supported only for controllable voice models.

google/cloud/texttospeech/v1/texttospeech_v1.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@ documentation:
1515
summary: |-
1616
Synthesizes natural-sounding speech by applying powerful neural network
1717
models.
18-
overview: |-
19-
# Introduction
20-
21-
Google Cloud Text-to-Speech API provides speech synthesis as a service.
2218
2319
http:
2420
rules:

0 commit comments

Comments
 (0)