Skip to content

Commit c821335

Browse files
Google APIs authored and
copybara-github committed
feat: add Gemini TTS Multispeaker API fields
PiperOrigin-RevId: 810470893
1 parent 5280c6b commit c821335

2 files changed

Lines changed: 32 additions & 4 deletions

File tree

google/cloud/texttospeech/v1beta1/cloud_tts.proto

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -270,6 +270,28 @@ message MultiSpeakerMarkup {
270270
repeated Turn turns = 1 [(google.api.field_behavior) = REQUIRED];
271271
}
272272

273+
// Configuration for a single speaker in a Gemini TTS multi-speaker setup.
274+
// Enables dialogue between two speakers.
275+
message MultispeakerPrebuiltVoice {
276+
// Required. The speaker alias of the voice. This is the user-chosen speaker
277+
// name that is used in the multi-speaker text input, such as "Speaker1".
278+
string speaker_alias = 1 [(google.api.field_behavior) = REQUIRED];
279+
280+
// Required. The speaker ID of the voice. See
281+
// https://cloud.google.com/text-to-speech/docs/gemini-tts#voice_options
282+
// for available values.
283+
string speaker_id = 2 [(google.api.field_behavior) = REQUIRED];
284+
}
285+
286+
// Configuration for a multi-speaker text-to-speech setup. Enables the use of up
287+
// to two distinct voices in a single synthesis request.
288+
message MultiSpeakerVoiceConfig {
289+
// Required. A list of configurations for the voices of the speakers. Exactly
290+
// two speaker voice configurations must be provided.
291+
repeated MultispeakerPrebuiltVoice speaker_voice_configs = 2
292+
[(google.api.field_behavior) = REQUIRED];
293+
}
294+
273295
// Contains text input to be synthesized. Either `text` or `ssml` must be
274296
// supplied. Supplying both or neither returns
275297
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. The
@@ -356,6 +378,12 @@ message VoiceSelectionParams {
356378
// Optional. The name of the model. If set, the service will choose the model
357379
// matching the specified configuration.
358380
string model_name = 6 [(google.api.field_behavior) = OPTIONAL];
381+
382+
// Optional. The configuration for a Gemini multi-speaker text-to-speech
383+
// setup. Enables the use of two distinct voices in a single synthesis
384+
// request.
385+
MultiSpeakerVoiceConfig multi_speaker_voice_config = 7
386+
[(google.api.field_behavior) = OPTIONAL];
359387
}
360388

361389
// Description of audio data to be synthesized.
@@ -530,6 +558,10 @@ message StreamingSynthesisInput {
530558
// Markup for HD voices specifically. This field may not be used with any
531559
// other voices.
532560
string markup = 5;
561+
562+
// Multi-speaker markup for Gemini TTS. This field may not
563+
// be used with any other voices.
564+
MultiSpeakerMarkup multi_speaker_markup = 7;
533565
}
534566

535567
// This is the system instruction, supported only for controllable voice models.

google/cloud/texttospeech/v1beta1/texttospeech_v1beta1.yaml

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,6 @@ documentation:
1515
summary: |-
1616
Synthesizes natural-sounding speech by applying powerful neural network
1717
models.
18-
overview: |-
19-
# Introduction
20-
21-
Google Cloud Text-to-Speech API provides speech synthesis as a service.
2218
2319
http:
2420
rules:

0 commit comments

Comments
 (0)