Skip to content

Commit 10b8dc3

Browse files
Google APIs and copybara-github
authored and committed
feat: Add low latency journey option to proto
feat: Add CustomPronunciationParams for upcoming feature work PiperOrigin-RevId: 684229364
1 parent 72be138 commit 10b8dc3

1 file changed

Lines changed: 60 additions & 0 deletions

File tree

google/cloud/texttospeech/v1/cloud_tts.proto

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,13 @@ message Voice {
151151
int32 natural_sample_rate_hertz = 4;
152152
}
153153

154+
// Advanced voice options for a synthesis request; carried in
// `SynthesizeSpeechRequest.advanced_voice_options`.
155+
message AdvancedVoiceOptions {
156+
// Only for Journey voices. If false, the synthesis will be context-aware
157+
// and have higher latency. Declared `optional`, so an unset value can be
// told apart from an explicit false.
158+
optional bool low_latency_journey_synthesis = 1;
159+
}
160+
154161
// The top-level message sent by the client for the `SynthesizeSpeech` method.
155162
message SynthesizeSpeechRequest {
156163
// Required. The Synthesizer requires either plain text or SSML as input.
@@ -161,6 +168,44 @@ message SynthesizeSpeechRequest {
161168

162169
// Required. The configuration of the synthesized audio.
163170
AudioConfig audio_config = 3 [(google.api.field_behavior) = REQUIRED];
171+
172+
// Advanced voice options.
173+
optional AdvancedVoiceOptions advanced_voice_options = 8;
174+
}
175+
176+
// Pronunciation customization for a single phrase: the phrase itself, its
// pronunciation, and the phonetic encoding the pronunciation is written in.
177+
message CustomPronunciationParams {
178+
// The phonetic encodings in which a pronunciation can be written.
179+
enum PhoneticEncoding {
180+
// Not specified.
181+
PHONETIC_ENCODING_UNSPECIFIED = 0;
182+
183+
// IPA (e.g. apple -> ˈæpəl).
184+
// https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
185+
PHONETIC_ENCODING_IPA = 1;
186+
187+
// X-SAMPA (e.g. apple -> "{p@l").
188+
// https://en.wikipedia.org/wiki/X-SAMPA
189+
PHONETIC_ENCODING_X_SAMPA = 2;
190+
}
191+
192+
// The phrase to which the customization will be applied.
193+
// The phrase can be multiple words (in the case of proper nouns etc.), but
194+
// should not span a whole sentence.
195+
optional string phrase = 1;
196+
197+
// The phonetic encoding in which `pronunciation` is written.
198+
optional PhoneticEncoding phonetic_encoding = 2;
199+
200+
// The pronunciation of the phrase. This must be in the phonetic encoding
201+
// specified by `phonetic_encoding`.
202+
optional string pronunciation = 3;
203+
}
204+
205+
// A collection of pronunciation customizations; set on
// `SynthesisInput.custom_pronunciations`.
206+
message CustomPronunciations {
207+
// The pronunciation customizations to be applied, one
// `CustomPronunciationParams` entry per phrase.
208+
repeated CustomPronunciationParams pronunciations = 1;
164209
}
165210

166211
// Contains text input to be synthesized. Either `text` or `ssml` must be
@@ -180,6 +225,21 @@ message SynthesisInput {
180225
// [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
181226
string ssml = 2;
182227
}
228+
229+
// Optional. The pronunciation customizations to be applied to the input. If
230+
// this is set, the input will be synthesized using the given pronunciation
231+
// customizations.
232+
//
233+
// The initial support will be for EFIGS (English, French,
234+
// Italian, German, Spanish) languages, as provided in
235+
// VoiceSelectionParams. Journey and Instant Clone voices are
236+
// not supported yet.
237+
//
238+
// In order to customize the pronunciation of a phrase, there must be an exact
239+
// match of the phrase in the input types. If using SSML, the phrase must not
240+
// be inside a phoneme tag (entirely or partially).
241+
CustomPronunciations custom_pronunciations = 3
242+
[(google.api.field_behavior) = OPTIONAL];
183243
}
184244

185245
// Description of which voice to use for a synthesis request.

0 commit comments

Comments
 (0)