@@ -151,6 +151,13 @@ message Voice {
151151 int32 natural_sample_rate_hertz = 4 ;
152152}
153153
154+ // Advanced voice options that can be supplied on a synthesis request.
155+ message AdvancedVoiceOptions {
156+ // Applies only to Journey voices. If false, the synthesis will be context
157+ // aware and have higher latency.
158+ optional bool low_latency_journey_synthesis = 1 ;
159+ }
160+
154161// The top-level message sent by the client for the `SynthesizeSpeech` method.
155162message SynthesizeSpeechRequest {
156163 // Required. The Synthesizer requires either plain text or SSML as input.
@@ -161,6 +168,44 @@ message SynthesizeSpeechRequest {
161168
162169 // Required. The configuration of the synthesized audio.
163170 AudioConfig audio_config = 3 [(google.api.field_behavior ) = REQUIRED ];
171+
172+ // Advanced voice options.
173+ optional AdvancedVoiceOptions advanced_voice_options = 8 ;
174+ }
175+
176+ // Pronunciation customization for a single phrase.
177+ message CustomPronunciationParams {
178+ // The phonetic encodings in which a pronunciation can be written.
179+ enum PhoneticEncoding {
180+ // Not specified.
181+ PHONETIC_ENCODING_UNSPECIFIED = 0 ;
182+
183+ // International Phonetic Alphabet (IPA), e.g. apple -> ˈæpəl
184+ // https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
185+ PHONETIC_ENCODING_IPA = 1 ;
186+
187+ // X-SAMPA, e.g. apple -> "{p@l"
188+ // https://en.wikipedia.org/wiki/X-SAMPA
189+ PHONETIC_ENCODING_X_SAMPA = 2 ;
190+ }
191+
192+ // The phrase to which the customization will be applied.
193+ // The phrase can be multiple words (e.g. in the case of proper nouns), but
194+ // should not span a whole sentence.
195+ optional string phrase = 1 ;
196+
197+ // The phonetic encoding in which `pronunciation` is written.
198+ optional PhoneticEncoding phonetic_encoding = 2 ;
199+
200+ // The pronunciation of the phrase. This must be written in the phonetic
201+ // encoding given by `phonetic_encoding`.
202+ optional string pronunciation = 3 ;
203+ }
204+
205+ // A collection of pronunciation customizations, one entry per phrase.
206+ message CustomPronunciations {
207+ // The pronunciation customizations to be applied.
208+ repeated CustomPronunciationParams pronunciations = 1 ;
164209}
165210
166211// Contains text input to be synthesized. Either `text` or `ssml` must be
@@ -180,6 +225,21 @@ message SynthesisInput {
180225 // [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
181226 string ssml = 2 ;
182227 }
228+
229+ // Optional. The pronunciation customizations to be applied to the input. If
230+ // this is set, the input will be synthesized using the given pronunciation
231+ // customizations.
232+ //
233+ // The initial support will be for EFIGS (English, French,
234+ // Italian, German, Spanish) languages, as provided in
235+ // VoiceSelectionParams. Journey and Instant Clone voices are
236+ // not supported yet.
237+ //
238+ // To customize the pronunciation of a phrase, the phrase must appear as an
239+ // exact match in the input (text or SSML). If using SSML, the phrase must not
240+ // be inside a phoneme tag (entirely or partially).
241+ CustomPronunciations custom_pronunciations = 3
242+ [(google.api.field_behavior ) = OPTIONAL ];
183243}
184244
185245// Description of which voice to use for a synthesis request.
0 commit comments