@@ -154,6 +154,13 @@ message Voice {
154154 int32 natural_sample_rate_hertz = 4 ;
155155}
156156
// Advanced options that tune how a voice is synthesized.
message AdvancedVoiceOptions {
  // Applies to Journey voices only. When false, synthesis is context aware
  // and incurs higher latency.
  optional bool low_latency_journey_synthesis = 1;
}
163+
157164// The top-level message sent by the client for the `SynthesizeSpeech` method.
158165message SynthesizeSpeechRequest {
159166 // The type of timepoint information that is returned in the response.
@@ -176,6 +183,44 @@ message SynthesizeSpeechRequest {
176183
177184 // Whether and what timepoints are returned in the response.
178185 repeated TimepointType enable_time_pointing = 4 ;
186+
187+ // Advanced voice options.
188+ optional AdvancedVoiceOptions advanced_voice_options = 8 ;
189+ }
190+
// Describes how a single phrase should be pronounced.
message CustomPronunciationParams {
  // Phonetic alphabets in which a pronunciation can be written.
  enum PhoneticEncoding {
    // No encoding specified.
    PHONETIC_ENCODING_UNSPECIFIED = 0;

    // International Phonetic Alphabet (IPA), e.g. apple -> ˈæpəl
    // https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
    PHONETIC_ENCODING_IPA = 1;

    // X-SAMPA, e.g. apple -> "{p@l"
    // https://en.wikipedia.org/wiki/X-SAMPA
    PHONETIC_ENCODING_X_SAMPA = 2;
  }

  // The phrase whose pronunciation is customized. It may contain several
  // words (for example a proper noun), but should not span a whole sentence.
  optional string phrase = 1;

  // The phonetic encoding in which `pronunciation` is written.
  optional PhoneticEncoding phonetic_encoding = 2;

  // The pronunciation of the phrase, expressed in the encoding given by
  // `phonetic_encoding`.
  optional string pronunciation = 3;
}
219+
// A set of pronunciation customizations.
message CustomPronunciations {
  // The individual pronunciation customizations to apply.
  repeated CustomPronunciationParams pronunciations = 1;
}
180225
181226// Contains text input to be synthesized. Either `text` or `ssml` must be
@@ -195,6 +240,21 @@ message SynthesisInput {
195240 // [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
196241 string ssml = 2 ;
197242 }
243+
244+ // Optional. The pronunciation customizations to be applied to the input. If
245+ // this is set, the input will be synthesized using the given pronunciation
246+ // customizations.
247+ //
248+ // The initial support will be for EFIGS (English, French,
249+ // Italian, German, Spanish) languages, as provided in
250+ // VoiceSelectionParams. Journey and Instant Clone voices are
251+ // not supported yet.
252+ //
253+ // In order to customize the pronunciation of a phrase, there must be an exact
254+ // match of the phrase in the input types. If using SSML, the phrase must not
255+ // be inside a phoneme tag (entirely or partially).
256+ CustomPronunciations custom_pronunciations = 3
257+ [(google.api.field_behavior ) = OPTIONAL ];
198258}
199259
200260// Description of which voice to use for a synthesis request.
0 commit comments