Skip to content

Commit d6f9dbc

Browse files
Google APIscopybara-github
authored andcommitted
feat: Add low latency journey option to proto
feat: Add CustomPronunciationParams for upcoming feature work PiperOrigin-RevId: 684207737
1 parent d992b06 commit d6f9dbc

1 file changed

Lines changed: 60 additions & 0 deletions

File tree

google/cloud/texttospeech/v1beta1/cloud_tts.proto

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,13 @@ message Voice {
154154
int32 natural_sample_rate_hertz = 4;
155155
}
156156

157+
// Advanced voice options for a synthesis request. Carried by the
// `advanced_voice_options` field of `SynthesizeSpeechRequest`.
158+
message AdvancedVoiceOptions {
159+
// Only for Journey voices. If false, the synthesis will be context-aware
160+
// and have higher latency.
//
// Declared `optional`, so an unset value can be told apart from an
// explicit `false`.
161+
optional bool low_latency_journey_synthesis = 1;
162+
}
163+
157164
// The top-level message sent by the client for the `SynthesizeSpeech` method.
158165
message SynthesizeSpeechRequest {
159166
// The type of timepoint information that is returned in the response.
@@ -176,6 +183,44 @@ message SynthesizeSpeechRequest {
176183

177184
// Whether and what timepoints are returned in the response.
178185
repeated TimepointType enable_time_pointing = 4;
186+
187+
// Advanced voice options.
188+
optional AdvancedVoiceOptions advanced_voice_options = 8;
189+
}
190+
191+
// Pronunciation customization for a single phrase: the phrase itself, the
// phonetic encoding in use, and the pronunciation written in that encoding.
192+
message CustomPronunciationParams {
193+
// The phonetic encodings in which a pronunciation can be expressed.
194+
enum PhoneticEncoding {
195+
// Not specified.
196+
PHONETIC_ENCODING_UNSPECIFIED = 0;
197+
198+
// International Phonetic Alphabet (IPA), e.g. apple -> ˈæpəl
199+
// https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
200+
PHONETIC_ENCODING_IPA = 1;
201+
202+
// X-SAMPA, e.g. apple -> "{p@l"
203+
// https://en.wikipedia.org/wiki/X-SAMPA
204+
PHONETIC_ENCODING_X_SAMPA = 2;
205+
}
206+
207+
// The phrase to which the customization will be applied.
208+
// The phrase can be multiple words (e.g. in the case of proper nouns), but
209+
// should not span a whole sentence.
210+
optional string phrase = 1;
211+
212+
// The phonetic encoding in which `pronunciation` is written.
213+
optional PhoneticEncoding phonetic_encoding = 2;
214+
215+
// The pronunciation of the phrase. This must be in the phonetic encoding
216+
// specified by `phonetic_encoding`.
217+
optional string pronunciation = 3;
218+
}
219+
220+
// A collection of pronunciation customizations. Carried by the
// `custom_pronunciations` field of `SynthesisInput`.
221+
message CustomPronunciations {
222+
// The pronunciation customizations to be applied, one entry per phrase.
223+
repeated CustomPronunciationParams pronunciations = 1;
179224
}
180225

181226
// Contains text input to be synthesized. Either `text` or `ssml` must be
@@ -195,6 +240,21 @@ message SynthesisInput {
195240
// [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
196241
string ssml = 2;
197242
}
243+
244+
// Optional. The pronunciation customizations to be applied to the input. If
245+
// this is set, the input will be synthesized using the given pronunciation
246+
// customizations.
247+
//
248+
// The initial support will be for EFIGS (English, French,
249+
// Italian, German, Spanish) languages, as provided in
250+
// VoiceSelectionParams. Journey and Instant Clone voices are
251+
// not supported yet.
252+
//
253+
// To customize the pronunciation of a phrase, the phrase must match exactly
254+
// in the input (`text` or `ssml`). If using SSML, the phrase must not be
255+
// inside a phoneme tag (entirely or partially).
256+
CustomPronunciations custom_pronunciations = 3
257+
[(google.api.field_behavior) = OPTIONAL];
198258
}
199259

200260
// Description of which voice to use for a synthesis request.

0 commit comments

Comments
 (0)