feat: Add low latency journey option to proto

Google APIs · copybara-github · commit 10b8dc3b54c9 · 2024-10-09T17:32:29.000-07:00
feat: Add CustomPronunciationParams for upcoming feature work

PiperOrigin-RevId: 684229364
diff --git a/google/cloud/texttospeech/v1/cloud_tts.proto b/google/cloud/texttospeech/v1/cloud_tts.proto
@@ -151,6 +151,13 @@ message Voice {
   int32 natural_sample_rate_hertz = 4;
 }
 
+// Advanced voice options that can be set on a `SynthesizeSpeechRequest`.
+message AdvancedVoiceOptions {
+  // Only for Journey voices. If false, the synthesis will be context-aware
+  // and have higher latency.
+  optional bool low_latency_journey_synthesis = 1;
+}
+
 // The top-level message sent by the client for the `SynthesizeSpeech` method.
 message SynthesizeSpeechRequest {
   // Required. The Synthesizer requires either plain text or SSML as input.
@@ -161,6 +168,44 @@ message SynthesizeSpeechRequest {
 
   // Required. The configuration of the synthesized audio.
   AudioConfig audio_config = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // Advanced voice options.
+  optional AdvancedVoiceOptions advanced_voice_options = 8;
+}
+
+// Pronunciation customization for a single phrase.
+message CustomPronunciationParams {
+  // The phonetic encoding in which a pronunciation is written.
+  enum PhoneticEncoding {
+    // Not specified.
+    PHONETIC_ENCODING_UNSPECIFIED = 0;
+
+    // IPA (e.g. apple -> ˈæpəl).
+    // https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
+    PHONETIC_ENCODING_IPA = 1;
+
+    // X-SAMPA (e.g. apple -> "{p@l").
+    // https://en.wikipedia.org/wiki/X-SAMPA
+    PHONETIC_ENCODING_X_SAMPA = 2;
+  }
+
+  // The phrase to which the customization will be applied.
+  // The phrase can be multiple words (in the case of proper nouns etc.), but
+  // should not span a whole sentence.
+  optional string phrase = 1;
+
+  // The phonetic encoding in which `pronunciation` is written.
+  optional PhoneticEncoding phonetic_encoding = 2;
+
+  // The pronunciation of the phrase. This must be in the phonetic encoding
+  // specified by `phonetic_encoding`.
+  optional string pronunciation = 3;
+}
+
+// A collection of pronunciation customizations.
+message CustomPronunciations {
+  // The pronunciation customizations to be applied, one entry per phrase.
+  repeated CustomPronunciationParams pronunciations = 1;
 }
 
 // Contains text input to be synthesized. Either `text` or `ssml` must be
@@ -180,6 +225,21 @@ message SynthesisInput {
     // [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
     string ssml = 2;
   }
+
+  // Optional. The pronunciation customizations to be applied to the input. If
+  // this is set, the input will be synthesized using the given pronunciation
+  // customizations.
+  //
+  // The initial support will be for EFIGS (English, French,
+  // Italian, German, Spanish) languages, as provided in
+  // VoiceSelectionParams. Journey and Instant Clone voices are
+  // not supported yet.
+  //
+  // In order to customize the pronunciation of a phrase, there must be an exact
+  // match of the phrase in the input text or SSML. If using SSML, the phrase
+  // must not be inside a phoneme tag (entirely or partially).
+  CustomPronunciations custom_pronunciations = 3
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Description of which voice to use for a synthesis request.