@@ -16,11 +16,11 @@ syntax = "proto3";
1616
1717package google.cloud.dialogflow.cx.v3 ;
1818
19+ import "google/api/annotations.proto" ;
1920import "google/api/field_behavior.proto" ;
2021import "google/api/resource.proto" ;
2122import "google/protobuf/duration.proto" ;
2223import "google/protobuf/timestamp.proto" ;
23- import "google/api/annotations.proto" ;
2424
2525option cc_enable_arenas = true ;
2626option csharp_namespace = "Google.Cloud.Dialogflow.Cx.V3" ;
@@ -80,6 +80,49 @@ enum AudioEncoding {
8080 AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7 ;
8181}
8282
83+ // Variant of the specified [Speech model][google.cloud.dialogflow.cx.v3.InputAudioConfig.model] to use.
84+ //
85+ // See the [Cloud Speech
86+ // documentation](https://cloud.google.com/speech-to-text/docs/enhanced-models)
87+ // for which models have different variants. For example, the "phone_call" model
88+ // has both a standard and an enhanced variant. When you use an enhanced model,
89+ // you will generally receive higher-quality results than with a standard model.
90+ enum SpeechModelVariant {
91+ // No model variant specified. In this case, Dialogflow defaults to
92+ // USE_BEST_AVAILABLE.
93+ SPEECH_MODEL_VARIANT_UNSPECIFIED = 0 ;
94+
95+ // Use the best available variant of the [Speech
96+ // model][google.cloud.dialogflow.cx.v3.InputAudioConfig.model] that the caller is eligible for.
97+ //
98+ // Please see the [Dialogflow
99+ // docs](https://cloud.google.com/dialogflow/docs/data-logging) for
100+ // how to make your project eligible for enhanced models.
101+ USE_BEST_AVAILABLE = 1 ;
102+
103+ // Use the standard model variant even if an enhanced model is available. See the
104+ // [Cloud Speech
105+ // documentation](https://cloud.google.com/speech-to-text/docs/enhanced-models)
106+ // for details about enhanced models.
107+ USE_STANDARD = 2 ;
108+
109+ // Use an enhanced model variant:
110+ //
111+ // * If an enhanced variant does not exist for the given
112+ // [model][google.cloud.dialogflow.cx.v3.InputAudioConfig.model] and request language, Dialogflow falls
113+ // back to the standard variant.
114+ //
115+ // The [Cloud Speech
116+ // documentation](https://cloud.google.com/speech-to-text/docs/enhanced-models)
117+ // describes which models have enhanced variants.
118+ //
119+ // * If the API caller isn't eligible for enhanced models, Dialogflow returns
120+ // an error. Please see the [Dialogflow
121+ // docs](https://cloud.google.com/dialogflow/docs/data-logging)
122+ // for how to make your project eligible.
123+ USE_ENHANCED = 3 ;
124+ }
125+
83126// Information for a word recognized by the speech recognizer.
84127message SpeechWordInfo {
85128 // The word this info is for.
@@ -158,47 +201,21 @@ message InputAudioConfig {
158201 bool single_utterance = 8 ;
159202}
160203
161- // Variant of the specified [Speech model][google.cloud.dialogflow.cx.v3.InputAudioConfig.model] to use.
162- //
163- // See the [Cloud Speech
164- // documentation](https://cloud.google.com/speech-to-text/docs/enhanced-models)
165- // for which models have different variants. For example, the "phone_call" model
166- // has both a standard and an enhanced variant. When you use an enhanced model,
167- // you will generally receive higher quality results than for a standard model.
168- enum SpeechModelVariant {
169- // No model variant specified. In this case Dialogflow defaults to
170- // USE_BEST_AVAILABLE.
171- SPEECH_MODEL_VARIANT_UNSPECIFIED = 0 ;
204+ // Gender of the voice as described in the
205+ // [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
206+ enum SsmlVoiceGender {
207+ // An unspecified gender, which means that the client has no preference for
208+ // the gender of the selected voice.
209+ SSML_VOICE_GENDER_UNSPECIFIED = 0 ;
172210
173- // Use the best available variant of the [Speech
174- // model][InputAudioConfig.model] that the caller is eligible for.
175- //
176- // Please see the [Dialogflow
177- // docs](https://cloud.google.com/dialogflow/docs/data-logging) for
178- // how to make your project eligible for enhanced models.
179- USE_BEST_AVAILABLE = 1 ;
211+ // A male voice.
212+ SSML_VOICE_GENDER_MALE = 1 ;
180213
181- // Use standard model variant even if an enhanced model is available. See the
182- // [Cloud Speech
183- // documentation](https://cloud.google.com/speech-to-text/docs/enhanced-models)
184- // for details about enhanced models.
185- USE_STANDARD = 2 ;
214+ // A female voice.
215+ SSML_VOICE_GENDER_FEMALE = 2 ;
186216
187- // Use an enhanced model variant:
188- //
189- // * If an enhanced variant does not exist for the given
190- // [model][google.cloud.dialogflow.cx.v3.InputAudioConfig.model] and request language, Dialogflow falls
191- // back to the standard variant.
192- //
193- // The [Cloud Speech
194- // documentation](https://cloud.google.com/speech-to-text/docs/enhanced-models)
195- // describes which models have enhanced variants.
196- //
197- // * If the API caller isn't eligible for enhanced models, Dialogflow returns
198- // an error. Please see the [Dialogflow
199- // docs](https://cloud.google.com/dialogflow/docs/data-logging)
200- // for how to make your project eligible.
201- USE_ENHANCED = 3 ;
217+ // A gender-neutral voice.
218+ SSML_VOICE_GENDER_NEUTRAL = 3 ;
202219}
203220
204221// Description of which voice to use for speech synthesis.
@@ -252,39 +269,6 @@ message SynthesizeSpeechConfig {
252269 VoiceSelectionParams voice = 4 ;
253270}
254271
255- // Instructs the speech synthesizer how to generate the output audio content.
256- message OutputAudioConfig {
257- // Required. Audio encoding of the synthesized audio content.
258- OutputAudioEncoding audio_encoding = 1 [(google.api.field_behavior ) = REQUIRED ];
259-
260- // Optional. The synthesis sample rate (in hertz) for this audio. If not
261- // provided, then the synthesizer will use the default sample rate based on
262- // the audio encoding. If this is different from the voice's natural sample
263- // rate, then the synthesizer will honor this request by converting to the
264- // desired sample rate (which might result in worse audio quality).
265- int32 sample_rate_hertz = 2 ;
266-
267- // Optional. Configuration of how speech should be synthesized.
268- SynthesizeSpeechConfig synthesize_speech_config = 3 ;
269- }
270-
271- // Gender of the voice as described in
272- // [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
273- enum SsmlVoiceGender {
274- // An unspecified gender, which means that the client doesn't care which
275- // gender the selected voice will have.
276- SSML_VOICE_GENDER_UNSPECIFIED = 0 ;
277-
278- // A male voice.
279- SSML_VOICE_GENDER_MALE = 1 ;
280-
281- // A female voice.
282- SSML_VOICE_GENDER_FEMALE = 2 ;
283-
284- // A gender-neutral voice.
285- SSML_VOICE_GENDER_NEUTRAL = 3 ;
286- }
287-
288272// Audio encoding of the output audio format in Text-To-Speech.
289273enum OutputAudioEncoding {
290274 // Not specified.
@@ -309,3 +293,19 @@ enum OutputAudioEncoding {
309293 // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
310294 OUTPUT_AUDIO_ENCODING_MULAW = 5 ;
311295}
296+
297+ // Instructs the speech synthesizer on how to generate the output audio content.
298+ message OutputAudioConfig {
299+ // Required. Audio encoding of the synthesized audio content.
300+ OutputAudioEncoding audio_encoding = 1 [(google.api.field_behavior ) = REQUIRED ];
301+
302+ // Optional. The synthesis sample rate (in hertz) for this audio. If not
303+ // provided, the synthesizer will use the default sample rate based on
304+ // the audio encoding. If this is different from the voice's natural sample
305+ // rate, then the synthesizer will honor this request by converting to the
306+ // desired sample rate (which might result in worse audio quality).
307+ int32 sample_rate_hertz = 2 ;
308+
309+ // Optional. Configuration of how speech should be synthesized.
310+ SynthesizeSpeechConfig synthesize_speech_config = 3 ;
311+ }
0 commit comments