@@ -184,6 +184,39 @@ message SpeechWordInfo {
184184 float confidence = 4 ;
185185}
186186
187+ // Configuration of the barge-in behavior. Barge-in instructs the API to return
188+ // a detected utterance at a proper time while the client is playing back the
189+ // response audio from a previous request. When the client sees the
190+ // utterance, it should stop the playback and immediately get ready for
191+ // receiving the responses for the current request.
192+ //
193+ // The barge-in handling requires the client to start streaming audio input
194+ // as soon as it starts playing back the audio from the previous response. The
195+ // playback is modeled as two phases:
196+ //
197+ // * No barge-in phase: this phase goes first; during it, speech detection
198+ // should not be carried out.
199+ //
200+ // * Barge-in phase: this phase follows the no barge-in phase; during it,
201+ // the API starts speech detection and may inform the client that an utterance
202+ // has been detected. Note that a no-speech event is not expected in this
203+ // phase.
204+ //
205+ // The client provides this configuration in terms of the durations of those
206+ // two phases. The durations are measured in terms of the audio length from
207+ // the start of the input audio.
208+ //
209+ // A no-speech event is a response with END_OF_UTTERANCE without any transcript
210+ // following up.
211+ message BargeInConfig {
212+ // Duration of the no barge-in phase, measured from the start of the input
213+ // audio. This portion of the audio is not eligible for barge-in.
214+ google.protobuf.Duration no_barge_in_duration = 1 ;
215+
216+ // Total duration for the playback at the beginning of the input audio.
217+ google.protobuf.Duration total_duration = 2 ;
218+ }
219+
187220// Instructs the speech recognizer on how to process the audio content.
188221message InputAudioConfig {
189222 // Required. Audio encoding of the audio content to process.
@@ -266,6 +299,9 @@ message InputAudioConfig {
266299 // If `false` and recognition doesn't return any result, trigger
267300 // `NO_SPEECH_RECOGNIZED` event to Dialogflow agent.
268301 bool disable_no_speech_recognized_event = 14 ;
302+
303+ // Configuration of barge-in behavior during the streaming of input audio.
304+ BargeInConfig barge_in_config = 15 ;
269305}
270306
271307// Gender of the voice as described in
0 commit comments