@@ -479,10 +479,11 @@ message StreamingDetectIntentRequest {
479479//
480480// Multiple response messages can be returned in order:
481481//
482- // 1. If the input was set to streaming audio, the first one or more messages
483- // contain `recognition_result`. Each `recognition_result` represents a more
484- // complete transcript of what the user said. The last `recognition_result`
485- // has `is_final` set to `true`.
482+ // 1. If the `StreamingDetectIntentRequest.input_audio` field was
483+ // set, the `recognition_result` field is populated for one
484+ // or more messages.
485+ // See the [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult] message for details
486+ // about the result message sequence.
486487//
487488// 2. The next message contains `response_id`, `query_result`
488489// and optionally `webhook_status` if a WebHook was called.
@@ -520,35 +521,39 @@ message StreamingDetectIntentResponse {
520521// that is currently being processed or an indication that this is the end
521522// of the single requested utterance.
522523//
523- // Example:
524+ // While end-user audio is being processed, Dialogflow sends a series of
525+ // results. Each result may contain a `transcript` value. A transcript
526+ // represents a portion of the utterance. While the recognizer is processing
527+ // audio, transcript values may be interim values or finalized values.
528+ // Once a transcript is finalized, the `is_final` value is set to true and
529+ // processing continues for the next transcript.
524530//
525- // 1. transcript: "tube"
531+ // If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
532+ // was true, and the recognizer has completed processing audio,
533+ //     the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
534+ // following (last) result contains the last finalized transcript.
526535//
527- // 2. transcript: "to be a"
536+ // The complete end-user utterance is determined by concatenating the
537+ // finalized transcript values received for the series of results.
528538//
529- // 3. transcript: "to be"
539+ // In the following example, single utterance is enabled. In the case where
540+ // single utterance is not enabled, result 7 would not occur.
530541//
531- // 4. transcript: "to be or not to be"
532- // is_final: true
542+ // ```
543+ // Num | transcript | message_type | is_final
544+ // --- | ----------------------- | ----------------------- | --------
545+ // 1 | "tube" | TRANSCRIPT | false
546+ // 2 | "to be a" | TRANSCRIPT | false
547+ // 3 | "to be" | TRANSCRIPT | false
548+ // 4 | "to be or not to be" | TRANSCRIPT | true
549+ // 5 | "that's" | TRANSCRIPT | false
550+ //    6    | "that is"               | TRANSCRIPT              | false
551+ // 7 | unset | END_OF_SINGLE_UTTERANCE | unset
552+ // 8 | " that is the question" | TRANSCRIPT | true
553+ // ```
533554//
534- // 5. transcript: " that's"
535- //
536- // 6. transcript: " that is"
537- //
538- // 7. message_type: `END_OF_SINGLE_UTTERANCE`
539- //
540- // 8. transcript: " that is the question"
541- // is_final: true
542- //
543- // Only two of the responses contain final results (#4 and #8 indicated by
544- // `is_final: true`). Concatenating these generates the full transcript: "to be
545- // or not to be that is the question".
546- //
547- // In each response we populate:
548- //
549- // * for `TRANSCRIPT`: `transcript` and possibly `is_final`.
550- //
551- // * for `END_OF_SINGLE_UTTERANCE`: only `message_type`.
555+ //     Concatenating the finalized transcripts with `is_final` set to true
556+ //     yields the complete utterance: "to be or not to be that is the question".
552557message StreamingRecognitionResult {
553558 // Type of the response message.
554559 enum MessageType {
0 commit comments