@@ -582,10 +582,11 @@ message StreamingDetectIntentRequest {
582582//
583583// Multiple response messages can be returned in order:
584584//
585- // 1. If the input was set to streaming audio, the first one or more messages
586- // contain `recognition_result`. Each `recognition_result` represents a more
587- // complete transcript of what the user said. The last `recognition_result`
588- // has `is_final` set to `true`.
585+ // 1. If the `StreamingDetectIntentRequest.input_audio` field was
586+ // set, the `recognition_result` field is populated for one
587+ // or more messages.
588+ // See the [StreamingRecognitionResult][google.cloud.dialogflow.v2beta1.StreamingRecognitionResult] message for details
589+ // about the result message sequence.
589590//
590591// 2. The next message contains `response_id`, `query_result`,
591592// `alternative_query_results` and optionally `webhook_status` if a WebHook
@@ -639,35 +640,39 @@ message StreamingDetectIntentResponse {
639640// that is currently being processed or an indication that this is the end
640641// of the single requested utterance.
641642//
642- // Example:
643+ // While end-user audio is being processed, Dialogflow sends a series of
644+ // results. Each result may contain a `transcript` value. A transcript
645+ // represents a portion of the utterance. While the recognizer is processing
646+ // audio, transcript values may be interim values or finalized values.
647+ // Once a transcript is finalized, the `is_final` value is set to true and
648+ // processing continues for the next transcript.
643649//
644- // 1. transcript: "tube"
650+ // If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
651+ // was true, and the recognizer has completed processing audio,
652+ //  the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
653+ // following (last) result contains the last finalized transcript.
645654//
646- // 2. transcript: "to be a"
655+ // The complete end-user utterance is determined by concatenating the
656+ // finalized transcript values received for the series of results.
647657//
648- // 3. transcript: "to be"
658+ // In the following example, single utterance is enabled. In the case where
659+ // single utterance is not enabled, result 7 would not occur.
649660//
650- // 4. transcript: "to be or not to be"
651- // is_final: true
661+ // ```
662+ // Num | transcript | message_type | is_final
663+ // --- | ----------------------- | ----------------------- | --------
664+ // 1 | "tube" | TRANSCRIPT | false
665+ // 2 | "to be a" | TRANSCRIPT | false
666+ // 3 | "to be" | TRANSCRIPT | false
667+ // 4 | "to be or not to be" | TRANSCRIPT | true
668+ // 5 | "that's" | TRANSCRIPT | false
669+ //  6   | "that is"               | TRANSCRIPT              | false
670+ // 7 | unset | END_OF_SINGLE_UTTERANCE | unset
671+ // 8 | " that is the question" | TRANSCRIPT | true
672+ // ```
652673//
653- // 5. transcript: " that's"
654- //
655- // 6. transcript: " that is"
656- //
657- // 7. message_type: `END_OF_SINGLE_UTTERANCE`
658- //
659- // 8. transcript: " that is the question"
660- // is_final: true
661- //
662- // Only two of the responses contain final results (#4 and #8 indicated by
663- // `is_final: true`). Concatenating these generates the full transcript: "to be
664- // or not to be that is the question".
665- //
666- // In each response we populate:
667- //
668- // * for `TRANSCRIPT`: `transcript` and possibly `is_final`.
669- //
670- // * for `END_OF_SINGLE_UTTERANCE`: only `message_type`.
674+ // Concatenating the finalized transcripts with `is_final` set to true,
675+ // the complete utterance becomes "to be or not to be that is the question".
671676message StreamingRecognitionResult {
672677 // Type of the response message.
673678 enum MessageType {
0 commit comments