@@ -479,10 +479,11 @@ message StreamingDetectIntentRequest {
479479//
480480// Multiple response messages can be returned in order:
481481//
482- // 1. If the input was set to streaming audio, the first one or more messages
483- // contain `recognition_result`. Each `recognition_result` represents a more
484- // complete transcript of what the user said. The last `recognition_result`
485- // has `is_final` set to `true`.
482+ // 1. If the `StreamingDetectIntentRequest.input_audio` field was
483+ // set, the `recognition_result` field is populated for one
484+ // or more messages.
485+ // See the [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult] message for details
486+ // about the result message sequence.
486487//
487488// 2. The next message contains `response_id`, `query_result`
488489// and optionally `webhook_status` if a WebHook was called.
@@ -520,35 +521,39 @@ message StreamingDetectIntentResponse {
520521// that is currently being processed or an indication that this is the end
521522// of the single requested utterance.
522523//
523- // Example:
524+ // While end-user audio is being processed, Dialogflow sends a series of
525+ // results. Each result may contain a `transcript` value. A transcript
526+ // represents a portion of the utterance. While the recognizer is processing
527+ // audio, transcript values may be interim values or finalized values.
528+ // Once a transcript is finalized, the `is_final` value is set to true and
529+ // processing continues for the next transcript.
524530//
525- // 1. transcript: "tube"
531+ // If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
532+ // was true, and the recognizer has completed processing audio,
533+ //     the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
534+ // following (last) result contains the last finalized transcript.
526535//
527- // 2. transcript: "to be a"
536+ // The complete end-user utterance is determined by concatenating the
537+ // finalized transcript values received for the series of results.
528538//
529- // 3. transcript: "to be"
539+ // In the following example, single utterance is enabled. In the case where
540+ // single utterance is not enabled, result 7 would not occur.
530541//
531- // 4. transcript: "to be or not to be"
532- // is_final: true
542+ // ```
543+ // Num | transcript | message_type | is_final
544+ // --- | ----------------------- | ----------------------- | --------
545+ // 1 | "tube" | TRANSCRIPT | false
546+ // 2 | "to be a" | TRANSCRIPT | false
547+ // 3 | "to be" | TRANSCRIPT | false
548+ // 4 | "to be or not to be" | TRANSCRIPT | true
549+ // 5 | "that's" | TRANSCRIPT | false
550+ //    6    | "that is"               | TRANSCRIPT              | false
551+ // 7 | unset | END_OF_SINGLE_UTTERANCE | unset
552+ // 8 | " that is the question" | TRANSCRIPT | true
553+ // ```
533554//
534- // 5. transcript: " that's"
535- //
536- // 6. transcript: " that is"
537- //
538- // 7. message_type: `END_OF_SINGLE_UTTERANCE`
539- //
540- // 8. transcript: " that is the question"
541- // is_final: true
542- //
543- // Only two of the responses contain final results (#4 and #8 indicated by
544- // `is_final: true`). Concatenating these generates the full transcript: "to be
545- // or not to be that is the question".
546- //
547- // In each response we populate:
548- //
549- // * for `TRANSCRIPT`: `transcript` and possibly `is_final`.
550- //
551- // * for `END_OF_SINGLE_UTTERANCE`: only `message_type`.
555+ //     Concatenating the finalized transcripts with `is_final` set to true
556+ //     yields the complete utterance: "to be or not to be that is the question".
552557message StreamingRecognitionResult {
553558 // Type of the response message.
554559 enum MessageType {
0 commit comments