@@ -31,6 +31,31 @@ option java_package = "com.google.cloud.dialogflow.cx.v3beta1";
3131option objc_class_prefix = "DF" ;
3232option ruby_package = "Google::Cloud::Dialogflow::CX::V3beta1" ;
3333
34+ // Information about a single word recognized by the speech recognizer.
35+ message SpeechWordInfo {
36+ // The recognized word that this message describes.
37+ string word = 3 ;
38+
39+ // Time offset, measured from the beginning of the audio, at which the spoken
40+ // word starts. This is an experimental feature, and the accuracy of the time
41+ // offset can vary.
42+ google.protobuf.Duration start_offset = 1 ;
43+
44+ // Time offset, measured from the beginning of the audio, at which the spoken
45+ // word ends. This is an experimental feature, and the accuracy of the time
46+ // offset can vary.
47+ google.protobuf.Duration end_offset = 2 ;
48+
49+ // The Speech confidence for this word, between 0.0 and 1.0. A higher value
50+ // indicates a greater estimated likelihood that the recognized word is
51+ // correct. A value of 0.0 (the default) is a sentinel indicating that
52+ // confidence was not set.
53+ //
54+ // This value is not guaranteed to be fully stable over time for the same
55+ // audio input, and callers should not rely on it always being provided.
56+ float confidence = 4 ;
57+ }
58+
3459// Audio encoding of the audio content sent in the conversational query request.
3560// Refer to the
3661// [Cloud Speech API
@@ -80,31 +105,6 @@ enum AudioEncoding {
80105 AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7 ;
81106}
82107
83- // Information about a single word recognized by the speech recognizer.
84- message SpeechWordInfo {
85- // The recognized word that this message describes.
86- string word = 3 ;
87-
88- // Time offset, measured from the beginning of the audio, at which the spoken
89- // word starts. This is an experimental feature, and the accuracy of the time
90- // offset can vary.
91- google.protobuf.Duration start_offset = 1 ;
92-
93- // Time offset, measured from the beginning of the audio, at which the spoken
94- // word ends. This is an experimental feature, and the accuracy of the time
95- // offset can vary.
96- google.protobuf.Duration end_offset = 2 ;
97-
98- // The Speech confidence for this word, between 0.0 and 1.0. A higher value
99- // indicates a greater estimated likelihood that the recognized word is
100- // correct. A value of 0.0 (the default) is a sentinel indicating that
101- // confidence was not set.
102- //
103- // This value is not guaranteed to be fully stable over time for the same
104- // audio input, and callers should not rely on it always being provided.
105- float confidence = 4 ;
106- }
107-
108108// Instructs the speech recognizer on how to process the audio content.
109109message InputAudioConfig {
110110 // Required. Audio encoding of the audio content to process.
0 commit comments