@@ -29,31 +29,6 @@ option java_outer_classname = "AudioConfigProto";
2929option java_package = "com.google.cloud.dialogflow.cx.v3" ;
3030option objc_class_prefix = "DF" ;
3131
32- // Information for a word recognized by the speech recognizer.
33- message SpeechWordInfo {
34- // The word this info is for.
35- string word = 3 ;
36-
37- // Time offset relative to the beginning of the audio that corresponds to the
38- // start of the spoken word. This is an experimental feature and the accuracy
39- // of the time offset can vary.
40- google.protobuf.Duration start_offset = 1 ;
41-
42- // Time offset relative to the beginning of the audio that corresponds to the
43- // end of the spoken word. This is an experimental feature and the accuracy of
44- // the time offset can vary.
45- google.protobuf.Duration end_offset = 2 ;
46-
47- // The Speech confidence between 0.0 and 1.0 for this word. A higher number
48- // indicates an estimated greater likelihood that the recognized word is
49- // correct. The default of 0.0 is a sentinel value indicating that confidence
50- // was not set.
51- //
52- // This field is not guaranteed to be fully stable over time for the same
53- // audio input. Users should also not rely on it to always be provided.
54- float confidence = 4 ;
55- }
56-
5732// Audio encoding of the audio content sent in the conversational query request.
5833// Refer to the
5934// [Cloud Speech API
@@ -103,6 +78,31 @@ enum AudioEncoding {
10378 AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7 ;
10479}
10580
81+ // Text, timing and confidence for one word recognized by the speech recognizer.
82+ message SpeechWordInfo {
83+ // The word this info is for. Declared first but numbered 3; do not renumber.
84+ string word = 3 ;
85+
86+ // Offset of the start of the spoken word, measured from the beginning of the
87+ // audio. This is an experimental feature and the accuracy of the time offset
88+ // can vary.
89+ google.protobuf.Duration start_offset = 1 ;
90+
91+ // Offset of the end of the spoken word, measured from the beginning of the
92+ // audio. This is an experimental feature and the accuracy of the time offset
93+ // can vary.
94+ google.protobuf.Duration end_offset = 2 ;
95+
96+ // Speech confidence for this word, between 0.0 and 1.0. A higher number
97+ // indicates an estimated greater likelihood that the recognized word is
98+ // correct. 0.0 is the default and acts as a sentinel value meaning that
99+ // confidence was not set.
100+ //
101+ // This value is not guaranteed to be fully stable over time for the same
102+ // audio input, and callers should not rely on it always being provided.
103+ float confidence = 4 ;
104+ }
105+
106106// Instructs the speech recognizer on how to process the audio content.
107107message InputAudioConfig {
108108 // Required. Audio encoding of the audio content to process.
0 commit comments