@@ -19,6 +19,7 @@ package google.cloud.dialogflow.cx.v3;
1919import "google/api/field_behavior.proto" ;
2020import "google/api/resource.proto" ;
2121import "google/protobuf/duration.proto" ;
22+ import "google/protobuf/timestamp.proto" ;
2223import "google/api/annotations.proto" ;
2324
2425option cc_enable_arenas = true ;
@@ -30,31 +31,6 @@ option java_package = "com.google.cloud.dialogflow.cx.v3";
3031option objc_class_prefix = "DF" ;
3132option ruby_package = "Google::Cloud::Dialogflow::CX::V3" ;
3233
33- // Information for a word recognized by the speech recognizer.
34- message SpeechWordInfo {
35- // The word this info is for.
36- string word = 3 ;
37-
38- // Time offset relative to the beginning of the audio that corresponds to the
39- // start of the spoken word. This is an experimental feature and the accuracy
40- // of the time offset can vary.
41- google.protobuf.Duration start_offset = 1 ;
42-
43- // Time offset relative to the beginning of the audio that corresponds to the
44- // end of the spoken word. This is an experimental feature and the accuracy of
45- // the time offset can vary.
46- google.protobuf.Duration end_offset = 2 ;
47-
48- // The Speech confidence between 0.0 and 1.0 for this word. A higher number
49- // indicates an estimated greater likelihood that the recognized word is
50- // correct. The default of 0.0 is a sentinel value indicating that confidence
51- // was not set.
52- //
53- // This field is not guaranteed to be fully stable over time for the same
54- // audio input. Users should also not rely on it to always be provided.
55- float confidence = 4 ;
56- }
57-
5834// Audio encoding of the audio content sent in the conversational query request.
5935// Refer to the
6036// [Cloud Speech API
@@ -104,6 +80,31 @@ enum AudioEncoding {
10480 AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7 ;
10581}
10682
83+ // Information for a word recognized by the speech recognizer.
84+ message SpeechWordInfo {
85+ // The word this info is for.
86+ string word = 3 ;
87+
88+ // Time offset relative to the beginning of the audio that corresponds to the
89+ // start of the spoken word. This is an experimental feature and the accuracy
90+ // of the time offset can vary.
91+ google.protobuf.Duration start_offset = 1 ;
92+
93+ // Time offset relative to the beginning of the audio that corresponds to the
94+ // end of the spoken word. This is an experimental feature and the accuracy of
95+ // the time offset can vary.
96+ google.protobuf.Duration end_offset = 2 ;
97+
98+ // The Speech confidence between 0.0 and 1.0 for this word. A higher number
99+ // indicates an estimated greater likelihood that the recognized word is
100+ // correct. The default of 0.0 is a sentinel value indicating that confidence
101+ // was not set.
102+ //
103+ // This field is not guaranteed to be fully stable over time for the same
104+ // audio input. Users should also not rely on it to always be provided.
105+ float confidence = 4 ;
106+ }
107+
107108// Instructs the speech recognizer on how to process the audio content.
108109message InputAudioConfig {
109110 // Required. Audio encoding of the audio content to process.
0 commit comments