@@ -29,6 +29,31 @@ option java_outer_classname = "AudioConfigProto";
2929option java_package = "com.google.cloud.dialogflow.cx.v3" ;
3030option objc_class_prefix = "DF" ;
3131
32+ // Information for a word recognized by the speech recognizer.
33+ message SpeechWordInfo {
34+ // The word this info is for.
35+ string word = 3 ;
36+
37+ // Time offset relative to the beginning of the audio that corresponds to the
38+ // start of the spoken word. This is an experimental feature and the accuracy
39+ // of the time offset can vary.
40+ google.protobuf.Duration start_offset = 1 ;
41+
42+ // Time offset relative to the beginning of the audio that corresponds to the
43+ // end of the spoken word. This is an experimental feature and the accuracy of
44+ // the time offset can vary.
45+ google.protobuf.Duration end_offset = 2 ;
46+
47+ // The Speech confidence between 0.0 and 1.0 for this word. A higher number
48+ // indicates an estimated greater likelihood that the recognized word is
49+ // correct. The default of 0.0 is a sentinel value indicating that confidence
50+ // was not set.
51+ //
52+ // This field is not guaranteed to be fully stable over time for the same
53+ // audio input. Users should also not rely on it to always be provided.
54+ float confidence = 4 ;
55+ }
56+
3257// Audio encoding of the audio content sent in the conversational query request.
3358// Refer to the
3459// [Cloud Speech API
@@ -78,31 +103,6 @@ enum AudioEncoding {
78103 AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7 ;
79104}
80105
81- // Information for a word recognized by the speech recognizer.
82- message SpeechWordInfo {
83- // The word this info is for.
84- string word = 3 ;
85-
86- // Time offset relative to the beginning of the audio that corresponds to the
87- // start of the spoken word. This is an experimental feature and the accuracy
88- // of the time offset can vary.
89- google.protobuf.Duration start_offset = 1 ;
90-
91- // Time offset relative to the beginning of the audio that corresponds to the
92- // end of the spoken word. This is an experimental feature and the accuracy of
93- // the time offset can vary.
94- google.protobuf.Duration end_offset = 2 ;
95-
96- // The Speech confidence between 0.0 and 1.0 for this word. A higher number
97- // indicates an estimated greater likelihood that the recognized word is
98- // correct. The default of 0.0 is a sentinel value indicating that confidence
99- // was not set.
100- //
101- // This field is not guaranteed to be fully stable over time for the same
102- // audio input. Users should also not rely on it to always be provided.
103- float confidence = 4 ;
104- }
105-
106106// Instructs the speech recognizer on how to process the audio content.
107107message InputAudioConfig {
108108 // Required. Audio encoding of the audio content to process.
0 commit comments