Skip to content

Commit 4f586ea

Browse files
Google APIs authored and copybara-github committed
docs: Add documentation for latest models to RecognitionConfig
PiperOrigin-RevId: 446200223
1 parent 27ee094 commit 4f586ea

2 files changed

Lines changed: 84 additions & 50 deletions

File tree

google/cloud/speech/v1/cloud_speech.proto

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ option objc_class_prefix = "GCS";
3636
// Service that implements Google Cloud Speech API.
3737
service Speech {
3838
option (google.api.default_host) = "speech.googleapis.com";
39-
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
39+
option (google.api.oauth_scopes) =
40+
"https://www.googleapis.com/auth/cloud-platform";
4041

4142
// Performs synchronous speech recognition: receive results after all audio
4243
// has been sent and processed.
@@ -54,7 +55,8 @@ service Speech {
5455
// a `LongRunningRecognizeResponse` message.
5556
// For more information on asynchronous speech recognition, see the
5657
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
57-
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
58+
rpc LongRunningRecognize(LongRunningRecognizeRequest)
59+
returns (google.longrunning.Operation) {
5860
option (google.api.http) = {
5961
post: "/v1/speech:longrunningrecognize"
6062
body: "*"
@@ -68,8 +70,8 @@ service Speech {
6870

6971
// Performs bidirectional streaming speech recognition: receive results while
7072
// sending audio. This method is only available via the gRPC API (not REST).
71-
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
72-
}
73+
rpc StreamingRecognize(stream StreamingRecognizeRequest)
74+
returns (stream StreamingRecognizeResponse) {}
7375
}
7476

7577
// The top-level message sent by the client for the `Recognize` method.
@@ -93,7 +95,8 @@ message LongRunningRecognizeRequest {
9395
RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];
9496

9597
// Optional. Specifies an optional destination for the recognition results.
96-
TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
98+
TranscriptOutputConfig output_config = 4
99+
[(google.api.field_behavior) = OPTIONAL];
97100
}
98101

99102
// Specifies an optional destination for the recognition results.
@@ -193,7 +196,8 @@ message RecognitionConfig {
193196
// an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
194197
// encoding configuration must match the encoding described in the audio
195198
// header; otherwise the request returns an
196-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
199+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
200+
// code.
197201
enum AudioEncoding {
198202
// Not specified.
199203
ENCODING_UNSPECIFIED = 0;
@@ -246,7 +250,8 @@ message RecognitionConfig {
246250

247251
// Encoding of audio data sent in all `RecognitionAudio` messages.
248252
// This field is optional for `FLAC` and `WAV` audio files and required
249-
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
253+
// for all other audio formats. For details, see
254+
// [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
250255
AudioEncoding encoding = 1;
251256

252257
// Sample rate in Hertz of the audio data sent in all
@@ -255,7 +260,8 @@ message RecognitionConfig {
255260
// source to 16000 Hz. If that's not possible, use the native sample rate of
256261
// the audio source (instead of re-sampling).
257262
// This field is optional for FLAC and WAV audio files, but is
258-
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
263+
// required for all other audio formats. For details, see
264+
// [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
259265
int32 sample_rate_hertz = 2;
260266

261267
// The number of channels in the input audio data.
@@ -383,6 +389,19 @@ message RecognitionConfig {
383389
// <td><b>Description</b></td>
384390
// </tr>
385391
// <tr>
392+
// <td><code>latest_long</code></td>
393+
// <td>Best for long form content like media or conversation.</td>
394+
// </tr>
395+
// <tr>
396+
// <td><code>latest_short</code></td>
397+
// <td>Best for short form content like commands or single shot directed
398+
// speech.</td>
399+
// </tr>
400+
// <tr>
401+
// <td><code>command_and_search</code></td>
402+
// <td>Best for short queries such as voice commands or voice search.</td>
403+
// </tr>
404+
// <tr>
386405
// <td><code>command_and_search</code></td>
387406
// <td>Best for short queries such as voice commands or voice search.</td>
388407
// </tr>
@@ -436,10 +455,8 @@ message SpeakerDiarizationConfig {
436455
int32 max_speaker_count = 3;
437456

438457
// Output only. Unused.
439-
int32 speaker_tag = 5 [
440-
deprecated = true,
441-
(google.api.field_behavior) = OUTPUT_ONLY
442-
];
458+
int32 speaker_tag = 5
459+
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
443460
}
444461

445462
// Description of audio data to be recognized.
@@ -599,8 +616,8 @@ message SpeechContext {
599616

600617
// Contains audio data in the encoding specified in the `RecognitionConfig`.
601618
// Either `content` or `uri` must be supplied. Supplying both or neither
602-
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
603-
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
619+
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
620+
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
604621
message RecognitionAudio {
605622
// The audio source, which is either inline content or a Google Cloud
606623
// Storage uri.
@@ -615,8 +632,9 @@ message RecognitionAudio {
615632
// Currently, only Google Cloud Storage URIs are
616633
// supported, which must be specified in the following format:
617634
// `gs://bucket_name/object_name` (other URI formats return
618-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
619-
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
635+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
636+
// For more information, see [Request
637+
// URIs](https://cloud.google.com/storage/docs/reference-uris).
620638
string uri = 2;
621639
}
622640
}
@@ -667,8 +685,8 @@ message LongRunningRecognizeMetadata {
667685
// Time of the most recent processing update.
668686
google.protobuf.Timestamp last_update_time = 3;
669687

670-
// Output only. The URI of the audio file being transcribed. Empty if the audio was sent
671-
// as byte content.
688+
// Output only. The URI of the audio file being transcribed. Empty if the
689+
// audio was sent as byte content.
672690
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
673691
}
674692

@@ -787,9 +805,9 @@ message StreamingRecognitionResult {
787805
// For audio_channel_count = N, its output values can range from '1' to 'N'.
788806
int32 channel_tag = 5;
789807

790-
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
791-
// of the language in this result. This language code was detected to have
792-
// the most likelihood of being spoken in the audio.
808+
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
809+
// language tag of the language in this result. This language code was
810+
// detected to have the most likelihood of being spoken in the audio.
793811
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
794812
}
795813

@@ -810,9 +828,9 @@ message SpeechRecognitionResult {
810828
// beginning of the audio.
811829
google.protobuf.Duration result_end_time = 4;
812830

813-
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
814-
// of the language in this result. This language code was detected to have
815-
// the most likelihood of being spoken in the audio.
831+
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
832+
// language tag of the language in this result. This language code was
833+
// detected to have the most likelihood of being spoken in the audio.
816834
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
817835
}
818836

google/cloud/speech/v1p1beta1/cloud_speech.proto

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ option objc_class_prefix = "GCS";
3737
// Service that implements Google Cloud Speech API.
3838
service Speech {
3939
option (google.api.default_host) = "speech.googleapis.com";
40-
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
40+
option (google.api.oauth_scopes) =
41+
"https://www.googleapis.com/auth/cloud-platform";
4142

4243
// Performs synchronous speech recognition: receive results after all audio
4344
// has been sent and processed.
@@ -55,7 +56,8 @@ service Speech {
5556
// a `LongRunningRecognizeResponse` message.
5657
// For more information on asynchronous speech recognition, see the
5758
// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
58-
rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
59+
rpc LongRunningRecognize(LongRunningRecognizeRequest)
60+
returns (google.longrunning.Operation) {
5961
option (google.api.http) = {
6062
post: "/v1p1beta1/speech:longrunningrecognize"
6163
body: "*"
@@ -69,8 +71,8 @@ service Speech {
6971

7072
// Performs bidirectional streaming speech recognition: receive results while
7173
// sending audio. This method is only available via the gRPC API (not REST).
72-
rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
73-
}
74+
rpc StreamingRecognize(stream StreamingRecognizeRequest)
75+
returns (stream StreamingRecognizeResponse) {}
7476
}
7577

7678
// The top-level message sent by the client for the `Recognize` method.
@@ -94,7 +96,8 @@ message LongRunningRecognizeRequest {
9496
RecognitionAudio audio = 2 [(google.api.field_behavior) = REQUIRED];
9597

9698
// Optional. Specifies an optional destination for the recognition results.
97-
TranscriptOutputConfig output_config = 4 [(google.api.field_behavior) = OPTIONAL];
99+
TranscriptOutputConfig output_config = 4
100+
[(google.api.field_behavior) = OPTIONAL];
98101
}
99102

100103
// Specifies an optional destination for the recognition results.
@@ -194,7 +197,8 @@ message RecognitionConfig {
194197
// an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
195198
// encoding configuration must match the encoding described in the audio
196199
// header; otherwise the request returns an
197-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
200+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error
201+
// code.
198202
enum AudioEncoding {
199203
// Not specified.
200204
ENCODING_UNSPECIFIED = 0;
@@ -253,7 +257,8 @@ message RecognitionConfig {
253257

254258
// Encoding of audio data sent in all `RecognitionAudio` messages.
255259
// This field is optional for `FLAC` and `WAV` audio files and required
256-
// for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
260+
// for all other audio formats. For details, see
261+
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
257262
AudioEncoding encoding = 1;
258263

259264
// Sample rate in Hertz of the audio data sent in all
@@ -262,7 +267,8 @@ message RecognitionConfig {
262267
// source to 16000 Hz. If that's not possible, use the native sample rate of
263268
// the audio source (instead of re-sampling).
264269
// This field is optional for FLAC and WAV audio files, but is
265-
// required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
270+
// required for all other audio formats. For details, see
271+
// [AudioEncoding][google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding].
266272
int32 sample_rate_hertz = 2;
267273

268274
// The number of channels in the input audio data.
@@ -407,6 +413,15 @@ message RecognitionConfig {
407413
// <td><b>Description</b></td>
408414
// </tr>
409415
// <tr>
416+
// <td><code>latest_long</code></td>
417+
// <td>Best for long form content like media or conversation.</td>
418+
// </tr>
419+
// <tr>
420+
// <td><code>latest_short</code></td>
421+
// <td>Best for short form content like commands or single shot directed
422+
// speech.</td>
423+
// </tr>
424+
// <tr>
410425
// <td><code>command_and_search</code></td>
411426
// <td>Best for short queries such as voice commands or voice search.</td>
412427
// </tr>
@@ -460,10 +475,8 @@ message SpeakerDiarizationConfig {
460475
int32 max_speaker_count = 3;
461476

462477
// Output only. Unused.
463-
int32 speaker_tag = 5 [
464-
deprecated = true,
465-
(google.api.field_behavior) = OUTPUT_ONLY
466-
];
478+
int32 speaker_tag = 5
479+
[deprecated = true, (google.api.field_behavior) = OUTPUT_ONLY];
467480
}
468481

469482
// Description of audio data to be recognized.
@@ -627,8 +640,8 @@ message SpeechContext {
627640

628641
// Contains audio data in the encoding specified in the `RecognitionConfig`.
629642
// Either `content` or `uri` must be supplied. Supplying both or neither
630-
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
631-
// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
643+
// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
644+
// See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
632645
message RecognitionAudio {
633646
// The audio source, which is either inline content or a Google Cloud
634647
// Storage uri.
@@ -643,8 +656,9 @@ message RecognitionAudio {
643656
// Currently, only Google Cloud Storage URIs are
644657
// supported, which must be specified in the following format:
645658
// `gs://bucket_name/object_name` (other URI formats return
646-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
647-
// [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
659+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]).
660+
// For more information, see [Request
661+
// URIs](https://cloud.google.com/storage/docs/reference-uris).
648662
string uri = 2;
649663
}
650664
}
@@ -695,12 +709,14 @@ message LongRunningRecognizeMetadata {
695709
// Time of the most recent processing update.
696710
google.protobuf.Timestamp last_update_time = 3;
697711

698-
// Output only. The URI of the audio file being transcribed. Empty if the audio was sent
699-
// as byte content.
712+
// Output only. The URI of the audio file being transcribed. Empty if the
713+
// audio was sent as byte content.
700714
string uri = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
701715

702-
// Output only. A copy of the TranscriptOutputConfig if it was set in the request.
703-
TranscriptOutputConfig output_config = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
716+
// Output only. A copy of the TranscriptOutputConfig if it was set in the
717+
// request.
718+
TranscriptOutputConfig output_config = 5
719+
[(google.api.field_behavior) = OUTPUT_ONLY];
704720
}
705721

706722
// `StreamingRecognizeResponse` is the only message returned to the client by
@@ -818,9 +834,9 @@ message StreamingRecognitionResult {
818834
// For audio_channel_count = N, its output values can range from '1' to 'N'.
819835
int32 channel_tag = 5;
820836

821-
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
822-
// of the language in this result. This language code was detected to have
823-
// the most likelihood of being spoken in the audio.
837+
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
838+
// language tag of the language in this result. This language code was
839+
// detected to have the most likelihood of being spoken in the audio.
824840
string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
825841
}
826842

@@ -841,9 +857,9 @@ message SpeechRecognitionResult {
841857
// beginning of the audio.
842858
google.protobuf.Duration result_end_time = 4;
843859

844-
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag
845-
// of the language in this result. This language code was detected to have
846-
// the most likelihood of being spoken in the audio.
860+
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
861+
// language tag of the language in this result. This language code was
862+
// detected to have the most likelihood of being spoken in the audio.
847863
string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
848864
}
849865

0 commit comments

Comments
 (0)