Skip to content

Commit 4d70a58

Browse files
Google APIs and copybara-github
authored and committed
feat: add API for writing BatchRecognize transcripts in SRT and VTT formats
docs: update field documentation based on field behavior updates

PiperOrigin-RevId: 609024258
1 parent 1608c32 commit 4d70a58

1 file changed

Lines changed: 75 additions & 14 deletions

File tree

google/cloud/speech/v2/cloud_speech.proto

Lines changed: 75 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -574,9 +574,12 @@ message Recognizer {
574574
DELETED = 4;
575575
}
576576

577-
// Output only. The resource name of the Recognizer.
577+
// Output only. Identifier. The resource name of the Recognizer.
578578
// Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`.
579-
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
579+
string name = 1 [
580+
(google.api.field_behavior) = OUTPUT_ONLY,
581+
(google.api.field_behavior) = IDENTIFIER
582+
];
580583

581584
// Output only. System-assigned unique identifier for the Recognizer.
582585
string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
@@ -1278,6 +1281,34 @@ message GcsOutputConfig {
12781281
// Output configurations for inline response.
12791282
message InlineOutputConfig {}
12801283

1284+
// Output configurations for serialized `BatchRecognizeResults` protos. The
// message declares no fields; it is selected via `OutputFormatConfig.native`.
1285+
message NativeOutputFileFormatConfig {}
1286+
1287+
// Output configurations for a [WebVTT](https://www.w3.org/TR/webvtt1/)-formatted
1288+
// subtitle file. The message declares no fields; it is selected via
// `OutputFormatConfig.vtt`.
1289+
message VttOutputFileFormatConfig {}
1290+
1291+
// Output configurations for [SubRip
1292+
// Text](https://www.matroska.org/technical/subtitles.html#srt-subtitles)
1293+
// formatted subtitle file.
1294+
message SrtOutputFileFormatConfig {}
1295+
1296+
// Configuration for the format of the results stored to `output`.
1297+
message OutputFormatConfig {
1298+
// Configuration for the native output format. If this field is set, or if
1299+
// no other output format field is set, then transcripts will be written to
1300+
// the sink in the native format.
1301+
NativeOutputFileFormatConfig native = 1;
1302+
1303+
// Configuration for the vtt output format. If this field is set, then
1304+
// transcripts will be written to the sink in the vtt format.
1305+
VttOutputFileFormatConfig vtt = 2;
1306+
1307+
// Configuration for the srt output format. If this field is set, then
1308+
// transcripts will be written to the sink in the srt format.
1309+
SrtOutputFileFormatConfig srt = 3;
1310+
}
1311+
12811312
// Configuration options for the output(s) of recognition.
12821313
message RecognitionOutputConfig {
12831314
oneof output {
@@ -1292,6 +1323,11 @@ message RecognitionOutputConfig {
12921323
// with just one audio file.
12931324
InlineOutputConfig inline_response_config = 2;
12941325
}
1326+
1327+
// Optional. Configuration for the format of the results stored to `output`.
1328+
// If unspecified, transcripts will be written in the `NATIVE` format only.
1329+
OutputFormatConfig output_format_config = 3
1330+
[(google.api.field_behavior) = OPTIONAL];
12951331
}
12961332

12971333
// Response message for
@@ -1321,12 +1357,28 @@ message BatchRecognizeResults {
13211357
message CloudStorageResult {
13221358
// The Cloud Storage URI to which recognition results were written.
13231359
string uri = 1;
1360+
1361+
// The Cloud Storage URI to which recognition results were written as
1362+
// VTT-formatted captions. This is populated only when `VTT` output is
// requested (see `OutputFormatConfig.vtt`).
1363+
string vtt_format_uri = 2;
1364+
1365+
// The Cloud Storage URI to which recognition results were written as
1366+
// SRT-formatted captions. This is populated only when `SRT` output is
// requested (see `OutputFormatConfig.srt`).
1367+
string srt_format_uri = 3;
13241368
}
13251369

13261370
// Final results returned inline in the recognition response.
13271371
message InlineResult {
13281372
// The transcript for the audio file.
13291373
BatchRecognizeResults transcript = 1;
1374+
1375+
// The transcript for the audio file as VTT-formatted captions. This is
1376+
// populated only when `VTT` output is requested (see
// `OutputFormatConfig.vtt`).
1377+
string vtt_captions = 2;
1378+
1379+
// The transcript for the audio file as SRT-formatted captions. This is
1380+
// populated only when `SRT` output is requested (see
// `OutputFormatConfig.srt`).
1381+
string srt_captions = 3;
13301382
}
13311383

13321384
// Final results for a single file.
@@ -1555,10 +1607,13 @@ message Config {
15551607
pattern: "projects/{project}/locations/{location}/config"
15561608
};
15571609

1558-
// Output only. The name of the config resource. There is exactly one config
1559-
// resource per project per location. The expected format is
1610+
// Output only. Identifier. The name of the config resource. There is exactly
1611+
// one config resource per project per location. The expected format is
15601612
// `projects/{project}/locations/{location}/config`.
1561-
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1613+
string name = 1 [
1614+
(google.api.field_behavior) = OUTPUT_ONLY,
1615+
(google.api.field_behavior) = IDENTIFIER
1616+
];
15621617

15631618
// Optional. An optional [KMS key
15641619
// name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) that if
@@ -1632,17 +1687,20 @@ message CustomClass {
16321687
DELETED = 4;
16331688
}
16341689

1635-
// Output only. The resource name of the CustomClass.
1690+
// Output only. Identifier. The resource name of the CustomClass.
16361691
// Format:
16371692
// `projects/{project}/locations/{location}/customClasses/{custom_class}`.
1638-
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1693+
string name = 1 [
1694+
(google.api.field_behavior) = OUTPUT_ONLY,
1695+
(google.api.field_behavior) = IDENTIFIER
1696+
];
16391697

16401698
// Output only. System-assigned unique identifier for the CustomClass.
16411699
string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
16421700

1643-
// User-settable, human-readable name for the CustomClass. Must be 63
1644-
// characters or less.
1645-
string display_name = 4;
1701+
// Optional. User-settable, human-readable name for the CustomClass. Must be
1702+
// 63 characters or less.
1703+
string display_name = 4 [(google.api.field_behavior) = OPTIONAL];
16461704

16471705
// A collection of class items.
16481706
repeated ClassItem items = 5;
@@ -1666,10 +1724,10 @@ message CustomClass {
16661724
google.protobuf.Timestamp expire_time = 9
16671725
[(google.api.field_behavior) = OUTPUT_ONLY];
16681726

1669-
// Allows users to store small amounts of arbitrary data.
1727+
// Optional. Allows users to store small amounts of arbitrary data.
16701728
// Both the key and the value must be 63 characters or less each.
16711729
// At most 100 annotations.
1672-
map<string, string> annotations = 10;
1730+
map<string, string> annotations = 10 [(google.api.field_behavior) = OPTIONAL];
16731731

16741732
// Output only. This checksum is computed by the server based on the value of
16751733
// other fields. This may be sent on update, undelete, and delete requests to
@@ -1751,9 +1809,12 @@ message PhraseSet {
17511809
DELETED = 4;
17521810
}
17531811

1754-
// Output only. The resource name of the PhraseSet.
1812+
// Output only. Identifier. The resource name of the PhraseSet.
17551813
// Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
1756-
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1814+
string name = 1 [
1815+
(google.api.field_behavior) = OUTPUT_ONLY,
1816+
(google.api.field_behavior) = IDENTIFIER
1817+
];
17571818

17581819
// Output only. System-assigned unique identifier for the PhraseSet.
17591820
string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

0 commit comments

Comments
 (0)