Skip to content

Commit a0e077c

Browse files
authored
Remove face detection feature from V1 client (via synth). (#8666)
Closes #8661.
1 parent f67752e commit a0e077c

6 files changed

Lines changed: 273 additions & 637 deletions

File tree

videointelligence/google/cloud/videointelligence_v1/gapic/enums.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ class Feature(enum.IntEnum):
2828
LABEL_DETECTION (int): Label detection. Detect objects, such as dog or flower.
2929
SHOT_CHANGE_DETECTION (int): Shot change detection.
3030
EXPLICIT_CONTENT_DETECTION (int): Explicit content detection.
31-
FACE_DETECTION (int): Human face detection and tracking.
3231
SPEECH_TRANSCRIPTION (int): Speech transcription.
3332
TEXT_DETECTION (int): OCR text detection and tracking.
3433
OBJECT_TRACKING (int): Object detection and tracking.
@@ -38,7 +37,6 @@ class Feature(enum.IntEnum):
3837
LABEL_DETECTION = 1
3938
SHOT_CHANGE_DETECTION = 2
4039
EXPLICIT_CONTENT_DETECTION = 3
41-
FACE_DETECTION = 4
4240
SPEECH_TRANSCRIPTION = 6
4341
TEXT_DETECTION = 7
4442
OBJECT_TRACKING = 9

videointelligence/google/cloud/videointelligence_v1/gapic/video_intelligence_service_client_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
},
1919
"methods": {
2020
"AnnotateVideo": {
21-
"timeout_millis": 600000,
21+
"timeout_millis": 60000,
2222
"retry_codes_name": "idempotent",
2323
"retry_params_name": "default",
2424
}

videointelligence/google/cloud/videointelligence_v1/proto/video_intelligence.proto

Lines changed: 95 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 Google LLC.
1+
// Copyright 2019 Google LLC.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ import "google/longrunning/operations.proto";
2222
import "google/protobuf/duration.proto";
2323
import "google/protobuf/timestamp.proto";
2424
import "google/rpc/status.proto";
25+
import "google/api/client.proto";
2526

2627
option csharp_namespace = "Google.Cloud.VideoIntelligence.V1";
2728
option go_package = "google.golang.org/genproto/googleapis/cloud/videointelligence/v1;videointelligence";
@@ -33,12 +34,14 @@ option ruby_package = "Google::Cloud::VideoIntelligence::V1";
3334

3435
// Service that implements Google Cloud Video Intelligence API.
3536
service VideoIntelligenceService {
37+
option (google.api.default_host) = "videointelligence.googleapis.com";
38+
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
39+
3640
// Performs asynchronous video annotation. Progress and results can be
3741
// retrieved through the `google.longrunning.Operations` interface.
3842
// `Operation.metadata` contains `AnnotateVideoProgress` (progress).
3943
// `Operation.response` contains `AnnotateVideoResponse` (results).
40-
rpc AnnotateVideo(AnnotateVideoRequest)
41-
returns (google.longrunning.Operation) {
44+
rpc AnnotateVideo(AnnotateVideoRequest) returns (google.longrunning.Operation) {
4245
option (google.api.http) = {
4346
post: "/v1/videos:annotate"
4447
body: "*"
@@ -52,10 +55,10 @@ message AnnotateVideoRequest {
5255
// [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
5356
// supported, which must be specified in the following format:
5457
// `gs://bucket-id/object-id` (other URI formats return
55-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
56-
// more information, see [Request URIs](/storage/docs/reference-uris). A video
57-
// URI may include wildcards in `object-id`, and thus identify multiple
58-
// videos. Supported wildcards: '*' to match 0 or more characters;
58+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
59+
// [Request URIs](/storage/docs/reference-uris).
60+
// A video URI may include wildcards in `object-id`, and thus identify
61+
// multiple videos. Supported wildcards: '*' to match 0 or more characters;
5962
// '?' to match 1 character. If unset, the input video should be embedded
6063
// in the request as `input_content`. If set, `input_content` should be unset.
6164
string input_uri = 1;
@@ -75,8 +78,8 @@ message AnnotateVideoRequest {
7578
// Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
7679
// URIs are supported, which must be specified in the following format:
7780
// `gs://bucket-id/object-id` (other URI formats return
78-
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
79-
// more information, see [Request URIs](/storage/docs/reference-uris).
81+
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
82+
// [Request URIs](/storage/docs/reference-uris).
8083
string output_uri = 4;
8184

8285
// Optional cloud region where annotation should take place. Supported cloud
@@ -101,9 +104,6 @@ message VideoContext {
101104
// Config for EXPLICIT_CONTENT_DETECTION.
102105
ExplicitContentDetectionConfig explicit_content_detection_config = 4;
103106

104-
// Config for FACE_DETECTION.
105-
FaceDetectionConfig face_detection_config = 5;
106-
107107
// Config for SPEECH_TRANSCRIPTION.
108108
SpeechTranscriptionConfig speech_transcription_config = 6;
109109

@@ -114,6 +114,66 @@ message VideoContext {
114114
ObjectTrackingConfig object_tracking_config = 13;
115115
}
116116

117+
// Video annotation feature.
118+
enum Feature {
119+
// Unspecified.
120+
FEATURE_UNSPECIFIED = 0;
121+
122+
// Label detection. Detect objects, such as dog or flower.
123+
LABEL_DETECTION = 1;
124+
125+
// Shot change detection.
126+
SHOT_CHANGE_DETECTION = 2;
127+
128+
// Explicit content detection.
129+
EXPLICIT_CONTENT_DETECTION = 3;
130+
131+
// Speech transcription.
132+
SPEECH_TRANSCRIPTION = 6;
133+
134+
// OCR text detection and tracking.
135+
TEXT_DETECTION = 7;
136+
137+
// Object detection and tracking.
138+
OBJECT_TRACKING = 9;
139+
}
140+
141+
// Label detection mode.
142+
enum LabelDetectionMode {
143+
// Unspecified.
144+
LABEL_DETECTION_MODE_UNSPECIFIED = 0;
145+
146+
// Detect shot-level labels.
147+
SHOT_MODE = 1;
148+
149+
// Detect frame-level labels.
150+
FRAME_MODE = 2;
151+
152+
// Detect both shot-level and frame-level labels.
153+
SHOT_AND_FRAME_MODE = 3;
154+
}
155+
156+
// Bucketized representation of likelihood.
157+
enum Likelihood {
158+
// Unspecified likelihood.
159+
LIKELIHOOD_UNSPECIFIED = 0;
160+
161+
// Very unlikely.
162+
VERY_UNLIKELY = 1;
163+
164+
// Unlikely.
165+
UNLIKELY = 2;
166+
167+
// Possible.
168+
POSSIBLE = 3;
169+
170+
// Likely.
171+
LIKELY = 4;
172+
173+
// Very likely.
174+
VERY_LIKELY = 5;
175+
}
176+
117177
// Config for LABEL_DETECTION.
118178
message LabelDetectionConfig {
119179
// What labels should be detected with LABEL_DETECTION, in addition to
@@ -156,28 +216,17 @@ message ShotChangeDetectionConfig {
156216
string model = 1;
157217
}
158218

159-
// Config for EXPLICIT_CONTENT_DETECTION.
160-
message ExplicitContentDetectionConfig {
161-
// Model to use for explicit content detection.
162-
// Supported values: "builtin/stable" (the default if unset) and
163-
// "builtin/latest".
164-
string model = 1;
165-
}
166-
167-
// Config for FACE_DETECTION.
168-
message FaceDetectionConfig {
169-
// Model to use for face detection.
219+
// Config for OBJECT_TRACKING.
220+
message ObjectTrackingConfig {
221+
// Model to use for object tracking.
170222
// Supported values: "builtin/stable" (the default if unset) and
171223
// "builtin/latest".
172224
string model = 1;
173-
174-
// Whether bounding boxes be included in the face annotation output.
175-
bool include_bounding_boxes = 2;
176225
}
177226

178-
// Config for OBJECT_TRACKING.
179-
message ObjectTrackingConfig {
180-
// Model to use for object tracking.
227+
// Config for EXPLICIT_CONTENT_DETECTION.
228+
message ExplicitContentDetectionConfig {
229+
// Model to use for explicit content detection.
181230
// Supported values: "builtin/stable" (the default if unset) and
182231
// "builtin/latest".
183232
string model = 1;
@@ -295,57 +344,24 @@ message NormalizedBoundingBox {
295344
float bottom = 4;
296345
}
297346

298-
// Video segment level annotation results for face detection.
299-
message FaceSegment {
300-
// Video segment where a face was detected.
301-
VideoSegment segment = 1;
302-
}
303-
304-
// Video frame level annotation results for face detection.
305-
message FaceFrame {
306-
// Normalized Bounding boxes in a frame.
307-
// There can be more than one boxes if the same face is detected in multiple
308-
// locations within the current frame.
309-
repeated NormalizedBoundingBox normalized_bounding_boxes = 1;
310-
311-
// Time-offset, relative to the beginning of the video,
312-
// corresponding to the video frame for this location.
313-
google.protobuf.Duration time_offset = 2;
314-
}
315-
316-
// Face annotation.
317-
message FaceAnnotation {
318-
// Thumbnail of a representative face view (in JPEG format).
319-
bytes thumbnail = 1;
320-
321-
// All video segments where a face was detected.
322-
repeated FaceSegment segments = 2;
323-
324-
// All video frames where a face was detected.
325-
repeated FaceFrame frames = 3;
326-
}
327-
328347
// Annotation results for a single video.
329348
message VideoAnnotationResults {
330349
// Video file location in
331350
// [Google Cloud Storage](https://cloud.google.com/storage/).
332351
string input_uri = 1;
333352

334-
// Label annotations on video level or user specified segment level.
353+
// Topical label annotations on video level or user specified segment level.
335354
// There is exactly one element for each unique label.
336355
repeated LabelAnnotation segment_label_annotations = 2;
337356

338-
// Label annotations on shot level.
357+
// Topical label annotations on shot level.
339358
// There is exactly one element for each unique label.
340359
repeated LabelAnnotation shot_label_annotations = 3;
341360

342361
// Label annotations on frame level.
343362
// There is exactly one element for each unique label.
344363
repeated LabelAnnotation frame_label_annotations = 4;
345364

346-
// Face annotations. There is exactly one element for each unique face.
347-
repeated FaceAnnotation face_annotations = 5;
348-
349365
// Shot annotations. Each shot is represented as a video segment.
350366
repeated VideoSegment shot_annotations = 6;
351367

@@ -391,6 +407,14 @@ message VideoAnnotationProgress {
391407

392408
// Time of the most recent update.
393409
google.protobuf.Timestamp update_time = 4;
410+
411+
// Specifies which feature is being tracked if the request contains more than
412+
// one features.
413+
Feature feature = 5;
414+
415+
// Specifies which segment is being tracked if the request contains more than
416+
// one segments.
417+
VideoSegment segment = 6;
394418
}
395419

396420
// Video annotation progress. Included in the `metadata`
@@ -491,15 +515,17 @@ message SpeechRecognitionAlternative {
491515
// Transcript text representing the words that the user spoke.
492516
string transcript = 1;
493517

494-
// The confidence estimate between 0.0 and 1.0. A higher number
518+
// Output only. The confidence estimate between 0.0 and 1.0. A higher number
495519
// indicates an estimated greater likelihood that the recognized words are
496-
// correct. This field is typically provided only for the top hypothesis, and
497-
// only for `is_final=true` results. Clients should not rely on the
498-
// `confidence` field as it is not guaranteed to be accurate or consistent.
520+
// correct. This field is set only for the top alternative.
521+
// This field is not guaranteed to be accurate and users should not rely on it
522+
// to be always provided.
499523
// The default of 0.0 is a sentinel value indicating `confidence` was not set.
500524
float confidence = 2;
501525

502-
// A list of word-specific information for each recognized word.
526+
// Output only. A list of word-specific information for each recognized word.
527+
// Note: When `enable_speaker_diarization` is true, you will see all the words
528+
// from the beginning of the audio.
503529
repeated WordInfo words = 3;
504530
}
505531

@@ -645,66 +671,3 @@ message ObjectTrackingAnnotation {
645671
// Streaming mode: it can only be one ObjectTrackingFrame message in frames.
646672
repeated ObjectTrackingFrame frames = 2;
647673
}
648-
649-
// Video annotation feature.
650-
enum Feature {
651-
// Unspecified.
652-
FEATURE_UNSPECIFIED = 0;
653-
654-
// Label detection. Detect objects, such as dog or flower.
655-
LABEL_DETECTION = 1;
656-
657-
// Shot change detection.
658-
SHOT_CHANGE_DETECTION = 2;
659-
660-
// Explicit content detection.
661-
EXPLICIT_CONTENT_DETECTION = 3;
662-
663-
// Human face detection and tracking.
664-
FACE_DETECTION = 4;
665-
666-
// Speech transcription.
667-
SPEECH_TRANSCRIPTION = 6;
668-
669-
// OCR text detection and tracking.
670-
TEXT_DETECTION = 7;
671-
672-
// Object detection and tracking.
673-
OBJECT_TRACKING = 9;
674-
}
675-
676-
// Label detection mode.
677-
enum LabelDetectionMode {
678-
// Unspecified.
679-
LABEL_DETECTION_MODE_UNSPECIFIED = 0;
680-
681-
// Detect shot-level labels.
682-
SHOT_MODE = 1;
683-
684-
// Detect frame-level labels.
685-
FRAME_MODE = 2;
686-
687-
// Detect both shot-level and frame-level labels.
688-
SHOT_AND_FRAME_MODE = 3;
689-
}
690-
691-
// Bucketized representation of likelihood.
692-
enum Likelihood {
693-
// Unspecified likelihood.
694-
LIKELIHOOD_UNSPECIFIED = 0;
695-
696-
// Very unlikely.
697-
VERY_UNLIKELY = 1;
698-
699-
// Unlikely.
700-
UNLIKELY = 2;
701-
702-
// Possible.
703-
POSSIBLE = 3;
704-
705-
// Likely.
706-
LIKELY = 4;
707-
708-
// Very likely.
709-
VERY_LIKELY = 5;
710-
}

0 commit comments

Comments (0)