Skip to content

Commit bd4166c

Browse files
Google APIs authored and
copybara-github committed
feat: A new message ScaleToZeroSpec is added
feat: A new field `initial_replica_count` is added to message `.google.cloud.aiplatform.v1beta1.DedicatedResources`
feat: A new field `scale_to_zero_spec` is added to message `.google.cloud.aiplatform.v1beta1.DedicatedResources`
PiperOrigin-RevId: 817290056
1 parent a23c956 commit bd4166c

1 file changed

Lines changed: 34 additions & 0 deletions

File tree

google/cloud/aiplatform/v1beta1/machine_resources.proto

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,21 @@ message MachineSpec {
9696
// A description of resources that are dedicated to a DeployedModel or
9797
// DeployedIndex, and that need a higher degree of manual configuration.
9898
message DedicatedResources {
99+
// Specification for scale-to-zero feature.
100+
message ScaleToZeroSpec {
101+
// Optional. Minimum duration that a deployment will be scaled up before
102+
// traffic is evaluated for potential scale-down. [MinValue=300] (5 minutes)
103+
// [MaxValue=28800] (8 hours)
104+
google.protobuf.Duration min_scaleup_period = 1
105+
[(google.api.field_behavior) = OPTIONAL];
106+
107+
// Optional. Duration of no traffic before scaling to zero.
108+
// [MinValue=3600] (1 hour)
109+
// [MaxValue=28800] (8 hours)
110+
google.protobuf.Duration idle_scaledown_period = 2
111+
[(google.api.field_behavior) = OPTIONAL];
112+
}
113+
99114
// Required. Immutable. The specification of a single machine being used.
100115
MachineSpec machine_spec = 1 [
101116
(google.api.field_behavior) = REQUIRED,
@@ -135,6 +150,21 @@ message DedicatedResources {
135150
// required_replica_count will be min_replica_count.
136151
int32 required_replica_count = 9 [(google.api.field_behavior) = OPTIONAL];
137152

153+
// Immutable. Number of initial replicas to deploy when scaling the
154+
// workload up from zero or when creating the workload in case
155+
// [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
156+
// = 0. When
157+
// [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
158+
// > 0 (meaning that the scale-to-zero feature is not enabled),
159+
// [initial_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.initial_replica_count]
160+
// should not be set. When
161+
// [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
162+
// = 0 (meaning that the scale-to-zero feature is enabled),
163+
// [initial_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.initial_replica_count]
164+
// should be larger than zero, but no greater than
165+
// [max_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.max_replica_count].
166+
int32 initial_replica_count = 6 [(google.api.field_behavior) = IMMUTABLE];
167+
138168
// Immutable. The metric specifications that override a resource
139169
// utilization metric (CPU utilization, accelerator's duty cycle, and so on)
140170
// target value (default to 60 if not set). At most one entry is allowed per
@@ -172,6 +202,10 @@ message DedicatedResources {
172202
(google.api.field_behavior) = IMMUTABLE,
173203
(google.api.field_behavior) = OPTIONAL
174204
];
205+
206+
// Optional. Specification for scale-to-zero feature.
207+
ScaleToZeroSpec scale_to_zero_spec = 11
208+
[(google.api.field_behavior) = OPTIONAL];
175209
}
176210

177211
// A description of resources that to a large degree are decided by Vertex AI,

0 commit comments

Comments
 (0)