@@ -96,6 +96,21 @@ message MachineSpec {
9696// A description of resources that are dedicated to a DeployedModel or
9797// DeployedIndex, and that need a higher degree of manual configuration.
9898message DedicatedResources {
99+ // Specification for the scale-to-zero feature (scale-up and idle periods).
100+ message ScaleToZeroSpec {
101+ // Optional. Minimum duration that a deployment will remain scaled up before
102+ // traffic is evaluated for potential scale-down. [MinValue=300] (5 minutes)
103+ // [MaxValue=28800] (8 hours)
104+ google.protobuf.Duration min_scaleup_period = 1
105+ [(google.api.field_behavior ) = OPTIONAL ];
106+
107+ // Optional. Duration of no traffic before scaling to zero.
108+ // [MinValue=3600] (1 hour; NOTE(review): previously annotated "5 minutes",
109+ // i.e. 300 -- confirm which minimum is intended) [MaxValue=28800] (8 hours)
110+ google.protobuf.Duration idle_scaledown_period = 2
111+ [(google.api.field_behavior ) = OPTIONAL ];
112+ }
113+
99114 // Required. Immutable. The specification of a single machine being used.
100115 MachineSpec machine_spec = 1 [
101116 (google.api.field_behavior ) = REQUIRED ,
@@ -135,6 +150,21 @@ message DedicatedResources {
135150 // required_replica_count will be min_replica_count.
136151 int32 required_replica_count = 9 [(google.api.field_behavior ) = OPTIONAL ];
137152
153+ // Immutable. Number of replicas initially deployed when scaling the
154+ // workload up from zero, or when creating the workload, in case
155+ // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
156+ // = 0. When
157+ // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
158+ // > 0 (meaning that the scale-to-zero feature is not enabled),
159+ // [initial_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.initial_replica_count]
160+ // should not be set. When
161+ // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
162+ // = 0 (meaning that the scale-to-zero feature is enabled),
163+ // [initial_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.initial_replica_count]
164+ // should be greater than zero, but no greater than
165+ // [max_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.max_replica_count].
166+ int32 initial_replica_count = 6 [(google.api.field_behavior ) = IMMUTABLE ];
167+
138168 // Immutable. The metric specifications that override a resource
139169 // utilization metric (CPU utilization, accelerator's duty cycle, and so on)
140170 // target value (default to 60 if not set). At most one entry is allowed per
@@ -172,6 +202,10 @@ message DedicatedResources {
172202 (google.api.field_behavior ) = IMMUTABLE ,
173203 (google.api.field_behavior ) = OPTIONAL
174204 ];
205+
206+ // Optional. Specification for the scale-to-zero feature; see ScaleToZeroSpec.
207+ ScaleToZeroSpec scale_to_zero_spec = 11
208+ [(google.api.field_behavior ) = OPTIONAL ];
175209}
176210
177211// A description of resources that to a large degree are decided by Vertex AI,
0 commit comments