feat: A new message ScaleToZeroSpec is added

Google APIs · copybara-github · commit bd4166c4286c · 2025-10-09T12:23:13.000-07:00
feat: A new field `initial_replica_count` is added to message `.google.cloud.aiplatform.v1beta1.DedicatedResources`
feat: A new field `scale_to_zero_spec` is added to message `.google.cloud.aiplatform.v1beta1.DedicatedResources`

PiperOrigin-RevId: 817290056
diff --git a/google/cloud/aiplatform/v1beta1/machine_resources.proto b/google/cloud/aiplatform/v1beta1/machine_resources.proto
@@ -96,6 +96,21 @@ message MachineSpec {
 // A description of resources that are dedicated to a DeployedModel or
 // DeployedIndex, and that need a higher degree of manual configuration.
 message DedicatedResources {
+  // Specification for scale-to-zero feature.
+  message ScaleToZeroSpec {
+    // Optional. Minimum duration that a deployment will be scaled up before
+    // traffic is evaluated for potential scale-down. [MinValue=300] (5 minutes)
+    // [MaxValue=28800] (8 hours)
+    google.protobuf.Duration min_scaleup_period = 1
+        [(google.api.field_behavior) = OPTIONAL];
+
+    // Optional. Duration of no traffic before scaling to zero.
+    // [MinValue=300] (5 minutes)
+    // [MaxValue=28800] (8 hours)
+    google.protobuf.Duration idle_scaledown_period = 2
+        [(google.api.field_behavior) = OPTIONAL];
+  }
+
   // Required. Immutable. The specification of a single machine being used.
   MachineSpec machine_spec = 1 [
     (google.api.field_behavior) = REQUIRED,
@@ -135,6 +150,21 @@ message DedicatedResources {
   // required_replica_count will be min_replica_count.
   int32 required_replica_count = 9 [(google.api.field_behavior) = OPTIONAL];
 
+  // Immutable. Number of initial replicas deployed when scaling the workload
+  // up from zero, or when creating the workload, in case
+  // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
+  // = 0. When
+  // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
+  // > 0 (meaning that the scale-to-zero feature is not enabled),
+  // [initial_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.initial_replica_count]
+  // should not be set. When
+  // [min_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.min_replica_count]
+  // = 0 (meaning that the scale-to-zero feature is enabled),
+  // [initial_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.initial_replica_count]
+  // should be larger than zero, but no greater than
+  // [max_replica_count][google.cloud.aiplatform.v1beta1.DedicatedResources.max_replica_count].
+  int32 initial_replica_count = 6 [(google.api.field_behavior) = IMMUTABLE];
+
   // Immutable. The metric specifications that overrides a resource
   // utilization metric (CPU utilization, accelerator's duty cycle, and so on)
   // target value (default to 60 if not set). At most one entry is allowed per
@@ -172,6 +202,10 @@ message DedicatedResources {
     (google.api.field_behavior) = IMMUTABLE,
     (google.api.field_behavior) = OPTIONAL
   ];
+
+  // Optional. Specification for scale-to-zero feature.
+  ScaleToZeroSpec scale_to_zero_spec = 11
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // A description of resources that to large degree are decided by Vertex AI,