feat: Add Lustre support to the Vertex Training Custom Job API

Google APIs · copybara-github · commit affadb606560 · 2025-12-18T14:27:55.000-08:00
docs: Update comments for machine resources and autoscaling

PiperOrigin-RevId: 846415033
diff --git a/google/cloud/aiplatform/v1/custom_job.proto b/google/cloud/aiplatform/v1/custom_job.proto
@@ -312,6 +312,10 @@ message WorkerPoolSpec {
   // Optional. List of NFS mount spec.
   repeated NfsMount nfs_mounts = 4 [(google.api.field_behavior) = OPTIONAL];
 
+  // Optional. List of Lustre mounts.
+  repeated LustreMount lustre_mounts = 9
+      [(google.api.field_behavior) = OPTIONAL];
+
   // Disk spec.
   DiskSpec disk_spec = 5;
 }
diff --git a/google/cloud/aiplatform/v1/machine_resources.proto b/google/cloud/aiplatform/v1/machine_resources.proto
@@ -85,35 +85,31 @@ message MachineSpec {
   ];
 }
 
-// A description of resources that are dedicated to a DeployedModel, and
-// that need a higher degree of manual configuration.
+// A description of resources that are dedicated to a DeployedModel or
+// DeployedIndex, and that need a higher degree of manual configuration.
 message DedicatedResources {
-  // Required. Immutable. The specification of a single machine used by the
-  // prediction.
+  // Required. Immutable. The specification of a single machine being used.
   MachineSpec machine_spec = 1 [
     (google.api.field_behavior) = REQUIRED,
     (google.api.field_behavior) = IMMUTABLE
   ];
 
-  // Required. Immutable. The minimum number of machine replicas this
-  // DeployedModel will be always deployed on. This value must be greater than
-  // or equal to 1.
+  // Required. Immutable. The minimum number of machine replicas that will
+  // always be deployed. This value must be greater than or equal to 1.
   //
-  // If traffic against the DeployedModel increases, it may dynamically be
-  // deployed onto more replicas, and as traffic decreases, some of these extra
-  // replicas may be freed.
+  // If traffic increases, the deployment may dynamically use more replicas,
+  // and as traffic decreases, some of these extra replicas may be freed.
   int32 min_replica_count = 2 [
     (google.api.field_behavior) = REQUIRED,
     (google.api.field_behavior) = IMMUTABLE
   ];
 
-  // Immutable. The maximum number of replicas this DeployedModel may be
-  // deployed on when the traffic against it increases. If the requested value
-  // is too large, the deployment will error, but if deployment succeeds then
-  // the ability to scale the model to that many replicas is guaranteed (barring
-  // service outages). If traffic against the DeployedModel increases beyond
-  // what its replicas at maximum may handle, a portion of the traffic will be
-  // dropped. If this value is not provided, will use
+  // Immutable. The maximum number of replicas that may be deployed when
+  // traffic increases. If the requested value is too large, the deployment
+  // will error, but if deployment succeeds then the ability to scale to that
+  // many replicas is guaranteed (barring service outages). If traffic
+  // increases beyond what the replicas at maximum can handle, a portion of
+  // the traffic will be dropped. If this value is not provided, will use
   // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
   // as the default value.
   //
@@ -124,8 +120,8 @@ message DedicatedResources {
   int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
 
   // Optional. Number of required available replicas for the deployment to
-  // succeed. This field is only needed when partial model deployment/mutation
-  // is desired. If set, the model deploy/mutate operation will succeed once
+  // succeed. This field is only needed when partial deployment/mutation is
+  // desired. If set, the deploy/mutate operation will succeed once
   // available_replica_count reaches required_replica_count, and the rest of
   // the replicas will be retried. If not set, the default
   // required_replica_count will be min_replica_count.
@@ -166,23 +162,22 @@ message DedicatedResources {
 // and require only a modest additional configuration.
 // Each Model supporting these resources documents its specific guidelines.
 message AutomaticResources {
-  // Immutable. The minimum number of replicas this DeployedModel will be always
-  // deployed on. If traffic against it increases, it may dynamically be
-  // deployed onto more replicas up to
+  // Immutable. The minimum number of replicas that will always be deployed.
+  // If traffic increases, the deployment may dynamically use more replicas,
+  // up to
   // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
   // and as traffic decreases, some of these extra replicas may be freed. If the
   // requested value is too large, the deployment will error.
   int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];
 
-  // Immutable. The maximum number of replicas this DeployedModel may be
-  // deployed on when the traffic against it increases. If the requested value
-  // is too large, the deployment will error, but if deployment succeeds then
-  // the ability to scale the model to that many replicas is guaranteed (barring
-  // service outages). If traffic against the DeployedModel increases beyond
-  // what its replicas at maximum may handle, a portion of the traffic will be
-  // dropped. If this value is not provided, a no upper bound for scaling under
-  // heavy traffic will be assume, though Vertex AI may be unable to scale
-  // beyond certain replica number.
+  // Immutable. The maximum number of replicas that may be deployed when
+  // traffic increases. If the requested value is too large, the deployment
+  // will error, but if deployment succeeds then the ability to scale to that
+  // many replicas is guaranteed (barring service outages). If traffic
+  // increases beyond what the replicas at maximum can handle, a portion of
+  // the traffic will be dropped. If this value is not provided, no upper
+  // bound for scaling under heavy traffic will be assumed, though Vertex AI
+  // may be unable to scale beyond a certain number of replicas.
   int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
 }
 
@@ -215,9 +210,10 @@ message ResourcesConsumed {
 
 // Represents the spec of disk options.
 message DiskSpec {
-  // Type of the boot disk (default is "pd-ssd").
-  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
-  // "pd-standard" (Persistent Disk Hard Disk Drive).
+  // Type of the boot disk. For non-A3U machines, the default value is
+  // "pd-ssd"; for A3U machines, the default value is "hyperdisk-balanced".
+  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive),
+  // "pd-standard" (Persistent Disk Hard Disk Drive) or "hyperdisk-balanced".
   string boot_disk_type = 1;
 
   // Size in GB of the boot disk (default is 100GB).
@@ -253,6 +249,22 @@ message NfsMount {
   string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
 }
 
+// Represents a mount configuration for Lustre file system.
+message LustreMount {
+  // Required. IP address of the Lustre instance.
+  string instance_ip = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The unique identifier of the Lustre volume.
+  string volume_handle = 2 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The name of the Lustre filesystem.
+  string filesystem = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. Destination mount path. The Lustre file system will be mounted
+  // for the user under /mnt/lustre/<mount_point>.
+  string mount_point = 4 [(google.api.field_behavior) = REQUIRED];
+}
+
 // The metric specification that defines the target resource utilization
 // (CPU utilization, accelerator's duty cycle, and so on) for calculating the
 // desired replica count.
@@ -263,6 +275,7 @@ message AutoscalingMetricSpec {
   // * For Online Prediction:
   // * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
   // * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
+  // * `aiplatform.googleapis.com/prediction/online/request_count`
   string metric_name = 1 [(google.api.field_behavior) = REQUIRED];
 
   // The target resource utilization in percentage (1% - 100%) for the given