Skip to content

Commit affadb6

Browse files
Google APIs authored and copybara-github committed
feat: Add Lustre support to the Vertex Training Custom Job API
docs: Update comments for machine resources and autoscaling
PiperOrigin-RevId: 846415033
1 parent bc7e3ba commit affadb6

File tree

2 files changed

+51
-34
lines changed

2 files changed

+51
-34
lines changed

google/cloud/aiplatform/v1/custom_job.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,10 @@ message WorkerPoolSpec {
312312
// Optional. List of NFS mount spec.
313313
repeated NfsMount nfs_mounts = 4 [(google.api.field_behavior) = OPTIONAL];
314314

315+
// Optional. List of Lustre mounts.
316+
repeated LustreMount lustre_mounts = 9
317+
[(google.api.field_behavior) = OPTIONAL];
318+
315319
// Disk spec.
316320
DiskSpec disk_spec = 5;
317321
}

google/cloud/aiplatform/v1/machine_resources.proto

Lines changed: 47 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -85,35 +85,31 @@ message MachineSpec {
8585
];
8686
}
8787

88-
// A description of resources that are dedicated to a DeployedModel, and
89-
// that need a higher degree of manual configuration.
88+
// A description of resources that are dedicated to a DeployedModel or
89+
// DeployedIndex, and that need a higher degree of manual configuration.
9090
message DedicatedResources {
91-
// Required. Immutable. The specification of a single machine used by the
92-
// prediction.
91+
// Required. Immutable. The specification of a single machine being used.
9392
MachineSpec machine_spec = 1 [
9493
(google.api.field_behavior) = REQUIRED,
9594
(google.api.field_behavior) = IMMUTABLE
9695
];
9796

98-
// Required. Immutable. The minimum number of machine replicas this
99-
// DeployedModel will be always deployed on. This value must be greater than
100-
// or equal to 1.
97+
// Required. Immutable. The minimum number of machine replicas that will
98+
// always be deployed. This value must be greater than or equal to 1.
10199
//
102-
// If traffic against the DeployedModel increases, it may dynamically be
103-
// deployed onto more replicas, and as traffic decreases, some of these extra
104-
// replicas may be freed.
100+
// If traffic increases, it may dynamically be deployed onto more replicas,
101+
// and as traffic decreases, some of these extra replicas may be freed.
105102
int32 min_replica_count = 2 [
106103
(google.api.field_behavior) = REQUIRED,
107104
(google.api.field_behavior) = IMMUTABLE
108105
];
109106

110-
// Immutable. The maximum number of replicas this DeployedModel may be
111-
// deployed on when the traffic against it increases. If the requested value
112-
// is too large, the deployment will error, but if deployment succeeds then
113-
// the ability to scale the model to that many replicas is guaranteed (barring
114-
// service outages). If traffic against the DeployedModel increases beyond
115-
// what its replicas at maximum may handle, a portion of the traffic will be
116-
// dropped. If this value is not provided, will use
107+
// Immutable. The maximum number of replicas that may be deployed when
108+
// traffic increases. If the requested value is too large, the
109+
// deployment will error, but if deployment succeeds then the ability to scale
110+
// to that many replicas is guaranteed (barring service outages). If traffic
111+
// increases beyond what its replicas at maximum may handle, a portion of the
112+
// traffic will be dropped. If this value is not provided, will use
117113
// [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
118114
// as the default value.
119115
//
@@ -124,8 +120,8 @@ message DedicatedResources {
124120
int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
125121

126122
// Optional. Number of required available replicas for the deployment to
127-
// succeed. This field is only needed when partial model deployment/mutation
128-
// is desired. If set, the model deploy/mutate operation will succeed once
123+
// succeed. This field is only needed when partial deployment/mutation is
124+
// desired. If set, the deploy/mutate operation will succeed once
129125
// available_replica_count reaches required_replica_count, and the rest of
130126
// the replicas will be retried. If not set, the default
131127
// required_replica_count will be min_replica_count.
@@ -166,23 +162,22 @@ message DedicatedResources {
166162
// and require only a modest additional configuration.
167163
// Each Model supporting these resources documents its specific guidelines.
168164
message AutomaticResources {
169-
// Immutable. The minimum number of replicas this DeployedModel will be always
170-
// deployed on. If traffic against it increases, it may dynamically be
171-
// deployed onto more replicas up to
165+
// Immutable. The minimum number of replicas that will always be deployed.
166+
// If traffic against it increases, it may dynamically be deployed onto more
167+
// replicas up to
172168
// [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
173169
// and as traffic decreases, some of these extra replicas may be freed. If the
174170
// requested value is too large, the deployment will error.
175171
int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];
176172

177-
// Immutable. The maximum number of replicas this DeployedModel may be
178-
// deployed on when the traffic against it increases. If the requested value
179-
// is too large, the deployment will error, but if deployment succeeds then
180-
// the ability to scale the model to that many replicas is guaranteed (barring
181-
// service outages). If traffic against the DeployedModel increases beyond
182-
// what its replicas at maximum may handle, a portion of the traffic will be
183-
// dropped. If this value is not provided, a no upper bound for scaling under
184-
// heavy traffic will be assume, though Vertex AI may be unable to scale
185-
// beyond certain replica number.
173+
// Immutable. The maximum number of replicas that may be deployed when
174+
// traffic increases. If the requested value is too large, the
175+
// deployment will error, but if deployment succeeds then the ability to scale
176+
// to that many replicas is guaranteed (barring service outages). If traffic
177+
// increases beyond what its replicas at maximum may handle, a portion of the
178+
// traffic will be dropped. If this value is not provided, no upper bound
179+
// for scaling under heavy traffic will be assumed, though Vertex AI may be
180+
// unable to scale beyond a certain replica number.
186181
int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
187182
}
188183

@@ -215,9 +210,10 @@ message ResourcesConsumed {
215210

216211
// Represents the spec of disk options.
217212
message DiskSpec {
218-
// Type of the boot disk (default is "pd-ssd").
219-
// Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
220-
// "pd-standard" (Persistent Disk Hard Disk Drive).
213+
// Type of the boot disk. For non-A3U machines, the default value is
214+
// "pd-ssd"; for A3U machines, the default value is "hyperdisk-balanced".
215+
// Valid values: "pd-ssd" (Persistent Disk Solid State Drive),
216+
// "pd-standard" (Persistent Disk Hard Disk Drive) or "hyperdisk-balanced".
221217
string boot_disk_type = 1;
222218

223219
// Size in GB of the boot disk (default is 100GB).
@@ -253,6 +249,22 @@ message NfsMount {
253249
string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
254250
}
255251

252+
// Represents a mount configuration for Lustre file system.
253+
message LustreMount {
254+
// Required. IP address of the Lustre instance.
255+
string instance_ip = 1 [(google.api.field_behavior) = REQUIRED];
256+
257+
// Required. The unique identifier of the Lustre volume.
258+
string volume_handle = 2 [(google.api.field_behavior) = REQUIRED];
259+
260+
// Required. The name of the Lustre filesystem.
261+
string filesystem = 3 [(google.api.field_behavior) = REQUIRED];
262+
263+
// Required. Destination mount path. The Lustre file system will be mounted
264+
// for the user under /mnt/lustre/<mount_point>
265+
string mount_point = 4 [(google.api.field_behavior) = REQUIRED];
266+
}
267+
256268
// The metric specification that defines the target resource utilization
257269
// (CPU utilization, accelerator's duty cycle, and so on) for calculating the
258270
// desired replica count.
@@ -263,6 +275,7 @@ message AutoscalingMetricSpec {
263275
// * For Online Prediction:
264276
// * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
265277
// * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
278+
// * `aiplatform.googleapis.com/prediction/online/request_count`
266279
string metric_name = 1 [(google.api.field_behavior) = REQUIRED];
267280

268281
// The target resource utilization in percentage (1% - 100%) for the given

0 commit comments

Comments
 (0)