@@ -85,35 +85,31 @@ message MachineSpec {
8585 ];
8686}
8787
88- // A description of resources that are dedicated to a DeployedModel, and
89- // that need a higher degree of manual configuration.
88+ // A description of resources that are dedicated to a DeployedModel or
89+ // DeployedIndex, and that need a higher degree of manual configuration.
9090message DedicatedResources {
91- // Required. Immutable. The specification of a single machine used by the
92- // prediction.
91+ // Required. Immutable. The specification of a single machine being used.
9392 MachineSpec machine_spec = 1 [
9493 (google.api.field_behavior ) = REQUIRED ,
9594 (google.api.field_behavior ) = IMMUTABLE
9695 ];
9796
98- // Required. Immutable. The minimum number of machine replicas this
99- // DeployedModel will be always deployed on. This value must be greater than
100- // or equal to 1.
97+ // Required. Immutable. The minimum number of machine replicas that will
98+ // always be deployed. This value must be greater than or equal to 1.
10199 //
102- // If traffic against the DeployedModel increases, it may dynamically be
103- // deployed onto more replicas, and as traffic decreases, some of these extra
104- // replicas may be freed.
100+ // If traffic increases, more replicas may be deployed dynamically, and as
101+ // traffic decreases, some of these extra replicas may be freed.
105102 int32 min_replica_count = 2 [
106103 (google.api.field_behavior ) = REQUIRED ,
107104 (google.api.field_behavior ) = IMMUTABLE
108105 ];
109106
110- // Immutable. The maximum number of replicas this DeployedModel may be
111- // deployed on when the traffic against it increases. If the requested value
112- // is too large, the deployment will error, but if deployment succeeds then
113- // the ability to scale the model to that many replicas is guaranteed (barring
114- // service outages). If traffic against the DeployedModel increases beyond
115- // what its replicas at maximum may handle, a portion of the traffic will be
116- // dropped. If this value is not provided, will use
107+ // Immutable. The maximum number of replicas that may be deployed when traffic
108+ // increases. If the requested value is too large, the deployment will error,
109+ // but if deployment succeeds then the ability to scale to that many replicas
110+ // is guaranteed (barring service outages). If traffic increases beyond what
111+ // the maximum number of replicas can handle, a portion of the traffic will be
112+ // dropped. If this value is not provided, will use
117113 // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
118114 // as the default value.
119115 //
@@ -124,8 +120,8 @@ message DedicatedResources {
124120 int32 max_replica_count = 3 [(google.api.field_behavior ) = IMMUTABLE ];
125121
126122 // Optional. Number of required available replicas for the deployment to
127- // succeed. This field is only needed when partial model deployment/mutation
128- // is desired. If set, the model deploy/mutate operation will succeed once
123+ // succeed. This field is only needed when partial deployment/mutation is
124+ // desired. If set, the deploy/mutate operation will succeed once
129125 // available_replica_count reaches required_replica_count, and the rest of
130126 // the replicas will be retried. If not set, the default
131127 // required_replica_count will be min_replica_count.
@@ -166,23 +162,22 @@ message DedicatedResources {
166162// and require only a modest additional configuration.
167163// Each Model supporting these resources documents its specific guidelines.
168164message AutomaticResources {
169- // Immutable. The minimum number of replicas this DeployedModel will be always
170- // deployed on. If traffic against it increases, it may dynamically be
171- // deployed onto more replicas up to
165+ // Immutable. The minimum number of replicas that will always be deployed.
166+ // If traffic increases, more replicas may be deployed dynamically, up to
167+ // the limit set by
172168 // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
173169 // and as traffic decreases, some of these extra replicas may be freed. If the
174170 // requested value is too large, the deployment will error.
175171 int32 min_replica_count = 1 [(google.api.field_behavior ) = IMMUTABLE ];
176172
177- // Immutable. The maximum number of replicas this DeployedModel may be
178- // deployed on when the traffic against it increases. If the requested value
179- // is too large, the deployment will error, but if deployment succeeds then
180- // the ability to scale the model to that many replicas is guaranteed (barring
181- // service outages). If traffic against the DeployedModel increases beyond
182- // what its replicas at maximum may handle, a portion of the traffic will be
183- // dropped. If this value is not provided, a no upper bound for scaling under
184- // heavy traffic will be assume, though Vertex AI may be unable to scale
185- // beyond certain replica number.
173+ // Immutable. The maximum number of replicas that may be deployed when traffic
174+ // increases. If the requested value is too large, the deployment will error,
175+ // but if deployment succeeds then the ability to scale to that many replicas
176+ // is guaranteed (barring service outages). If traffic increases beyond what
177+ // the maximum number of replicas can handle, a portion of the traffic will be
178+ // dropped. If this value is not provided, no upper bound for scaling under
179+ // heavy traffic will be assumed, though Vertex AI may be unable to scale
180+ // beyond a certain replica number.
186181 int32 max_replica_count = 2 [(google.api.field_behavior ) = IMMUTABLE ];
187182}
188183
@@ -215,9 +210,10 @@ message ResourcesConsumed {
215210
216211// Represents the spec of disk options.
217212message DiskSpec {
218- // Type of the boot disk (default is "pd-ssd").
219- // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
220- // "pd-standard" (Persistent Disk Hard Disk Drive).
213+ // Type of the boot disk. For non-A3U machines, the default value is
214+ // "pd-ssd"; for A3U machines, the default value is "hyperdisk-balanced".
215+ // Valid values: "pd-ssd" (Persistent Disk Solid State Drive),
216+ // "pd-standard" (Persistent Disk Hard Disk Drive), or "hyperdisk-balanced".
221217 string boot_disk_type = 1 ;
222218
223219 // Size in GB of the boot disk (default is 100GB).
@@ -253,6 +249,22 @@ message NfsMount {
253249 string mount_point = 3 [(google.api.field_behavior ) = REQUIRED ];
254250}
255251
252+ // Represents a mount configuration for a Lustre file system.
253+ message LustreMount {
254+ // Required. IP address of the Lustre instance.
255+ string instance_ip = 1 [(google.api.field_behavior ) = REQUIRED ];
256+
257+ // Required. The unique identifier of the Lustre volume.
258+ string volume_handle = 2 [(google.api.field_behavior ) = REQUIRED ];
259+
260+ // Required. The name of the Lustre file system.
261+ string filesystem = 3 [(google.api.field_behavior ) = REQUIRED ];
262+
263+ // Required. Destination mount path. The Lustre file system will be mounted
264+ // for the user under /mnt/lustre/<mount_point>.
265+ string mount_point = 4 [(google.api.field_behavior ) = REQUIRED ];
266+ }
267+
256268// The metric specification that defines the target resource utilization
257269// (CPU utilization, accelerator's duty cycle, and so on) for calculating the
258270// desired replica count.
@@ -263,6 +275,7 @@ message AutoscalingMetricSpec {
263275 // * For Online Prediction:
264276 // * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
265277 // * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
278+ // * `aiplatform.googleapis.com/prediction/online/request_count`
266279 string metric_name = 1 [(google.api.field_behavior ) = REQUIRED ];
267280
268281 // The target resource utilization in percentage (1% - 100%) for the given
0 commit comments