Skip to content

Commit 362f61e

Browse files
Google APIs and copybara-github
authored and committed
feat: add grpc_ports to UploadModel ModelContainerSpec
feat: add DirectPredict to PredictionService
feat: add DirectRawPredict to PredictionService
feat: add StreamingPredict to PredictionService
feat: add StreamingRawPredict to PredictionService

PiperOrigin-RevId: 586417186
1 parent fce431c commit 362f61e

2 files changed

Lines changed: 145 additions & 0 deletions

File tree

google/cloud/aiplatform/v1beta1/model.proto

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,17 @@ message ModelContainerSpec {
695695
// variable](https://cloud.google.com/vertex-ai/docs/predictions/custom-container-requirements#aip-variables).)
696696
string health_route = 7 [(google.api.field_behavior) = IMMUTABLE];
697697

698+
// Immutable. List of ports to expose from the container. Vertex AI sends gRPC
699+
// prediction requests that it receives to the first port on this list. Vertex
700+
// AI also sends liveness and health checks to this port.
701+
//
702+
// If you do not specify this field, gRPC requests to the container will be
703+
// disabled.
704+
//
705+
// Vertex AI does not use ports other than the first one listed. This field
706+
// corresponds to the `ports` field of the Kubernetes Containers v1 core API.
707+
repeated Port grpc_ports = 9 [(google.api.field_behavior) = IMMUTABLE];
708+
698709
// Immutable. Deployment timeout.
699710
// Limit for deployment timeout is 2 hours.
700711
google.protobuf.Duration deployment_timeout = 10

google/cloud/aiplatform/v1beta1/prediction_service.proto

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,29 @@ service PredictionService {
7575
option (google.api.method_signature) = "endpoint,http_body";
7676
}
7777

78+
// Perform a unary online prediction request for Vertex first-party products
79+
// and frameworks.
80+
rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
81+
option (google.api.http) = {
82+
post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
83+
body: "*"
84+
};
85+
}
86+
87+
// Perform an online prediction request through gRPC.
88+
rpc DirectRawPredict(DirectRawPredictRequest)
89+
returns (DirectRawPredictResponse) {
90+
option (google.api.http) = {
91+
post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
92+
body: "*"
93+
};
94+
}
95+
96+
// Perform a streaming online prediction request for Vertex first-party
97+
// products and frameworks.
98+
rpc StreamingPredict(stream StreamingPredictRequest)
99+
returns (stream StreamingPredictResponse) {}
100+
78101
// Perform a server-side streaming online prediction request for Vertex
79102
// LLM streaming.
80103
rpc ServerStreamingPredict(StreamingPredictRequest)
@@ -89,6 +112,10 @@ service PredictionService {
89112
};
90113
}
91114

115+
// Perform a streaming online prediction request through gRPC.
116+
rpc StreamingRawPredict(stream StreamingRawPredictRequest)
117+
returns (stream StreamingRawPredictResponse) {}
118+
92119
// Perform an online explanation.
93120
//
94121
// If
@@ -226,6 +253,69 @@ message RawPredictRequest {
226253
google.api.HttpBody http_body = 2;
227254
}
228255

256+
// Request message for
257+
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
258+
message DirectPredictRequest {
259+
// Required. The name of the Endpoint requested to serve the prediction.
260+
// Format:
261+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
262+
string endpoint = 1 [
263+
(google.api.field_behavior) = REQUIRED,
264+
(google.api.resource_reference) = {
265+
type: "aiplatform.googleapis.com/Endpoint"
266+
}
267+
];
268+
269+
// The prediction input.
270+
repeated Tensor inputs = 2;
271+
272+
// The parameters that govern the prediction.
273+
Tensor parameters = 3;
274+
}
275+
276+
// Response message for
277+
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
278+
message DirectPredictResponse {
279+
// The prediction output.
280+
repeated Tensor outputs = 1;
281+
282+
// The parameters that govern the prediction.
283+
Tensor parameters = 2;
284+
}
285+
286+
// Request message for
287+
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
288+
message DirectRawPredictRequest {
289+
// Required. The name of the Endpoint requested to serve the prediction.
290+
// Format:
291+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
292+
string endpoint = 1 [
293+
(google.api.field_behavior) = REQUIRED,
294+
(google.api.resource_reference) = {
295+
type: "aiplatform.googleapis.com/Endpoint"
296+
}
297+
];
298+
299+
// Fully qualified name of the API method being invoked to perform
300+
// predictions.
301+
//
302+
// Format:
303+
// `/namespace.Service/Method`
304+
// Example:
305+
// `/tensorflow.serving.PredictionService/Predict`
306+
string method_name = 2;
307+
308+
// The prediction input.
309+
bytes input = 3;
310+
}
311+
312+
// Response message for
313+
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
314+
message DirectRawPredictResponse {
315+
// The prediction output.
316+
bytes output = 1;
317+
}
318+
229319
// Request message for
230320
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
231321
//
@@ -261,6 +351,50 @@ message StreamingPredictResponse {
261351
Tensor parameters = 2;
262352
}
263353

354+
// Request message for
355+
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
356+
//
357+
// The first message must contain
358+
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.endpoint]
359+
// and
360+
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
361+
// fields and optionally
362+
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
363+
// The subsequent messages must contain
364+
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
365+
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
366+
// in the subsequent messages has no effect.
367+
message StreamingRawPredictRequest {
368+
// Required. The name of the Endpoint requested to serve the prediction.
369+
// Format:
370+
// `projects/{project}/locations/{location}/endpoints/{endpoint}`
371+
string endpoint = 1 [
372+
(google.api.field_behavior) = REQUIRED,
373+
(google.api.resource_reference) = {
374+
type: "aiplatform.googleapis.com/Endpoint"
375+
}
376+
];
377+
378+
// Fully qualified name of the API method being invoked to perform
379+
// predictions.
380+
//
381+
// Format:
382+
// `/namespace.Service/Method`
383+
// Example:
384+
// `/tensorflow.serving.PredictionService/Predict`
385+
string method_name = 2;
386+
387+
// The prediction input.
388+
bytes input = 3;
389+
}
390+
391+
// Response message for
392+
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
393+
message StreamingRawPredictResponse {
394+
// The prediction output.
395+
bytes output = 1;
396+
}
397+
264398
// Request message for
265399
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
266400
message ExplainRequest {

0 commit comments

Comments
 (0)