@@ -75,6 +75,29 @@ service PredictionService {
7575 option (google.api.method_signature ) = "endpoint,http_body" ;
7676 }
7777
// Perform a unary online prediction request for Vertex first-party products
// and frameworks.
rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
  // HTTP binding: POST to the endpoint resource using the `:directPredict`
  // custom verb. `body: "*"` places every request field that is not bound by
  // the URL path into the HTTP request body.
  option (google.api.http) = {
    post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
    body: "*"
  };
}
86+
// Perform an online prediction request through gRPC.
//
// The request names the target method via `method_name` and carries the
// prediction input as raw bytes; the response returns raw output bytes.
rpc DirectRawPredict(DirectRawPredictRequest)
    returns (DirectRawPredictResponse) {
  // HTTP binding: POST to the endpoint resource using the
  // `:directRawPredict` custom verb. `body: "*"` places every request field
  // that is not bound by the URL path into the HTTP request body.
  option (google.api.http) = {
    post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
    body: "*"
  };
}
95+
// Perform a streaming online prediction request for Vertex first-party
// products and frameworks.
//
// Both the request and the response are streams (bidirectional streaming).
// No `google.api.http` binding is declared on this method.
rpc StreamingPredict(stream StreamingPredictRequest)
    returns (stream StreamingPredictResponse) {}
100+
78101 // Perform a server-side streaming online prediction request for Vertex
79102 // LLM streaming.
80103 rpc ServerStreamingPredict (StreamingPredictRequest )
@@ -89,6 +112,10 @@ service PredictionService {
89112 };
90113 }
91114
// Perform a streaming online prediction request through gRPC.
//
// Both the request and the response are streams (bidirectional streaming).
// No `google.api.http` binding is declared on this method.
rpc StreamingRawPredict(stream StreamingRawPredictRequest)
    returns (stream StreamingRawPredictResponse) {}
118+
92119 // Perform an online explanation.
93120 //
94121 // If
@@ -226,6 +253,69 @@ message RawPredictRequest {
226253 google.api.HttpBody http_body = 2 ;
227254}
228255
// Request message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}
275+
// Response message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}
285+
// Request message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input, carried as raw bytes.
  bytes input = 3;
}
311+
// Response message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictResponse {
  // The prediction output, carried as raw bytes.
  bytes output = 1;
}
318+
229319// Request message for
230320// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
231321//
@@ -261,6 +351,50 @@ message StreamingPredictResponse {
261351 Tensor parameters = 2 ;
262352}
263353
// Request message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
//
// The first message must contain the
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// fields, and may optionally contain
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// Each subsequent message must contain
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// A
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// set in a subsequent message has no effect.
message StreamingRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input, carried as raw bytes.
  bytes input = 3;
}
390+
// Response message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
message StreamingRawPredictResponse {
  // The prediction output, carried as raw bytes.
  bytes output = 1;
}
397+
264398// Request message for
265399// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
266400message ExplainRequest {
0 commit comments