Skip to content

Commit 34b323e

Browse files
Google APIs and copybara-github
authored and committed
feat: add PSCAutomationConfig to PrivateServiceConnectConfig in service_networking.proto
feat: add GenAiAdvancedFeaturesConfig to endpoint.proto
docs: update dedicateEndpointDns documentation
PiperOrigin-RevId: 773868922
1 parent d5b7894 commit 34b323e

File tree

4 files changed

+166
-41
lines changed

4 files changed

+166
-41
lines changed

google/cloud/aiplatform/v1/endpoint.proto

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,10 @@ message Endpoint {
162162
bool dedicated_endpoint_enabled = 24;
163163

164164
// Output only. DNS of the dedicated endpoint. Will only be populated if
165-
// dedicated_endpoint_enabled is true.
166-
// Format:
167-
// `https://{endpoint_id}.{region}-{project_number}.prediction.vertexai.goog`.
165+
// dedicated_endpoint_enabled is true. Depending on the features enabled, uid
166+
// might be a random number or a string. For example, if fast_tryout is
167+
// enabled, uid will be fasttryout. Format:
168+
// `https://{endpoint_id}.{region}-{uid}.prediction.vertexai.goog`.
168169
string dedicated_endpoint_dns = 25
169170
[(google.api.field_behavior) = OUTPUT_ONLY];
170171

@@ -176,6 +177,12 @@ message Endpoint {
176177

177178
// Output only. Reserved for future use.
178179
bool satisfies_pzi = 28 [(google.api.field_behavior) = OUTPUT_ONLY];
180+
181+
// Optional. Configuration for GenAiAdvancedFeatures. If the endpoint is
182+
// serving GenAI models, advanced features like native RAG integration can be
183+
// configured. Currently, only Model Garden models are supported.
184+
GenAiAdvancedFeaturesConfig gen_ai_advanced_features_config = 29
185+
[(google.api.field_behavior) = OPTIONAL];
179186
}
180187

181188
// A deployment of a Model. Endpoints contain one or more DeployedModels.
@@ -223,22 +230,18 @@ message DeployedModel {
223230
// This value should be 1-10 characters, and valid characters are `/[0-9]/`.
224231
string id = 1 [(google.api.field_behavior) = IMMUTABLE];
225232

226-
// Required. The resource name of the Model that this is the deployment of.
227-
// Note that the Model may be in a different location than the DeployedModel's
228-
// Endpoint.
233+
// The resource name of the Model that this is the deployment of. Note that
234+
// the Model may be in a different location than the DeployedModel's Endpoint.
229235
//
230236
// The resource name may contain version id or version alias to specify the
231237
// version.
232238
// Example: `projects/{project}/locations/{location}/models/{model}@2`
233239
// or
234240
// `projects/{project}/locations/{location}/models/{model}@golden`
235241
// If no version is specified, the default version will be deployed.
236-
string model = 2 [
237-
(google.api.field_behavior) = REQUIRED,
238-
(google.api.resource_reference) = {
239-
type: "aiplatform.googleapis.com/Model"
240-
}
241-
];
242+
string model = 2 [(google.api.resource_reference) = {
243+
type: "aiplatform.googleapis.com/Model"
244+
}];
242245

243246
// Output only. The version ID of the model that is deployed.
244247
string model_version_id = 18 [(google.api.field_behavior) = OUTPUT_ONLY];
@@ -380,6 +383,20 @@ message FasterDeploymentConfig {
380383
bool fast_tryout_enabled = 2;
381384
}
382385

386+
// Configuration for GenAiAdvancedFeatures.
387+
message GenAiAdvancedFeaturesConfig {
388+
// Configuration for Retrieval Augmented Generation feature.
389+
message RagConfig {
390+
// If true, enable Retrieval Augmented Generation in ChatCompletion requests.
391+
// Once enabled, the endpoint will be identified as a GenAI endpoint and
392+
// Arthedain router will be used.
393+
bool enable_rag = 1;
394+
}
395+
396+
// Configuration for Retrieval Augmented Generation feature.
397+
RagConfig rag_config = 1;
398+
}
399+
383400
// Configuration for Speculative Decoding.
384401
message SpeculativeDecodingSpec {
385402
// Draft model speculation works by using the smaller model to generate

google/cloud/aiplatform/v1/service_networking.proto

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,41 @@ option (google.api.resource_definition) = {
3131
pattern: "projects/{project}/regions/{region}/networkAttachments/{networkattachment}"
3232
};
3333

34-
// PSC config that is used to automatically create forwarding rule via
35-
// ServiceConnectionMap.
34+
// The state of the PSC service automation.
35+
enum PSCAutomationState {
36+
// Should not be used.
37+
PSC_AUTOMATION_STATE_UNSPECIFIED = 0;
38+
39+
// The PSC service automation is successful.
40+
PSC_AUTOMATION_STATE_SUCCESSFUL = 1;
41+
42+
// The PSC service automation has failed.
43+
PSC_AUTOMATION_STATE_FAILED = 2;
44+
}
45+
46+
// PSC config that is used to automatically create PSC endpoints in the user
47+
// projects.
3648
message PSCAutomationConfig {
3749
// Required. Project id used to create forwarding rule.
3850
string project_id = 1 [(google.api.field_behavior) = REQUIRED];
3951

4052
// Required. The full name of the Google Compute Engine
4153
// [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks).
42-
// [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
54+
// [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/get):
4355
// `projects/{project}/global/networks/{network}`.
44-
// Where {project} is a project number, as in '12345', and {network} is
45-
// network name.
4656
string network = 2 [(google.api.field_behavior) = REQUIRED];
57+
58+
// Output only. IP address rule created by the PSC service automation.
59+
string ip_address = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
60+
61+
// Output only. Forwarding rule created by the PSC service automation.
62+
string forwarding_rule = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
63+
64+
// Output only. The state of the PSC service automation.
65+
PSCAutomationState state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
66+
67+
// Output only. Error message if the PSC service automation failed.
68+
string error_message = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
4769
}
4870

4971
// Represents configuration for private service connect.
@@ -56,6 +78,11 @@ message PrivateServiceConnectConfig {
5678
// attachment.
5779
repeated string project_allowlist = 2;
5880

81+
// Optional. List of projects and networks where the PSC endpoints will be
82+
// created. This field is used by Online Inference (Prediction) only.
83+
repeated PSCAutomationConfig psc_automation_configs = 3
84+
[(google.api.field_behavior) = OPTIONAL];
85+
5986
// Output only. The name of the generated service attachment resource.
6087
// This is only populated if the endpoint is deployed with
6188
// PrivateServiceConnect.

google/cloud/aiplatform/v1beta1/endpoint.proto

Lines changed: 71 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,10 @@ message Endpoint {
162162
bool dedicated_endpoint_enabled = 24;
163163

164164
// Output only. DNS of the dedicated endpoint. Will only be populated if
165-
// dedicated_endpoint_enabled is true.
166-
// Format:
167-
// `https://{endpoint_id}.{region}-{project_number}.prediction.vertexai.goog`.
165+
// dedicated_endpoint_enabled is true. Depending on the features enabled, uid
166+
// might be a random number or a string. For example, if fast_tryout is
167+
// enabled, uid will be fasttryout. Format:
168+
// `https://{endpoint_id}.{region}-{uid}.prediction.vertexai.goog`.
168169
string dedicated_endpoint_dns = 25
169170
[(google.api.field_behavior) = OUTPUT_ONLY];
170171

@@ -176,6 +177,12 @@ message Endpoint {
176177

177178
// Output only. Reserved for future use.
178179
bool satisfies_pzi = 28 [(google.api.field_behavior) = OUTPUT_ONLY];
180+
181+
// Optional. Configuration for GenAiAdvancedFeatures. If the endpoint is
182+
// serving GenAI models, advanced features like native RAG integration can be
183+
// configured. Currently, only Model Garden models are supported.
184+
GenAiAdvancedFeaturesConfig gen_ai_advanced_features_config = 29
185+
[(google.api.field_behavior) = OPTIONAL];
179186
}
180187

181188
// A deployment of a Model. Endpoints contain one or more DeployedModels.
@@ -223,22 +230,18 @@ message DeployedModel {
223230
// This value should be 1-10 characters, and valid characters are `/[0-9]/`.
224231
string id = 1 [(google.api.field_behavior) = IMMUTABLE];
225232

226-
// Required. The resource name of the Model that this is the deployment of.
227-
// Note that the Model may be in a different location than the DeployedModel's
228-
// Endpoint.
233+
// The resource name of the Model that this is the deployment of. Note that
234+
// the Model may be in a different location than the DeployedModel's Endpoint.
229235
//
230236
// The resource name may contain version id or version alias to specify the
231237
// version.
232238
// Example: `projects/{project}/locations/{location}/models/{model}@2`
233239
// or
234240
// `projects/{project}/locations/{location}/models/{model}@golden`
235241
// If no version is specified, the default version will be deployed.
236-
string model = 2 [
237-
(google.api.field_behavior) = REQUIRED,
238-
(google.api.resource_reference) = {
239-
type: "aiplatform.googleapis.com/Model"
240-
}
241-
];
242+
string model = 2 [(google.api.resource_reference) = {
243+
type: "aiplatform.googleapis.com/Model"
244+
}];
242245

243246
// Output only. The version ID of the model that is deployed.
244247
string model_version_id = 18 [(google.api.field_behavior) = OUTPUT_ONLY];
@@ -324,6 +327,10 @@ message DeployedModel {
324327

325328
// The checkpoint id of the model.
326329
string checkpoint_id = 29;
330+
331+
// Optional. Spec for configuring speculative decoding.
332+
SpeculativeDecodingSpec speculative_decoding_spec = 30
333+
[(google.api.field_behavior) = OPTIONAL];
327334
}
328335

329336
// PrivateEndpoints proto is used to provide paths for users to send
@@ -425,3 +432,55 @@ message RolloutOptions {
425432
// revision number specifies the intended state of the deployment.
426433
int32 revision_number = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
427434
}
435+
436+
// Configuration for GenAiAdvancedFeatures.
437+
message GenAiAdvancedFeaturesConfig {
438+
// Configuration for Retrieval Augmented Generation feature.
439+
message RagConfig {
440+
// If true, enable Retrieval Augmented Generation in ChatCompletion requests.
441+
// Once enabled, the endpoint will be identified as a GenAI endpoint and
442+
// Arthedain router will be used.
443+
bool enable_rag = 1;
444+
}
445+
446+
// Configuration for Retrieval Augmented Generation feature.
447+
RagConfig rag_config = 1;
448+
}
449+
450+
// Configuration for Speculative Decoding.
451+
message SpeculativeDecodingSpec {
452+
// Draft model speculation works by using the smaller model to generate
453+
// candidate tokens for speculative decoding.
454+
message DraftModelSpeculation {
455+
// Required. The resource name of the draft model.
456+
string draft_model = 1 [
457+
(google.api.field_behavior) = REQUIRED,
458+
(google.api.resource_reference) = {
459+
type: "aiplatform.googleapis.com/Model"
460+
}
461+
];
462+
}
463+
464+
// N-Gram speculation works by trying to find matching tokens in the
465+
// previous prompt sequence and use those as speculation for generating
466+
// new tokens.
467+
message NgramSpeculation {
468+
// The number of last N input tokens used as ngram to search/match
469+
// against the previous prompt sequence.
470+
// This is equal to the N in N-Gram.
471+
// The default value is 3 if not specified.
472+
int32 ngram_size = 1;
473+
}
474+
475+
// The type of speculation method to use.
476+
oneof speculation {
477+
// Draft model speculation.
478+
DraftModelSpeculation draft_model_speculation = 2;
479+
480+
// N-Gram speculation.
481+
NgramSpeculation ngram_speculation = 3;
482+
}
483+
484+
// The number of speculative tokens to generate at each step.
485+
int32 speculative_token_count = 1;
486+
}

google/cloud/aiplatform/v1beta1/service_networking.proto

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,41 @@ option (google.api.resource_definition) = {
3131
pattern: "projects/{project}/regions/{region}/networkAttachments/{networkattachment}"
3232
};
3333

34-
// PSC config that is used to automatically create forwarding rule via
35-
// ServiceConnectionMap.
34+
// The state of the PSC service automation.
35+
enum PSCAutomationState {
36+
// Should not be used.
37+
PSC_AUTOMATION_STATE_UNSPECIFIED = 0;
38+
39+
// The PSC service automation is successful.
40+
PSC_AUTOMATION_STATE_SUCCESSFUL = 1;
41+
42+
// The PSC service automation has failed.
43+
PSC_AUTOMATION_STATE_FAILED = 2;
44+
}
45+
46+
// PSC config that is used to automatically create PSC endpoints in the user
47+
// projects.
3648
message PSCAutomationConfig {
3749
// Required. Project id used to create forwarding rule.
3850
string project_id = 1 [(google.api.field_behavior) = REQUIRED];
3951

4052
// Required. The full name of the Google Compute Engine
4153
// [network](https://cloud.google.com/compute/docs/networks-and-firewalls#networks).
42-
// [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/insert):
54+
// [Format](https://cloud.google.com/compute/docs/reference/rest/v1/networks/get):
4355
// `projects/{project}/global/networks/{network}`.
44-
// Where {project} is a project number, as in '12345', and {network} is
45-
// network name.
4656
string network = 2 [(google.api.field_behavior) = REQUIRED];
57+
58+
// Output only. IP address rule created by the PSC service automation.
59+
string ip_address = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
60+
61+
// Output only. Forwarding rule created by the PSC service automation.
62+
string forwarding_rule = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
63+
64+
// Output only. The state of the PSC service automation.
65+
PSCAutomationState state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
66+
67+
// Output only. Error message if the PSC service automation failed.
68+
string error_message = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
4769
}
4870

4971
// Represents configuration for private service connect.
@@ -56,6 +78,11 @@ message PrivateServiceConnectConfig {
5678
// attachment.
5779
repeated string project_allowlist = 2;
5880

81+
// Optional. List of projects and networks where the PSC endpoints will be
82+
// created. This field is used by Online Inference (Prediction) only.
83+
repeated PSCAutomationConfig psc_automation_configs = 3
84+
[(google.api.field_behavior) = OPTIONAL];
85+
5986
// Optional. If set to true, enable secure private service connect with IAM
6087
// authorization. Otherwise, private service connect will be done without
6188
// authorization. Note latency will be slightly increased if authorization is
@@ -84,15 +111,10 @@ message PscAutomatedEndpoints {
84111

85112
// Configuration for PSC-I.
86113
message PscInterfaceConfig {
87-
// Optional. The full name of the Compute Engine
114+
// Optional. The name of the Compute Engine
88115
// [network
89116
// attachment](https://cloud.google.com/vpc/docs/about-network-attachments) to
90-
// attach to the resource.
91-
// For example, `projects/12345/regions/us-central1/networkAttachments/myNA`.
92-
// is of the form
93-
// `projects/{project}/regions/{region}/networkAttachments/{networkAttachment}`.
94-
// Where {project} is a project number, as in `12345`, and {networkAttachment}
95-
// is a network attachment name.
117+
// attach to the resource within the region and user project.
96118
// To specify this field, you must have already [created a network attachment]
97119
// (https://cloud.google.com/vpc/docs/create-manage-network-attachments#create-network-attachments).
98120
// This field is only used for resources using PSC-I.

0 commit comments

Comments
 (0)