Skip to content

Commit d8a3dfb

Browse files
Google APIs authored and copybara-github committed
feat: Additional fields for the ClusterConfig and InstanceGroupConfig messages.
This change includes the following updates: 1. There is a new `temp_bucket` field for clusters. 2. There is a new `endpoint_config` field for clusters. 3. There is a new `preemptibility` field for instance group configs. 4. There are various updates to the doc comments. PiperOrigin-RevId: 323829608
1 parent 87c7bab commit d8a3dfb

6 files changed

Lines changed: 155 additions & 42 deletions

File tree

google/cloud/dataproc/v1/autoscaling_policies.proto

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,20 +164,26 @@ message BasicYarnAutoscalingConfig {
164164
// Bounds: [0s, 1d].
165165
google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];
166166

167-
// Required. Fraction of average pending memory in the last cooldown period
167+
// Required. Fraction of average YARN pending memory in the last cooldown period
168168
// for which to add workers. A scale-up factor of 1.0 will result in scaling
169169
// up so that there is no pending memory remaining after the update (more
170170
// aggressive scaling). A scale-up factor closer to 0 will result in a smaller
171171
// magnitude of scaling up (less aggressive scaling).
172+
// See [How autoscaling
173+
// works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
174+
// for more information.
172175
//
173176
// Bounds: [0.0, 1.0].
174177
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];
175178

176-
// Required. Fraction of average pending memory in the last cooldown period
179+
// Required. Fraction of average YARN pending memory in the last cooldown period
177180
// for which to remove workers. A scale-down factor of 1 will result in
178181
// scaling down so that there is no available memory remaining after the
179182
// update (more aggressive scaling). A scale-down factor of 0 disables
180183
// removing workers, which can be beneficial for autoscaling a single job.
184+
// See [How autoscaling
185+
// works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
186+
// for more information.
181187
//
182188
// Bounds: [0.0, 1.0].
183189
double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];

google/cloud/dataproc/v1/clusters.proto

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,17 @@ message ClusterConfig {
170170
// bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
171171
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];
172172

173+
// Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
174+
// such as Spark and MapReduce history files.
175+
// If you do not specify a temp bucket,
176+
// Dataproc will determine a Cloud Storage location (US,
177+
// ASIA, or EU) for your cluster's temp bucket according to the
178+
// Compute Engine zone where your cluster is deployed, and then create
179+
// and manage this project-level, per-location bucket. The default bucket has
180+
// a TTL of 90 days, but you can use any TTL (or none) if you specify a
181+
// bucket.
182+
string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];
183+
173184
// Optional. The shared Compute Engine config settings for
174185
// all instances in a cluster.
175186
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
@@ -216,6 +227,20 @@ message ClusterConfig {
216227

217228
// Optional. Lifecycle setting for the cluster.
218229
LifecycleConfig lifecycle_config = 17 [(google.api.field_behavior) = OPTIONAL];
230+
231+
// Optional. Port/endpoint configuration for this cluster.
232+
EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];
233+
}
234+
235+
// Endpoint config for this cluster.
236+
message EndpointConfig {
237+
// Output only. The map of port descriptions to URLs. Will only be populated
238+
// if enable_http_port_access is true.
239+
map<string, string> http_ports = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
240+
241+
// Optional. If true, enable HTTP access to specific ports on the cluster
242+
// from external sources. Defaults to false.
243+
bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
219244
}
220245

221246
// Autoscaling Policy config associated with the cluster.
@@ -288,7 +313,7 @@ message GceClusterConfig {
288313
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];
289314

290315
// Optional. The [Dataproc service
291-
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
316+
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
292317
// (also see [VM Data Plane
293318
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
294319
// used by Dataproc cluster VM instances to access Google Cloud Platform
@@ -332,6 +357,27 @@ message GceClusterConfig {
332357
// The config settings for Compute Engine resources in
333358
// an instance group, such as a master or worker group.
334359
message InstanceGroupConfig {
360+
// Controls the use of
361+
// [preemptible
362+
// instances](https://cloud.google.com/compute/docs/instances/preemptible)
363+
// within the group.
364+
enum Preemptibility {
365+
// Preemptibility is unspecified; the system will choose the
366+
// appropriate setting for each instance group.
367+
PREEMPTIBILITY_UNSPECIFIED = 0;
368+
369+
// Instances are non-preemptible.
370+
//
371+
// This option is allowed for all instance groups and is the only valid
372+
// value for Master and Worker instance groups.
373+
NON_PREEMPTIBLE = 1;
374+
375+
// Instances are preemptible.
376+
//
377+
// This option is allowed only for secondary worker groups.
378+
PREEMPTIBLE = 2;
379+
}
380+
335381
// Optional. The number of VM instances in the instance group.
336382
// For master instance groups, must be set to 1.
337383
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
@@ -382,6 +428,15 @@ message InstanceGroupConfig {
382428
// instances.
383429
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
384430

431+
// Optional. Specifies the preemptibility of the instance group.
432+
//
433+
// The default value for master and worker groups is
434+
// `NON_PREEMPTIBLE`. This default cannot be changed.
435+
//
436+
// The default value for secondary instances is
437+
// `PREEMPTIBLE`.
438+
Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];
439+
385440
// Output only. The config for Compute Engine Instance Group
386441
// Manager that manages this group.
387442
// This is only used for preemptible instance groups.
@@ -608,7 +663,7 @@ message KerberosConfig {
608663
message SoftwareConfig {
609664
// Optional. The version of software inside the cluster. It must be one of the
610665
// supported [Dataproc
611-
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
666+
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
612667
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
613668
// ["preview"
614669
// version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).

google/cloud/dataproc/v1/dataproc_v1.yaml

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,32 +28,77 @@ documentation:
2828
Sets the access control policy on the specified resource. Replaces
2929
any existing policy.
3030
31-
Can return Public Errors: NOT_FOUND, INVALID_ARGUMENT and
32-
PERMISSION_DENIED
31+
Can return `NOT_FOUND`, `INVALID_ARGUMENT`, and `PERMISSION_DENIED`
32+
errors.
3333
3434
- selector: google.iam.v1.IAMPolicy.TestIamPermissions
3535
description: |-
3636
Returns permissions that a caller has on the specified resource. If the
3737
resource does not exist, this will return an empty set of
38-
permissions, not a NOT_FOUND error.
38+
permissions, not a `NOT_FOUND` error.
3939
4040
Note: This operation is designed to be used for building
4141
permission-aware UIs and command-line tools, not for authorization
4242
checking. This operation may "fail open" without warning.
4343
4444
http:
4545
rules:
46-
- selector: google.longrunning.Operations.ListOperations
47-
get: '/v1/{name=projects/*/regions/*/operations}'
48-
49-
- selector: google.longrunning.Operations.GetOperation
50-
get: '/v1/{name=projects/*/regions/*/operations/*}'
51-
52-
- selector: google.longrunning.Operations.DeleteOperation
53-
delete: '/v1/{name=projects/*/regions/*/operations/*}'
54-
46+
- selector: google.iam.v1.IAMPolicy.GetIamPolicy
47+
post: '/v1/{resource=projects/*/regions/*/clusters/*}:getIamPolicy'
48+
body: '*'
49+
additional_bindings:
50+
- post: '/v1/{resource=projects/*/regions/*/jobs/*}:getIamPolicy'
51+
body: '*'
52+
- post: '/v1/{resource=projects/*/regions/*/operations/*}:getIamPolicy'
53+
body: '*'
54+
- post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:getIamPolicy'
55+
body: '*'
56+
- post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:getIamPolicy'
57+
body: '*'
58+
- post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:getIamPolicy'
59+
body: '*'
60+
- post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:getIamPolicy'
61+
body: '*'
62+
- selector: google.iam.v1.IAMPolicy.SetIamPolicy
63+
post: '/v1/{resource=projects/*/regions/*/clusters/*}:setIamPolicy'
64+
body: '*'
65+
additional_bindings:
66+
- post: '/v1/{resource=projects/*/regions/*/jobs/*}:setIamPolicy'
67+
body: '*'
68+
- post: '/v1/{resource=projects/*/regions/*/operations/*}:setIamPolicy'
69+
body: '*'
70+
- post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:setIamPolicy'
71+
body: '*'
72+
- post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:setIamPolicy'
73+
body: '*'
74+
- post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:setIamPolicy'
75+
body: '*'
76+
- post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:setIamPolicy'
77+
body: '*'
78+
- selector: google.iam.v1.IAMPolicy.TestIamPermissions
79+
post: '/v1/{resource=projects/*/regions/*/clusters/*}:testIamPermissions'
80+
body: '*'
81+
additional_bindings:
82+
- post: '/v1/{resource=projects/*/regions/*/jobs/*}:testIamPermissions'
83+
body: '*'
84+
- post: '/v1/{resource=projects/*/regions/*/operations/*}:testIamPermissions'
85+
body: '*'
86+
- post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:testIamPermissions'
87+
body: '*'
88+
- post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:testIamPermissions'
89+
body: '*'
90+
- post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:testIamPermissions'
91+
body: '*'
92+
- post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:testIamPermissions'
93+
body: '*'
5594
- selector: google.longrunning.Operations.CancelOperation
5695
post: '/v1/{name=projects/*/regions/*/operations/*}:cancel'
96+
- selector: google.longrunning.Operations.DeleteOperation
97+
delete: '/v1/{name=projects/*/regions/*/operations/*}'
98+
- selector: google.longrunning.Operations.GetOperation
99+
get: '/v1/{name=projects/*/regions/*/operations/*}'
100+
- selector: google.longrunning.Operations.ListOperations
101+
get: '/v1/{name=projects/*/regions/*/operations}'
57102

58103
authentication:
59104
rules:

google/cloud/dataproc/v1/jobs.proto

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -224,12 +224,12 @@ message SparkJob {
224224
// Spark driver and tasks.
225225
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
226226

227-
// Optional. HCFS URIs of files to be copied to the working directory of
228-
// Spark drivers and distributed tasks. Useful for naively parallel tasks.
227+
// Optional. HCFS URIs of files to be placed in the working directory of
228+
// each executor. Useful for naively parallel tasks.
229229
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
230230

231-
// Optional. HCFS URIs of archives to be extracted in the working directory
232-
// of Spark drivers and tasks. Supported file types:
231+
// Optional. HCFS URIs of archives to be extracted into the working directory
232+
// of each executor. Supported file types:
233233
// .jar, .tar, .tar.gz, .tgz, and .zip.
234234
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
235235

@@ -265,11 +265,12 @@ message PySparkJob {
265265
// Python driver and tasks.
266266
repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
267267

268-
// Optional. HCFS URIs of files to be copied to the working directory of
269-
// Python drivers and distributed tasks. Useful for naively parallel tasks.
268+
// Optional. HCFS URIs of files to be placed in the working directory of
269+
// each executor. Useful for naively parallel tasks.
270270
repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
271271

272-
// Optional. HCFS URIs of archives to be extracted in the working directory of
272+
// Optional. HCFS URIs of archives to be extracted into the working directory
273+
// of each executor. Supported file types:
273274
// .jar, .tar, .tar.gz, .tgz, and .zip.
274275
repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
275276

@@ -414,12 +415,12 @@ message SparkRJob {
414415
// occur that causes an incorrect job submission.
415416
repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];
416417

417-
// Optional. HCFS URIs of files to be copied to the working directory of
418-
// R drivers and distributed tasks. Useful for naively parallel tasks.
418+
// Optional. HCFS URIs of files to be placed in the working directory of
419+
// each executor. Useful for naively parallel tasks.
419420
repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
420421

421-
// Optional. HCFS URIs of archives to be extracted in the working directory of
422-
// Spark drivers and tasks. Supported file types:
422+
// Optional. HCFS URIs of archives to be extracted into the working directory
423+
// of each executor. Supported file types:
423424
// .jar, .tar, .tar.gz, .tgz, and .zip.
424425
repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
425426

@@ -565,9 +566,9 @@ message JobStatus {
565566

566567
// Encapsulates the full scoping used to reference a job.
567568
message JobReference {
568-
// Required. The ID of the Google Cloud Platform project that the job
569-
// belongs to.
570-
string project_id = 1 [(google.api.field_behavior) = REQUIRED];
569+
// Optional. The ID of the Google Cloud Platform project that the job belongs to. If
570+
// specified, must match the request project ID.
571+
string project_id = 1 [(google.api.field_behavior) = OPTIONAL];
571572

572573
// Optional. The job ID, which must be unique within the project.
573574
//

google/cloud/dataproc/v1/shared.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ option java_package = "com.google.cloud.dataproc.v1";
2525

2626
// Cluster components that can be activated.
2727
enum Component {
28-
// Unspecified component.
28+
// Unspecified component. Specifying this will cause Cluster creation to fail.
2929
COMPONENT_UNSPECIFIED = 0;
3030

3131
// The Anaconda python distribution.

google/cloud/dataproc/v1/workflow_templates.proto

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ message WorkflowTemplate {
238238
// Required. The Directed Acyclic Graph of Jobs to submit.
239239
repeated OrderedJob jobs = 8 [(google.api.field_behavior) = REQUIRED];
240240

241-
// Optional. emplate parameters whose values are substituted into the
241+
// Optional. Template parameters whose values are substituted into the
242242
// template. Values for parameters must be provided when the template is
243243
// instantiated.
244244
repeated TemplateParameter parameters = 9 [(google.api.field_behavior) = OPTIONAL];
@@ -319,23 +319,29 @@ message OrderedJob {
319319

320320
// Required. The job definition.
321321
oneof job_type {
322-
HadoopJob hadoop_job = 2;
322+
// Optional. Job is a Hadoop job.
323+
HadoopJob hadoop_job = 2 [(google.api.field_behavior) = OPTIONAL];
323324

324-
SparkJob spark_job = 3;
325+
// Optional. Job is a Spark job.
326+
SparkJob spark_job = 3 [(google.api.field_behavior) = OPTIONAL];
325327

326-
PySparkJob pyspark_job = 4;
328+
// Optional. Job is a PySpark job.
329+
PySparkJob pyspark_job = 4 [(google.api.field_behavior) = OPTIONAL];
327330

328-
HiveJob hive_job = 5;
331+
// Optional. Job is a Hive job.
332+
HiveJob hive_job = 5 [(google.api.field_behavior) = OPTIONAL];
329333

330-
PigJob pig_job = 6;
334+
// Optional. Job is a Pig job.
335+
PigJob pig_job = 6 [(google.api.field_behavior) = OPTIONAL];
331336

332-
// Spark R job
333-
SparkRJob spark_r_job = 11;
337+
// Optional. Job is a SparkR job.
338+
SparkRJob spark_r_job = 11 [(google.api.field_behavior) = OPTIONAL];
334339

335-
SparkSqlJob spark_sql_job = 7;
340+
// Optional. Job is a Spark SQL job.
341+
SparkSqlJob spark_sql_job = 7 [(google.api.field_behavior) = OPTIONAL];
336342

337-
// Presto job
338-
PrestoJob presto_job = 12;
343+
// Optional. Job is a Presto job.
344+
PrestoJob presto_job = 12 [(google.api.field_behavior) = OPTIONAL];
339345
}
340346

341347
// Optional. The labels to associate with this job.

0 commit comments

Comments
 (0)