Skip to content

Commit e6660b8

Browse files
Adding support for experimental schema autodetection feature within BigQuery
1 parent 2404d30 commit e6660b8

7 files changed

Lines changed: 92 additions & 6 deletions

File tree

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ public ExternalDataConfiguration apply(ExternalTableDefinition tableInfo) {
6161
private final Integer maxBadRecords;
6262
private final Boolean ignoreUnknownValues;
6363
private final String compression;
64+
private final Boolean autodetect;
6465

6566
public static final class Builder
6667
extends TableDefinition.Builder<ExternalTableDefinition, Builder> {
@@ -70,6 +71,7 @@ public static final class Builder
7071
private Integer maxBadRecords;
7172
private Boolean ignoreUnknownValues;
7273
private String compression;
74+
private Boolean autodetect;
7375

7476
private Builder() {
7577
super(Type.EXTERNAL);
@@ -82,6 +84,7 @@ private Builder(ExternalTableDefinition tableDefinition) {
8284
this.maxBadRecords = tableDefinition.maxBadRecords;
8385
this.ignoreUnknownValues = tableDefinition.ignoreUnknownValues;
8486
this.compression = tableDefinition.compression;
87+
this.autodetect = tableDefinition.autodetect;
8588
}
8689

8790
private Builder(Table tablePb) {
@@ -101,6 +104,7 @@ private Builder(Table tablePb) {
101104
this.formatOptions = CsvOptions.fromPb(externalDataConfiguration.getCsvOptions());
102105
}
103106
this.maxBadRecords = externalDataConfiguration.getMaxBadRecords();
107+
this.autodetect = externalDataConfiguration.getAutodetect();
104108
}
105109
}
106110

@@ -170,6 +174,15 @@ public Builder setCompression(String compression) {
170174
return this;
171175
}
172176

177+
/**
178+
* [Experimental] Sets whether the schema and format options are detected automatically. Any option specified explicitly will
179+
* be honored.
180+
*/
181+
public Builder setAutodetect(Boolean autodetect) {
182+
this.autodetect = autodetect;
183+
return this;
184+
}
185+
173186
/**
174187
* Creates an {@code ExternalTableDefinition} object.
175188
*/
@@ -186,6 +199,7 @@ private ExternalTableDefinition(Builder builder) {
186199
this.maxBadRecords = builder.maxBadRecords;
187200
this.formatOptions = builder.formatOptions;
188201
this.sourceUris = builder.sourceUris;
202+
this.autodetect = builder.autodetect;
189203
}
190204

191205

@@ -245,6 +259,13 @@ public <F extends FormatOptions> F getFormatOptions() {
245259
return (F) formatOptions;
246260
}
247261

262+
/**
263+
* [Experimental] Returns whether automatic detection of schema and format options should be performed.
264+
*/
265+
public Boolean getAutodetect() {
266+
return autodetect;
267+
}
268+
248269
/**
249270
* Returns a builder for the {@code ExternalTableDefinition} object.
250271
*/
@@ -260,7 +281,8 @@ ToStringHelper toStringHelper() {
260281
.add("formatOptions", formatOptions)
261282
.add("compression", compression)
262283
.add("ignoreUnknownValues", ignoreUnknownValues)
263-
.add("maxBadRecords", maxBadRecords);
284+
.add("maxBadRecords", maxBadRecords)
285+
.add("autodetect", autodetect);
264286
}
265287

266288
@Override
@@ -274,7 +296,7 @@ public final boolean equals(Object obj) {
274296
@Override
275297
public final int hashCode() {
276298
return Objects.hash(baseHashCode(), compression, ignoreUnknownValues, maxBadRecords,
277-
formatOptions, sourceUris);
299+
formatOptions, sourceUris, autodetect);
278300
}
279301

280302
@Override
@@ -308,6 +330,9 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC
308330
if (formatOptions != null && FormatOptions.CSV.equals(formatOptions.getType())) {
309331
externalConfigurationPb.setCsvOptions(((CsvOptions) formatOptions).toPb());
310332
}
333+
if (autodetect != null) {
334+
externalConfigurationPb.setAutodetect(autodetect);
335+
}
311336
return externalConfigurationPb;
312337
}
313338

@@ -417,6 +442,9 @@ static ExternalTableDefinition fromExternalDataConfiguration(
417442
if (externalDataConfiguration.getMaxBadRecords() != null) {
418443
builder.setMaxBadRecords(externalDataConfiguration.getMaxBadRecords());
419444
}
445+
if (externalDataConfiguration.getAutodetect() != null) {
446+
builder.setAutodetect(externalDataConfiguration.getAutodetect());
447+
}
420448
return builder.build();
421449
}
422450
}

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadConfiguration.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,11 @@ interface Builder {
101101
*/
102102
Builder setSchemaUpdateOptions(List<SchemaUpdateOption> schemaUpdateOptions);
103103

104+
/**
105+
* [Experimental] Sets whether the options and schema for CSV and JSON sources are inferred automatically.
106+
*/
107+
Builder setAutodetect(Boolean autodetect);
108+
104109
LoadConfiguration build();
105110
}
106111

@@ -177,6 +182,11 @@ interface Builder {
177182
*/
178183
List<SchemaUpdateOption> getSchemaUpdateOptions();
179184

185+
/**
186+
* [Experimental] Returns whether the options and schema for CSV and JSON sources are inferred automatically.
187+
*/
188+
Boolean getAutodetect();
189+
180190
/**
181191
* Returns a builder for the load configuration object.
182192
*/

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load
4444
private final Schema schema;
4545
private final Boolean ignoreUnknownValues;
4646
private final List<JobInfo.SchemaUpdateOption> schemaUpdateOptions;
47+
private final Boolean autodetect;
4748

4849
public static final class Builder
4950
extends JobConfiguration.Builder<LoadJobConfiguration, Builder>
@@ -59,6 +60,7 @@ public static final class Builder
5960
private Boolean ignoreUnknownValues;
6061
private List<String> projectionFields;
6162
private List<JobInfo.SchemaUpdateOption> schemaUpdateOptions;
63+
private Boolean autodetect;
6264

6365
private Builder() {
6466
super(Type.LOAD);
@@ -75,6 +77,7 @@ private Builder(LoadJobConfiguration loadConfiguration) {
7577
this.ignoreUnknownValues = loadConfiguration.ignoreUnknownValues;
7678
this.sourceUris = loadConfiguration.sourceUris;
7779
this.schemaUpdateOptions = loadConfiguration.schemaUpdateOptions;
80+
this.autodetect = loadConfiguration.autodetect;
7881
}
7982

8083
private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) {
@@ -129,6 +132,7 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
129132
}
130133
this.schemaUpdateOptions = schemaUpdateOptionsBuilder.build();
131134
}
135+
this.autodetect = loadConfigurationPb.getAutodetect();
132136
}
133137

134138

@@ -190,6 +194,11 @@ public Builder setSourceUris(List<String> sourceUris) {
190194
return this;
191195
}
192196

197+
public Builder setAutodetect(Boolean autodetect) {
198+
this.autodetect = autodetect;
199+
return this;
200+
}
201+
193202
@Override
194203
public Builder setSchemaUpdateOptions(List<JobInfo.SchemaUpdateOption> schemaUpdateOptions) {
195204
this.schemaUpdateOptions =
@@ -214,6 +223,7 @@ private LoadJobConfiguration(Builder builder) {
214223
this.schema = builder.schema;
215224
this.ignoreUnknownValues = builder.ignoreUnknownValues;
216225
this.schemaUpdateOptions = builder.schemaUpdateOptions;
226+
this.autodetect = builder.autodetect;
217227
}
218228

219229

@@ -278,6 +288,10 @@ public List<String> getSourceUris() {
278288
return sourceUris;
279289
}
280290

291+
public Boolean getAutodetect() {
292+
return autodetect;
293+
}
294+
281295
@Override
282296
public List<JobInfo.SchemaUpdateOption> getSchemaUpdateOptions() {
283297
return schemaUpdateOptions;
@@ -299,7 +313,8 @@ ToStringHelper toStringHelper() {
299313
.add("schema", schema)
300314
.add("ignoreUnknownValue", ignoreUnknownValues)
301315
.add("sourceUris", sourceUris)
302-
.add("schemaUpdateOptions", schemaUpdateOptions);
316+
.add("schemaUpdateOptions", schemaUpdateOptions)
317+
.add("autodetect", autodetect);
303318
}
304319

305320
@Override
@@ -363,6 +378,7 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
363378
}
364379
loadConfigurationPb.setSchemaUpdateOptions(schemaUpdateOptionsBuilder.build());
365380
}
381+
loadConfigurationPb.setAutodetect(autodetect);
366382
return new com.google.api.services.bigquery.model.JobConfiguration()
367383
.setLoad(loadConfigurationPb);
368384
}

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/WriteChannelConfiguration.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ public final class WriteChannelConfiguration implements LoadConfiguration, Seria
4747
private final Schema schema;
4848
private final Boolean ignoreUnknownValues;
4949
private final List<SchemaUpdateOption> schemaUpdateOptions;
50+
private final Boolean autodetect;
5051

5152
public static final class Builder implements LoadConfiguration.Builder {
5253

@@ -58,6 +59,7 @@ public static final class Builder implements LoadConfiguration.Builder {
5859
private Schema schema;
5960
private Boolean ignoreUnknownValues;
6061
private List<SchemaUpdateOption> schemaUpdateOptions;
62+
private Boolean autodetect;
6163

6264
private Builder() {}
6365

@@ -70,6 +72,7 @@ private Builder(WriteChannelConfiguration writeChannelConfiguration) {
7072
this.schema = writeChannelConfiguration.schema;
7173
this.ignoreUnknownValues = writeChannelConfiguration.ignoreUnknownValues;
7274
this.schemaUpdateOptions = writeChannelConfiguration.schemaUpdateOptions;
75+
this.autodetect = writeChannelConfiguration.autodetect;
7376
}
7477

7578
private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) {
@@ -123,6 +126,7 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
123126
}
124127
this.schemaUpdateOptions = schemaUpdateOptionsBuilder.build();
125128
}
129+
this.autodetect = loadConfigurationPb.getAutodetect();
126130
}
127131

128132

@@ -181,6 +185,12 @@ public Builder setSchemaUpdateOptions(List<SchemaUpdateOption> schemaUpdateOptio
181185
return this;
182186
}
183187

188+
@Override
189+
public Builder setAutodetect(Boolean autodetect) {
190+
this.autodetect = autodetect;
191+
return this;
192+
}
193+
184194
@Override
185195
public WriteChannelConfiguration build() {
186196
return new WriteChannelConfiguration(this);
@@ -196,6 +206,7 @@ protected WriteChannelConfiguration(Builder builder) {
196206
this.schema = builder.schema;
197207
this.ignoreUnknownValues = builder.ignoreUnknownValues;
198208
this.schemaUpdateOptions = builder.schemaUpdateOptions;
209+
this.autodetect = builder.autodetect;
199210
}
200211

201212

@@ -257,6 +268,11 @@ public List<SchemaUpdateOption> getSchemaUpdateOptions() {
257268
return schemaUpdateOptions;
258269
}
259270

271+
@Override
272+
public Boolean getAutodetect() {
273+
return autodetect;
274+
}
275+
260276
@Override
261277
public Builder toBuilder() {
262278
return new Builder(this);
@@ -271,7 +287,8 @@ MoreObjects.ToStringHelper toStringHelper() {
271287
.add("maxBadRecords", maxBadRecords)
272288
.add("schema", schema)
273289
.add("ignoreUnknownValue", ignoreUnknownValues)
274-
.add("schemaUpdateOptions", schemaUpdateOptions);
290+
.add("schemaUpdateOptions", schemaUpdateOptions)
291+
.add("autodetect", autodetect);
275292
}
276293

277294
@Override
@@ -289,7 +306,7 @@ public boolean equals(Object obj) {
289306
@Override
290307
public int hashCode() {
291308
return Objects.hash(destinationTable, createDisposition, writeDisposition, formatOptions,
292-
maxBadRecords, schema, ignoreUnknownValues, schemaUpdateOptions);
309+
maxBadRecords, schema, ignoreUnknownValues, schemaUpdateOptions, autodetect);
293310
}
294311

295312
WriteChannelConfiguration setProjectId(String projectId) {
@@ -336,6 +353,7 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
336353
}
337354
loadConfigurationPb.setSchemaUpdateOptions(schemaUpdateOptionsBuilder.build());
338355
}
356+
loadConfigurationPb.setAutodetect(autodetect);
339357
return new com.google.api.services.bigquery.model.JobConfiguration()
340358
.setLoad(loadConfigurationPb);
341359
}

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,14 @@ public class ExternalTableDefinitionTest {
4646
private static final Integer MAX_BAD_RECORDS = 42;
4747
private static final Boolean IGNORE_UNKNOWN_VALUES = true;
4848
private static final String COMPRESSION = "GZIP";
49+
private static final Boolean AUTODETECT = true;
4950
private static final CsvOptions CSV_OPTIONS = CsvOptions.newBuilder().build();
5051
private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION =
5152
ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, CSV_OPTIONS)
5253
.setCompression(COMPRESSION)
5354
.setIgnoreUnknownValues(IGNORE_UNKNOWN_VALUES)
5455
.setMaxBadRecords(MAX_BAD_RECORDS)
56+
.setAutodetect(AUTODETECT)
5557
.build();
5658

5759
@Test
@@ -83,6 +85,7 @@ public void testBuilder() {
8385
assertEquals(MAX_BAD_RECORDS, EXTERNAL_TABLE_DEFINITION.getMaxBadRecords());
8486
assertEquals(TABLE_SCHEMA, EXTERNAL_TABLE_DEFINITION.getSchema());
8587
assertEquals(SOURCE_URIS, EXTERNAL_TABLE_DEFINITION.getSourceUris());
88+
assertEquals(AUTODETECT, EXTERNAL_TABLE_DEFINITION.getAutodetect());
8689
}
8790

8891

@@ -106,5 +109,6 @@ private void compareExternalTableDefinition(ExternalTableDefinition expected,
106109
assertEquals(expected.getSchema(), value.getSchema());
107110
assertEquals(expected.getSourceUris(), value.getSourceUris());
108111
assertEquals(expected.hashCode(), value.hashCode());
112+
assertEquals(expected.getAutodetect(), value.getAutodetect());
109113
}
110114
}

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ public class LoadJobConfigurationTest {
4949
private static final List<SchemaUpdateOption> SCHEMA_UPDATE_OPTIONS =
5050
ImmutableList.of(SchemaUpdateOption.ALLOW_FIELD_ADDITION);
5151
private static final Schema TABLE_SCHEMA = Schema.of(FIELD_SCHEMA);
52+
private static final Boolean AUTODETECT = true;
5253
private static final LoadJobConfiguration LOAD_CONFIGURATION_CSV =
5354
LoadJobConfiguration.newBuilder(TABLE_ID, SOURCE_URIS)
5455
.setCreateDisposition(CREATE_DISPOSITION)
@@ -58,6 +59,7 @@ public class LoadJobConfigurationTest {
5859
.setMaxBadRecords(MAX_BAD_RECORDS)
5960
.setSchema(TABLE_SCHEMA)
6061
.setSchemaUpdateOptions(SCHEMA_UPDATE_OPTIONS)
62+
.setAutodetect(AUTODETECT)
6163
.build();
6264
private static final DatastoreBackupOptions BACKUP_OPTIONS = DatastoreBackupOptions.newBuilder()
6365
.setProjectionFields(ImmutableList.of("field_1", "field_2"))
@@ -71,6 +73,7 @@ public class LoadJobConfigurationTest {
7173
.setMaxBadRecords(MAX_BAD_RECORDS)
7274
.setSchema(TABLE_SCHEMA)
7375
.setSchemaUpdateOptions(SCHEMA_UPDATE_OPTIONS)
76+
.setAutodetect(AUTODETECT)
7477
.build();
7578

7679
@Test
@@ -153,6 +156,7 @@ private void compareLoadJobConfiguration(LoadJobConfiguration expected,
153156
assertEquals(expected.getMaxBadRecords(), value.getMaxBadRecords());
154157
assertEquals(expected.getSchema(), value.getSchema());
155158
assertEquals(expected.getDatastoreBackupOptions(), value.getDatastoreBackupOptions());
159+
assertEquals(expected.getAutodetect(), value.getAutodetect());
156160
assertEquals(expected.getSchemaUpdateOptions(), value.getSchemaUpdateOptions());
157161
}
158162
}

0 commit comments

Comments
 (0)