Skip to content

Commit 1ba92ed

Browse files
Adding support for experimental schema autodetection feature within BigQuery
1 parent e61ca31 commit 1ba92ed

7 files changed

Lines changed: 90 additions & 5 deletions

File tree

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java

Lines changed: 30 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ public ExternalDataConfiguration apply(ExternalTableDefinition tableInfo) {
6161
private final Integer maxBadRecords;
6262
private final Boolean ignoreUnknownValues;
6363
private final String compression;
64+
private final Boolean autodetect;
6465

6566
public static final class Builder
6667
extends TableDefinition.Builder<ExternalTableDefinition, Builder> {
@@ -70,6 +71,7 @@ public static final class Builder
7071
private Integer maxBadRecords;
7172
private Boolean ignoreUnknownValues;
7273
private String compression;
74+
private Boolean autodetect;
7375

7476
private Builder() {
7577
super(Type.EXTERNAL);
@@ -82,6 +84,7 @@ private Builder(ExternalTableDefinition tableDefinition) {
8284
this.maxBadRecords = tableDefinition.maxBadRecords;
8385
this.ignoreUnknownValues = tableDefinition.ignoreUnknownValues;
8486
this.compression = tableDefinition.compression;
87+
this.autodetect = tableDefinition.autodetect;
8588
}
8689

8790
private Builder(Table tablePb) {
@@ -101,6 +104,7 @@ private Builder(Table tablePb) {
101104
this.formatOptions = CsvOptions.fromPb(externalDataConfiguration.getCsvOptions());
102105
}
103106
this.maxBadRecords = externalDataConfiguration.getMaxBadRecords();
107+
this.autodetect = externalDataConfiguration.getAutodetect();
104108
}
105109
}
106110

@@ -226,6 +230,15 @@ public Builder setCompression(String compression) {
226230
return this;
227231
}
228232

233+
/**
234+
* [Experimental] Sets whether the schema and format options should be detected automatically. Any option specified explicitly will
235+
* be honored.
236+
*/
237+
public Builder setAutodetect(Boolean autodetect) {
238+
this.autodetect = autodetect;
239+
return this;
240+
}
241+
229242
/**
230243
* Creates an {@code ExternalTableDefinition} object.
231244
*/
@@ -242,6 +255,7 @@ private ExternalTableDefinition(Builder builder) {
242255
this.maxBadRecords = builder.maxBadRecords;
243256
this.formatOptions = builder.formatOptions;
244257
this.sourceUris = builder.sourceUris;
258+
this.autodetect = builder.autodetect;
245259
}
246260

247261
/**
@@ -340,6 +354,13 @@ public <F extends FormatOptions> F getFormatOptions() {
340354
return (F) formatOptions;
341355
}
342356

357+
/**
358+
* [Experimental] Returns whether automatic detection of schema and format options should be performed.
359+
*/
360+
public Boolean getAutodetect() {
361+
return autodetect;
362+
}
363+
343364
/**
344365
* Returns a builder for the {@code ExternalTableDefinition} object.
345366
*/
@@ -355,7 +376,8 @@ ToStringHelper toStringHelper() {
355376
.add("formatOptions", formatOptions)
356377
.add("compression", compression)
357378
.add("ignoreUnknownValues", ignoreUnknownValues)
358-
.add("maxBadRecords", maxBadRecords);
379+
.add("maxBadRecords", maxBadRecords)
380+
.add("autodetect", autodetect);
359381
}
360382

361383
@Override
@@ -369,7 +391,7 @@ public final boolean equals(Object obj) {
369391
@Override
370392
public final int hashCode() {
371393
return Objects.hash(baseHashCode(), compression, ignoreUnknownValues, maxBadRecords,
372-
formatOptions, sourceUris);
394+
formatOptions, sourceUris, autodetect);
373395
}
374396

375397
@Override
@@ -403,6 +425,9 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC
403425
if (formatOptions != null && FormatOptions.CSV.equals(formatOptions.getType())) {
404426
externalConfigurationPb.setCsvOptions(((CsvOptions) formatOptions).toPb());
405427
}
428+
if (autodetect != null) {
429+
externalConfigurationPb.setAutodetect(autodetect);
430+
}
406431
return externalConfigurationPb;
407432
}
408433

@@ -549,6 +574,9 @@ static ExternalTableDefinition fromExternalDataConfiguration(
549574
if (externalDataConfiguration.getMaxBadRecords() != null) {
550575
builder.setMaxBadRecords(externalDataConfiguration.getMaxBadRecords());
551576
}
577+
if (externalDataConfiguration.getAutodetect() != null) {
578+
builder.setAutodetect(externalDataConfiguration.getAutodetect());
579+
}
552580
return builder.build();
553581
}
554582
}

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadConfiguration.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,11 @@ interface Builder {
164164
*/
165165
Builder setProjectionFields(List<String> projectionFields);
166166

167+
/**
168+
* [Experimental] Sets whether the options and schema for CSV and JSON sources should be inferred automatically.
169+
*/
170+
Builder setAutodetect(Boolean autodetect);
171+
167172
LoadConfiguration build();
168173
}
169174

@@ -289,6 +294,11 @@ interface Builder {
289294
*/
290295
List<String> getProjectionFields();
291296

297+
/**
298+
* [Experimental] Returns whether automatic inference of the options and schema for CSV and JSON sources is set.
299+
*/
300+
Boolean getAutodetect();
301+
292302
/**
293303
* Returns a builder for the load configuration object.
294304
*/

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load
4444
private final Schema schema;
4545
private final Boolean ignoreUnknownValues;
4646
private final List<String> projectionFields;
47+
private final Boolean autodetect;
4748

4849
public static final class Builder
4950
extends JobConfiguration.Builder<LoadJobConfiguration, Builder>
@@ -58,6 +59,7 @@ public static final class Builder
5859
private Schema schema;
5960
private Boolean ignoreUnknownValues;
6061
private List<String> projectionFields;
62+
private Boolean autodetect;
6163

6264
private Builder() {
6365
super(Type.LOAD);
@@ -74,6 +76,7 @@ private Builder(LoadJobConfiguration loadConfiguration) {
7476
this.ignoreUnknownValues = loadConfiguration.ignoreUnknownValues;
7577
this.projectionFields = loadConfiguration.projectionFields;
7678
this.sourceUris = loadConfiguration.sourceUris;
79+
this.autodetect = loadConfiguration.autodetect;
7780
}
7881

7982
private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) {
@@ -121,6 +124,7 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
121124
if (loadConfigurationPb.getSourceUris() != null) {
122125
this.sourceUris = ImmutableList.copyOf(configurationPb.getLoad().getSourceUris());
123126
}
127+
this.autodetect = loadConfigurationPb.getAutodetect();
124128
}
125129

126130
@Override
@@ -240,6 +244,11 @@ public Builder setSourceUris(List<String> sourceUris) {
240244
return this;
241245
}
242246

247+
public Builder setAutodetect(Boolean autodetect) {
248+
this.autodetect = autodetect;
249+
return this;
250+
}
251+
243252
@Override
244253
public LoadJobConfiguration build() {
245254
return new LoadJobConfiguration(this);
@@ -257,6 +266,7 @@ private LoadJobConfiguration(Builder builder) {
257266
this.schema = builder.schema;
258267
this.ignoreUnknownValues = builder.ignoreUnknownValues;
259268
this.projectionFields = builder.projectionFields;
269+
this.autodetect = builder.autodetect;
260270
}
261271

262272
@Override
@@ -371,6 +381,10 @@ public List<String> getSourceUris() {
371381
return sourceUris;
372382
}
373383

384+
public Boolean getAutodetect() {
385+
return autodetect;
386+
}
387+
374388
@Override
375389
public Builder toBuilder() {
376390
return new Builder(this);
@@ -387,7 +401,8 @@ ToStringHelper toStringHelper() {
387401
.add("schema", schema)
388402
.add("ignoreUnknownValue", ignoreUnknownValues)
389403
.add("projectionFields", projectionFields)
390-
.add("sourceUris", sourceUris);
404+
.add("sourceUris", sourceUris)
405+
.add("autodetect", autodetect);
391406
}
392407

393408
@Override
@@ -441,6 +456,7 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
441456
if (sourceUris != null) {
442457
loadConfigurationPb.setSourceUris(ImmutableList.copyOf(sourceUris));
443458
}
459+
loadConfigurationPb.setAutodetect(autodetect);
444460
return new com.google.api.services.bigquery.model.JobConfiguration()
445461
.setLoad(loadConfigurationPb);
446462
}

google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/WriteChannelConfiguration.java

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ public final class WriteChannelConfiguration implements LoadConfiguration, Seria
4646
private final Schema schema;
4747
private final Boolean ignoreUnknownValues;
4848
private final List<String> projectionFields;
49+
private final Boolean autodetect;
4950

5051
public static final class Builder implements LoadConfiguration.Builder {
5152

@@ -57,6 +58,7 @@ public static final class Builder implements LoadConfiguration.Builder {
5758
private Schema schema;
5859
private Boolean ignoreUnknownValues;
5960
private List<String> projectionFields;
61+
private Boolean autodetect;
6062

6163
private Builder() {}
6264

@@ -69,6 +71,7 @@ private Builder(WriteChannelConfiguration writeChannelConfiguration) {
6971
this.schema = writeChannelConfiguration.schema;
7072
this.ignoreUnknownValues = writeChannelConfiguration.ignoreUnknownValues;
7173
this.projectionFields = writeChannelConfiguration.projectionFields;
74+
this.autodetect = writeChannelConfiguration.autodetect;
7275
}
7376

7477
private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) {
@@ -111,6 +114,7 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur
111114
}
112115
this.ignoreUnknownValues = loadConfigurationPb.getIgnoreUnknownValues();
113116
this.projectionFields = loadConfigurationPb.getProjectionFields();
117+
this.autodetect = loadConfigurationPb.getAutodetect();
114118
}
115119

116120
@Override
@@ -210,6 +214,12 @@ public Builder setProjectionFields(List<String> projectionFields) {
210214
return this;
211215
}
212216

217+
@Override
218+
public Builder setAutodetect(Boolean autodetect) {
219+
this.autodetect = autodetect;
220+
return this;
221+
}
222+
213223
@Override
214224
public WriteChannelConfiguration build() {
215225
return new WriteChannelConfiguration(this);
@@ -225,6 +235,7 @@ protected WriteChannelConfiguration(Builder builder) {
225235
this.schema = builder.schema;
226236
this.ignoreUnknownValues = builder.ignoreUnknownValues;
227237
this.projectionFields = builder.projectionFields;
238+
this.autodetect = builder.autodetect;
228239
}
229240

230241
@Override
@@ -320,6 +331,11 @@ public List<String> getProjectionFields() {
320331
return projectionFields;
321332
}
322333

334+
@Override
335+
public Boolean getAutodetect() {
336+
return autodetect;
337+
}
338+
323339
@Override
324340
public Builder toBuilder() {
325341
return new Builder(this);
@@ -334,7 +350,8 @@ MoreObjects.ToStringHelper toStringHelper() {
334350
.add("maxBadRecords", maxBadRecords)
335351
.add("schema", schema)
336352
.add("ignoreUnknownValue", ignoreUnknownValues)
337-
.add("projectionFields", projectionFields);
353+
.add("projectionFields", projectionFields)
354+
.add("autodetect", autodetect);
338355
}
339356

340357
@Override
@@ -352,7 +369,7 @@ public boolean equals(Object obj) {
352369
@Override
353370
public int hashCode() {
354371
return Objects.hash(destinationTable, createDisposition, writeDisposition, formatOptions,
355-
maxBadRecords, schema, ignoreUnknownValues, projectionFields);
372+
maxBadRecords, schema, ignoreUnknownValues, projectionFields, autodetect);
356373
}
357374

358375
WriteChannelConfiguration setProjectId(String projectId) {
@@ -389,6 +406,7 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
389406
loadConfigurationPb.setMaxBadRecords(maxBadRecords);
390407
loadConfigurationPb.setIgnoreUnknownValues(ignoreUnknownValues);
391408
loadConfigurationPb.setProjectionFields(projectionFields);
409+
loadConfigurationPb.setAutodetect(autodetect);
392410
return new com.google.api.services.bigquery.model.JobConfiguration()
393411
.setLoad(loadConfigurationPb);
394412
}

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,12 +46,14 @@ public class ExternalTableDefinitionTest {
4646
private static final Integer MAX_BAD_RECORDS = 42;
4747
private static final Boolean IGNORE_UNKNOWN_VALUES = true;
4848
private static final String COMPRESSION = "GZIP";
49+
private static final Boolean AUTODETECT = true;
4950
private static final CsvOptions CSV_OPTIONS = CsvOptions.newBuilder().build();
5051
private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION =
5152
ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, CSV_OPTIONS)
5253
.setCompression(COMPRESSION)
5354
.setIgnoreUnknownValues(IGNORE_UNKNOWN_VALUES)
5455
.setMaxBadRecords(MAX_BAD_RECORDS)
56+
.setAutodetect(AUTODETECT)
5557
.build();
5658
private static final ExternalTableDefinition DEPRECATED_EXTERNAL_TABLE_DEFINITION =
5759
ExternalTableDefinition.builder(SOURCE_URIS, TABLE_SCHEMA, CSV_OPTIONS)
@@ -89,6 +91,7 @@ public void testBuilder() {
8991
assertEquals(MAX_BAD_RECORDS, EXTERNAL_TABLE_DEFINITION.getMaxBadRecords());
9092
assertEquals(TABLE_SCHEMA, EXTERNAL_TABLE_DEFINITION.getSchema());
9193
assertEquals(SOURCE_URIS, EXTERNAL_TABLE_DEFINITION.getSourceUris());
94+
assertEquals(AUTODETECT, EXTERNAL_TABLE_DEFINITION.getAutodetect());
9295
}
9396

9497
@Test
@@ -122,5 +125,6 @@ private void compareExternalTableDefinition(ExternalTableDefinition expected,
122125
assertEquals(expected.getSchema(), value.getSchema());
123126
assertEquals(expected.getSourceUris(), value.getSourceUris());
124127
assertEquals(expected.hashCode(), value.hashCode());
128+
assertEquals(expected.getAutodetect(), value.getAutodetect());
125129
}
126130
}

google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ public class LoadJobConfigurationTest {
4747
.build();
4848
private static final List<String> SOURCE_URIS = ImmutableList.of("uri1", "uri2");
4949
private static final Schema TABLE_SCHEMA = Schema.of(FIELD_SCHEMA);
50+
private static final Boolean AUTODETECT = true;
5051
private static final LoadJobConfiguration LOAD_CONFIGURATION =
5152
LoadJobConfiguration.newBuilder(TABLE_ID, SOURCE_URIS)
5253
.setCreateDisposition(CREATE_DISPOSITION)
@@ -56,6 +57,7 @@ public class LoadJobConfigurationTest {
5657
.setMaxBadRecords(MAX_BAD_RECORDS)
5758
.setProjectionFields(PROJECTION_FIELDS)
5859
.setSchema(TABLE_SCHEMA)
60+
.setAutodetect(AUTODETECT)
5961
.build();
6062
private static final LoadJobConfiguration DEPRECATED_LOAD_CONFIGURATION =
6163
LoadJobConfiguration.builder(TABLE_ID, SOURCE_URIS)
@@ -129,6 +131,7 @@ public void testBuilderDeprecated() {
129131
assertEquals(MAX_BAD_RECORDS, LOAD_CONFIGURATION.getMaxBadRecords());
130132
assertEquals(PROJECTION_FIELDS, LOAD_CONFIGURATION.getProjectionFields());
131133
assertEquals(TABLE_SCHEMA, LOAD_CONFIGURATION.getSchema());
134+
assertEquals(AUTODETECT, LOAD_CONFIGURATION.getAutodetect());
132135
}
133136

134137
@Test
@@ -159,5 +162,6 @@ private void compareLoadJobConfiguration(LoadJobConfiguration expected,
159162
assertEquals(expected.getMaxBadRecords(), value.getMaxBadRecords());
160163
assertEquals(expected.getProjectionFields(), value.getProjectionFields());
161164
assertEquals(expected.getSchema(), value.getSchema());
165+
assertEquals(expected.getAutodetect(), value.getAutodetect());
162166
}
163167
}

0 commit comments

Comments
 (0)