
Commit 026e8be

mmladenovski authored and shollyman committed
---
yaml
---
r: 30589
b: refs/heads/autosynth-automl
c: a364aab
h: refs/heads/master
i: 30587: 3e41427
1 parent 20140ec commit 026e8be

3 files changed

Lines changed: 170 additions & 27 deletions


refs

Lines changed: 1 addition & 1 deletion
@@ -121,7 +121,7 @@ refs/heads/spanner: b01127f885b4611bf1852abb0ce481eeb7fcc131
 refs/tags/v0.68.0: 9cc799fcf68c82ab431d425fefa58ef615ce8e5b
 refs/tags/v0.69.0: 78f67a29e8b9c46ba01de566a2eae0fd1c03edea
 refs/heads/autosynth-asset: bdb45634a0fe8f7a510692b56b31f5312e25f453
-refs/heads/autosynth-automl: 4ed9a31e938d8184dd0acf7d61a8affe8b64b15a
+refs/heads/autosynth-automl: a364aab78f1b0cbbad87abbe77f020b7ef0a942c
 refs/heads/autosynth-bigquerydatatransfer: d88aa5aae5fd9d3c6d75bbab1a05162c6d4d948f
 refs/heads/autosynth-bigquerystorage: d2c53da3b012e38c662e4df0738042435f19365f
 refs/heads/autosynth-bigtable: 9e5429f45cf9face9fed585d0233534993e36b58

branches/autosynth-automl/google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/ITBigQueryStorageTest.java

Lines changed: 157 additions & 21 deletions
@@ -37,6 +37,7 @@
 import com.google.cloud.bigquery.StandardTableDefinition;
 import com.google.cloud.bigquery.TableId;
 import com.google.cloud.bigquery.TableInfo;
+import com.google.cloud.bigquery.TimePartitioning;
 import com.google.cloud.bigquery.storage.v1beta1.BigQueryStorageClient;
 import com.google.cloud.bigquery.storage.v1beta1.ReadOptions.TableReadOptions;
 import com.google.cloud.bigquery.storage.v1beta1.Storage.CreateReadSessionRequest;
@@ -61,7 +62,8 @@
 import java.util.List;
 import java.util.logging.Logger;
 import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecordBuilder;
 import org.apache.avro.util.Utf8;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -254,7 +256,7 @@ public void testFilter() throws IOException {
         response.getAvroRows(),
         new SimpleRowReader.AvroRowConsumer() {
           @Override
-          public void accept(GenericRecord record) {
+          public void accept(GenericData.Record record) {
             Long wordCount = (Long) record.get("word_count");
             assertWithMessage("Row not matching expectations: %s", record.toString())
                 .that(wordCount)
@@ -336,7 +338,7 @@ public void testColumnSelection() throws IOException {
         response.getAvroRows(),
         new SimpleRowReader.AvroRowConsumer() {
           @Override
-          public void accept(GenericRecord record) {
+          public void accept(GenericData.Record record) {
             String rowAssertMessage =
                 String.format("Row not matching expectations: %s", record.toString());
 
@@ -373,39 +375,133 @@ public void testReadAtSnapshot() throws InterruptedException, IOException {
             .build();
 
     Job firstJob =
-        RunQueryJobAndExpectSuccess(
+        RunQueryAppendJobAndExpectSuccess(
             /* destinationTableId = */ testTableId, /* query = */ "SELECT 1 AS col");
 
     Job secondJob =
-        RunQueryJobAndExpectSuccess(
+        RunQueryAppendJobAndExpectSuccess(
            /* destinationTableId = */ testTableId, /* query = */ "SELECT 2 AS col");
 
     final List<Long> rowsAfterFirstSnapshot = new ArrayList<>();
     ProcessRowsAtSnapshot(
-        tableReference,
-        firstJob.getStatistics().getEndTime(),
-        new AvroRowConsumer() {
+        /* tableReference = */ tableReference,
+        /* snapshotInMillis = */ firstJob.getStatistics().getEndTime(),
+        /* filter = */ null,
+        /* consumer = */ new AvroRowConsumer() {
           @Override
-          public void accept(GenericRecord record) {
+          public void accept(GenericData.Record record) {
             rowsAfterFirstSnapshot.add((Long) record.get("col"));
           }
         });
     assertEquals(Arrays.asList(1L), rowsAfterFirstSnapshot);
 
     final List<Long> rowsAfterSecondSnapshot = new ArrayList<>();
     ProcessRowsAtSnapshot(
-        tableReference,
-        secondJob.getStatistics().getEndTime(),
-        new AvroRowConsumer() {
+        /* tableReference = */ tableReference,
+        /* snapshotInMillis = */ secondJob.getStatistics().getEndTime(),
+        /* filter = */ null,
+        /* consumer = */ new AvroRowConsumer() {
           @Override
-          public void accept(GenericRecord record) {
+          public void accept(GenericData.Record record) {
             rowsAfterSecondSnapshot.add((Long) record.get("col"));
           }
         });
     Collections.sort(rowsAfterSecondSnapshot);
     assertEquals(Arrays.asList(1L, 2L), rowsAfterSecondSnapshot);
   }
 
+  @Test
+  public void testColumnPartitionedTableByDateField() throws InterruptedException, IOException {
+    String partitionedTableName = "test_column_partition_table_by_date";
+    String createTableStatement =
+        String.format(
+            " CREATE TABLE %s.%s (num_field INT64, date_field DATE) "
+                + " PARTITION BY date_field "
+                + " OPTIONS( "
+                + "   description=\"a table partitioned by date_field\" "
+                + " ) "
+                + "AS "
+                + "   SELECT 1, CAST(\"2019-01-01\" AS DATE)"
+                + "   UNION ALL"
+                + "   SELECT 2, CAST(\"2019-01-02\" AS DATE)"
+                + "   UNION ALL"
+                + "   SELECT 3, CAST(\"2019-01-03\" AS DATE)",
+            DATASET, partitionedTableName);
+
+    RunQueryJobAndExpectSuccess(QueryJobConfiguration.newBuilder(createTableStatement).build());
+
+    TableReference tableReference =
+        TableReference.newBuilder()
+            .setTableId(partitionedTableName)
+            .setDatasetId(DATASET)
+            .setProjectId(ServiceOptions.getDefaultProjectId())
+            .build();
+
+    List<GenericData.Record> unfilteredRows =
+        ReadAllRows(/* tableReference = */ tableReference, /* filter = */ null);
+    assertEquals("Actual rows read: " + unfilteredRows.toString(), 3, unfilteredRows.size());
+
+    List<GenericData.Record> partitionFilteredRows =
+        ReadAllRows(
+            /* tableReference = */ tableReference,
+            /* filter = */ "date_field = CAST(\"2019-01-02\" AS DATE)");
+    assertEquals(
+        "Actual rows read: " + partitionFilteredRows.toString(), 1, partitionFilteredRows.size());
+    assertEquals(2L, partitionFilteredRows.get(0).get("num_field"));
+  }
+
+  @Test
+  public void testIngestionTimePartitionedTable() throws InterruptedException, IOException {
+    Field intFieldSchema =
+        Field.newBuilder("num_field", LegacySQLTypeName.INTEGER)
+            .setMode(Mode.REQUIRED)
+            .setDescription("IntegerDescription")
+            .build();
+    com.google.cloud.bigquery.Schema tableSchema =
+        com.google.cloud.bigquery.Schema.of(intFieldSchema);
+
+    TableId testTableId =
+        TableId.of(/* dataset = */ DATASET, /* table = */ "test_date_partitioned_table");
+    bigquery.create(
+        TableInfo.of(
+            testTableId,
+            StandardTableDefinition.newBuilder()
+                .setTimePartitioning(TimePartitioning.of(TimePartitioning.Type.DAY))
+                .setSchema(tableSchema)
+                .build()));
+
+    // Simulate ingestion for 2019-01-01.
+    RunQueryAppendJobAndExpectSuccess(
+        /* destinationTableId = */ TableId.of(
+            /* dataset = */ DATASET, /* table = */ testTableId.getTable() + "$20190101"),
+        /* query = */ "SELECT 1 AS num_field");
+
+    // Simulate ingestion for 2019-01-02.
+    RunQueryAppendJobAndExpectSuccess(
+        /* destinationTableId = */ TableId.of(
+            /* dataset = */ DATASET, /* table = */ testTableId.getTable() + "$20190102"),
+        /* query = */ "SELECT 2 AS num_field");
+
+    TableReference tableReference =
+        TableReference.newBuilder()
+            .setTableId(testTableId.getTable())
+            .setDatasetId(testTableId.getDataset())
+            .setProjectId(ServiceOptions.getDefaultProjectId())
+            .build();
+
+    List<GenericData.Record> unfilteredRows =
+        ReadAllRows(/* tableReference = */ tableReference, /* filter = */ null);
+    assertEquals("Actual rows read: " + unfilteredRows.toString(), 2, unfilteredRows.size());
+
+    List<GenericData.Record> partitionFilteredRows =
+        ReadAllRows(
+            /* tableReference = */ tableReference,
+            /* filter = */ "_PARTITIONDATE > \"2019-01-01\"");
+    assertEquals(
+        "Actual rows read: " + partitionFilteredRows.toString(), 1, partitionFilteredRows.size());
+    assertEquals(2L, partitionFilteredRows.get(0).get("num_field"));
+  }
+
   /**
    * Reads to the specified row offset within the stream. If the stream does not have the desired
    * rows to read, it will read all of them.
@@ -436,18 +532,18 @@ private long ReadStreamToOffset(Stream stream, long rowOffset) {
   }
 
   /**
-   * Reads all the rows from the specified tableReference that are added up to timestamp defined in
-   * snapshot. If snapshot is not provided, current time will be used.
+   * Reads all the rows from the specified tableReference.
    *
    * <p>For every row, the consumer is called for processing.
    *
    * @param tableReference
-   * @param snapshotInMillis
-   * @param consumer
+   * @param snapshotInMillis Optional. If specified, all rows up to timestamp will be returned.
+   * @param filter Optional. If specified, it will be used to restrict returned data.
+   * @param consumer that receives all Avro rows.
    * @throws IOException
    */
   private void ProcessRowsAtSnapshot(
-      TableReference tableReference, Long snapshotInMillis, AvroRowConsumer consumer)
+      TableReference tableReference, Long snapshotInMillis, String filter, AvroRowConsumer consumer)
       throws IOException {
     Preconditions.checkNotNull(tableReference);
     Preconditions.checkNotNull(consumer);
@@ -469,6 +565,11 @@ private void ProcessRowsAtSnapshot(
           TableModifiers.newBuilder().setSnapshotTime(snapshotTimestamp).build());
     }
 
+    if (filter != null && !filter.isEmpty()) {
+      createSessionRequestBuilder.setReadOptions(
+          TableReadOptions.newBuilder().setRowRestriction(filter).build());
+    }
+
     ReadSession session = client.createReadSession(createSessionRequestBuilder.build());
     assertEquals(
         String.format(
@@ -492,6 +593,31 @@ private void ProcessRowsAtSnapshot(
     }
   }
 
+  /**
+   * Reads all the rows from the specified table reference and returns a list as generic Avro
+   * records.
+   *
+   * @param tableReference
+   * @param filter Optional. If specified, it will be used to restrict returned data.
+   * @return
+   */
+  List<GenericData.Record> ReadAllRows(TableReference tableReference, String filter)
+      throws IOException {
+    final List<GenericData.Record> rows = new ArrayList<>();
+    ProcessRowsAtSnapshot(
+        /* tableReference = */ tableReference,
+        /* snapshotInMillis = */ null,
+        /* filter = */ filter,
+        new AvroRowConsumer() {
+          @Override
+          public void accept(GenericData.Record record) {
+            // clone the record since that reference will be reused by the reader.
+            rows.add(new GenericRecordBuilder(record).build());
+          }
+        });
+    return rows;
+  }
+
   /**
    * Runs a query job with WRITE_APPEND disposition to the destination table and returns the
    * successfully completed job.
@@ -501,16 +627,26 @@ private void ProcessRowsAtSnapshot(
    * @return
    * @throws InterruptedException
    */
-  private Job RunQueryJobAndExpectSuccess(TableId destinationTableId, String query)
+  private Job RunQueryAppendJobAndExpectSuccess(TableId destinationTableId, String query)
       throws InterruptedException {
-    QueryJobConfiguration configuration =
+    return RunQueryJobAndExpectSuccess(
         QueryJobConfiguration.newBuilder(query)
            .setDestinationTable(destinationTableId)
            .setUseQueryCache(false)
            .setUseLegacySql(false)
            .setWriteDisposition(WriteDisposition.WRITE_APPEND)
-            .build();
+            .build());
+  }
 
+  /**
+   * Runs a query job with provided configuration and returns the successfully completed job.
+   *
+   * @param configuration
+   * @return
+   * @throws InterruptedException
+   */
+  private Job RunQueryJobAndExpectSuccess(QueryJobConfiguration configuration)
+      throws InterruptedException {
     Job job = bigquery.create(JobInfo.of(configuration));
     Job completedJob =
         job.waitFor(
branches/autosynth-automl/google-cloud-clients/google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1beta1/it/SimpleRowReader.java

Lines changed: 12 additions & 5 deletions
@@ -20,8 +20,8 @@
 import com.google.common.base.Preconditions;
 import java.io.IOException;
 import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericDatumReader;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.BinaryDecoder;
 import org.apache.avro.io.DatumReader;
 import org.apache.avro.io.DecoderFactory;
@@ -33,16 +33,23 @@
 public class SimpleRowReader {
 
   public interface AvroRowConsumer {
-    void accept(GenericRecord record);
+
+    /**
+     * Handler for every new Avro row that is read.
+     *
+     * @param record is Avro generic record structure. Consumers should not rely on the reference
+     *     and should copy it if needed. The record reference is reused.
+     */
+    void accept(GenericData.Record record);
   }
 
-  private final DatumReader<GenericRecord> datumReader;
+  private final DatumReader<GenericData.Record> datumReader;
 
   // Decoder object will be reused to avoid re-allocation and too much garbage collection.
   private BinaryDecoder decoder = null;
 
-  // GenericRecord object will be reused.
-  private GenericRecord row = null;
+  // Record object will be reused.
+  private GenericData.Record row = null;
 
   public SimpleRowReader(Schema schema) {
     Preconditions.checkNotNull(schema);
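
The new Javadoc on accept makes the reuse contract explicit: SimpleRowReader hands every consumer the same GenericData.Record instance, so consumers that retain rows must copy them first. A minimal sketch of a conforming consumer follows, using the same GenericRecordBuilder copy that ReadAllRows uses above; the class name is illustrative, not from this commit.

import java.util.List;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecordBuilder;

/** Illustrative consumer that stores a copy of every row it receives. */
public class CopyingRowConsumer implements SimpleRowReader.AvroRowConsumer {

  private final List<GenericData.Record> sink;

  public CopyingRowConsumer(List<GenericData.Record> sink) {
    this.sink = sink;
  }

  @Override
  public void accept(GenericData.Record record) {
    // The reader reuses this record instance for the next row, so copy it
    // before storing the reference.
    sink.add(new GenericRecordBuilder(record).build());
  }
}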
