Skip to content

Commit 341d70f

Browse files
Google APIs (copybara-github)
authored and committed
feat: add ability to request compressed ReadRowsResponse rows
This change allows the client to request raw LZ4 compression of the ReadRowsResponse rows data for both ArrowRecordBatches and Avro rows.

PiperOrigin-RevId: 597000088
1 parent 91fb1b8 commit 341d70f

2 files changed

Lines changed: 37 additions & 0 deletions

File tree

google/cloud/bigquery/storage/v1/storage.proto

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,23 @@ message ReadRowsResponse {
348348
// Output only. Arrow schema.
349349
ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
350350
}
351+
352+
// Optional. If the row data in this ReadRowsResponse is compressed, then
353+
// uncompressed byte size is the original size of the uncompressed row data.
354+
// If it is set to a value greater than 0, then decompress into a buffer of
355+
// size uncompressed_byte_size using the compression codec that was requested
356+
// during session creation time and which is specified in
357+
// TableReadOptions.response_compression_codec in ReadSession.
358+
// This value is not set if no response_compression_codec was requested
359+
// and it is -1 if the requested compression would not have reduced the size
360+
// of this ReadRowsResponse's row data. This attempts to match Apache Arrow's
361+
// behavior described here https://github.com/apache/arrow/issues/15102 where
362+
// the uncompressed length may be set to -1 to indicate that the data that
363+
// follows is not compressed, which can be useful for cases where compression
364+
// does not yield appreciable savings. When uncompressed_byte_size is not
365+
// greater than 0, the client should skip decompression.
366+
optional int64 uncompressed_byte_size = 9
367+
[(google.api.field_behavior) = OPTIONAL];
351368
}
352369

353370
// Request message for `SplitReadStream`.

google/cloud/bigquery/storage/v1/stream.proto

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,21 @@ message ReadSession {
5959

6060
// Options dictating how we read a table.
6161
message TableReadOptions {
62+
// Specifies which compression codec to attempt on the entire serialized
63+
// response payload (either Arrow record batch or Avro rows). This is
64+
// not to be confused with the Apache Arrow native compression codecs
65+
// specified in ArrowSerializationOptions. For performance reasons, when
66+
// creating a read session requesting Arrow responses, setting both native
67+
// Arrow compression and application-level response compression will not be
68+
// allowed - choose, at most, one kind of compression.
69+
enum ResponseCompressionCodec {
70+
// Default is no compression.
71+
RESPONSE_COMPRESSION_CODEC_UNSPECIFIED = 0;
72+
73+
// Use raw LZ4 compression.
74+
RESPONSE_COMPRESSION_CODEC_LZ4 = 2;
75+
}
76+
6277
// Optional. The names of the fields in the table to be returned. If no
6378
// field names are specified, then all fields in the table are returned.
6479
//
@@ -138,6 +153,11 @@ message ReadSession {
138153
// https://cloud.google.com/bigquery/docs/table-sampling)
139154
optional double sample_percentage = 5
140155
[(google.api.field_behavior) = OPTIONAL];
156+
157+
// Optional. Set response_compression_codec when creating a read session to
158+
// enable application-level compression of ReadRows responses.
159+
optional ResponseCompressionCodec response_compression_codec = 6
160+
[(google.api.field_behavior) = OPTIONAL];
141161
}
142162

143163
// Output only. Unique identifier for the session, in the form

0 commit comments

Comments
 (0)