Skip to content

Commit c539b9b

Browse files
Google APIs and copybara-github
authored and committed
feat: add Arrow compression options (only LZ4 for now).
feat: Return schema on first ReadRowsResponse. doc: clarify limit on filter string. Committer: @emkornfield PiperOrigin-RevId: 365759522
1 parent 3a8abe5 commit c539b9b

File tree

4 files changed

+54
-18
lines changed

4 files changed

+54
-18
lines changed

google/cloud/bigquery/storage/v1/arrow.proto

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2021 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

@@ -43,3 +42,19 @@ message ArrowRecordBatch {
4342
// The count of rows in `serialized_record_batch`.
4443
int64 row_count = 2;
4544
}
45+
46+
// Contains options specific to Arrow serialization.
47+
message ArrowSerializationOptions {
48+
// Compression codecs supported by Arrow.
49+
enum CompressionCodec {
50+
// If unspecified, no compression will be used.
51+
COMPRESSION_UNSPECIFIED = 0;
52+
53+
// LZ4 Frame (https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md)
54+
LZ4_FRAME = 1;
55+
}
56+
57+
// The compression codec to use for Arrow buffers in serialized record
58+
// batches.
59+
CompressionCodec buffer_compression = 2;
60+
}

google/cloud/bigquery/storage/v1/avro.proto

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2021 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

google/cloud/bigquery/storage/v1/storage.proto

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2021 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

@@ -70,7 +69,8 @@ service BigQueryRead {
7069
post: "/v1/{read_session.table=projects/*/datasets/*/tables/*}"
7170
body: "*"
7271
};
73-
option (google.api.method_signature) = "parent,read_session,max_stream_count";
72+
option (google.api.method_signature) =
73+
"parent,read_session,max_stream_count";
7474
}
7575

7676
// Reads rows from the stream in the format prescribed by the ReadSession.
@@ -99,7 +99,8 @@ service BigQueryRead {
9999
// original, primary, and residual, that original[0-j] = primary[0-j] and
100100
// original[j-n] = residual[0-m] once the streams have been read to
101101
// completion.
102-
rpc SplitReadStream(SplitReadStreamRequest) returns (SplitReadStreamResponse) {
102+
rpc SplitReadStream(SplitReadStreamRequest)
103+
returns (SplitReadStreamResponse) {
103104
option (google.api.http) = {
104105
get: "/v1/{name=projects/*/locations/*/sessions/*/streams/*}"
105106
};
@@ -201,6 +202,19 @@ message ReadRowsResponse {
201202
// Throttling state. If unset, the latest response still describes
202203
// the current throttling status.
203204
ThrottleState throttle_state = 5;
205+
206+
// The schema for the read. If read_options.selected_fields is set, the
207+
// schema may be different from the table schema as it will only contain
208+
// the selected fields. This schema is equivalent to the one returned by
209+
// CreateSession. This field is only populated in the first ReadRowsResponse
210+
// RPC.
211+
oneof schema {
212+
// Output only. Avro schema.
213+
AvroSchema avro_schema = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
214+
215+
// Output only. Arrow schema.
216+
ArrowSchema arrow_schema = 8 [(google.api.field_behavior) = OUTPUT_ONLY];
217+
}
204218
}
205219

206220
// Request message for `SplitReadStream`.

google/cloud/bigquery/storage/v1/stream.proto

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2019 Google LLC.
1+
// Copyright 2021 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
//
1514

1615
syntax = "proto3";
1716

@@ -72,17 +71,27 @@ message ReadSession {
7271
// "nullable_field is not NULL"
7372
// "st_equals(geo_field, st_geofromtext("POINT(2, 2)"))"
7473
// "numeric_field BETWEEN 1.0 AND 5.0"
74+
//
75+
// Restricted to a maximum length of 1 MB.
7576
string row_restriction = 2;
77+
78+
// Optional. Options specific to the Apache Arrow output format.
79+
oneof output_format_serialization_options {
80+
ArrowSerializationOptions arrow_serialization_options = 3
81+
[(google.api.field_behavior) = OPTIONAL];
82+
}
7683
}
7784

7885
// Output only. Unique identifier for the session, in the form
7986
// `projects/{project_id}/locations/{location}/sessions/{session_id}`.
8087
string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
8188

82-
// Output only. Time at which the session becomes invalid. After this time, subsequent
83-
// requests to read this Session will return errors. The expire_time is
84-
// automatically assigned and currently cannot be specified or updated.
85-
google.protobuf.Timestamp expire_time = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
89+
// Output only. Time at which the session becomes invalid. After this time,
90+
// subsequent requests to read this Session will return errors. The
91+
// expire_time is automatically assigned and currently cannot be specified or
92+
// updated.
93+
google.protobuf.Timestamp expire_time = 2
94+
[(google.api.field_behavior) = OUTPUT_ONLY];
8695

8796
// Immutable. Data format of the output data.
8897
DataFormat data_format = 3 [(google.api.field_behavior) = IMMUTABLE];
@@ -102,12 +111,11 @@ message ReadSession {
102111
// `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`
103112
string table = 6 [
104113
(google.api.field_behavior) = IMMUTABLE,
105-
(google.api.resource_reference) = {
106-
type: "bigquery.googleapis.com/Table"
107-
}
114+
(google.api.resource_reference) = { type: "bigquery.googleapis.com/Table" }
108115
];
109116

110-
// Optional. Any modifiers which are applied when reading from the specified table.
117+
// Optional. Any modifiers which are applied when reading from the specified
118+
// table.
111119
TableModifiers table_modifiers = 7 [(google.api.field_behavior) = OPTIONAL];
112120

113121
// Optional. Read options for this session (e.g. column selection, filters).

0 commit comments

Comments
 (0)