Skip to content

Commit 9513189

Browse files
Google APIs, copybara-github
authored and committed
feat: Add PrepareQuery api and update ExecuteQuery to support it
docs: Update ExecuteQuery API docs to reflect changes PiperOrigin-RevId: 734273312
1 parent 76254e6 commit 9513189

2 files changed

Lines changed: 202 additions & 41 deletions

File tree

google/bigtable/v2/bigtable.proto

Lines changed: 113 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import "google/api/resource.proto";
2323
import "google/api/routing.proto";
2424
import "google/bigtable/v2/data.proto";
2525
import "google/bigtable/v2/request_stats.proto";
26+
import "google/bigtable/v2/types.proto";
2627
import "google/protobuf/duration.proto";
2728
import "google/protobuf/timestamp.proto";
2829
import "google/protobuf/wrappers.proto";
@@ -275,7 +276,24 @@ service Bigtable {
275276
option (google.api.method_signature) = "table_name,app_profile_id";
276277
}
277278

278-
// Executes a BTQL query against a particular Cloud Bigtable instance.
279+
// Prepares a GoogleSQL query for execution on a particular Bigtable instance.
280+
rpc PrepareQuery(PrepareQueryRequest) returns (PrepareQueryResponse) {
281+
option (google.api.http) = {
282+
post: "/v2/{instance_name=projects/*/instances/*}:prepareQuery"
283+
body: "*"
284+
};
285+
option (google.api.routing) = {
286+
routing_parameters {
287+
field: "instance_name"
288+
path_template: "{name=projects/*/instances/*}"
289+
}
290+
routing_parameters { field: "app_profile_id" }
291+
};
292+
option (google.api.method_signature) = "instance_name,query";
293+
option (google.api.method_signature) = "instance_name,query,app_profile_id";
294+
}
295+
296+
// Executes a SQL query against a particular Bigtable instance.
279297
rpc ExecuteQuery(ExecuteQueryRequest) returns (stream ExecuteQueryResponse) {
280298
option (google.api.http) = {
281299
post: "/v2/{instance_name=projects/*/instances/*}:executeQuery"
@@ -1041,13 +1059,31 @@ message ExecuteQueryRequest {
10411059
string app_profile_id = 2 [(google.api.field_behavior) = OPTIONAL];
10421060

10431061
// Required. The query string.
1044-
string query = 3 [(google.api.field_behavior) = REQUIRED];
1062+
//
1063+
// Exactly one of `query` and `prepared_query` is required. Setting both
1064+
// or neither is an `INVALID_ARGUMENT`.
1065+
string query = 3 [deprecated = true, (google.api.field_behavior) = REQUIRED];
10451066

1046-
// Required. Requested data format for the response.
1067+
// A prepared query that was returned from `PrepareQueryResponse`.
1068+
//
1069+
// Exactly one of `query` and `prepared_query` is required. Setting both
1070+
// or neither is an `INVALID_ARGUMENT`.
1071+
//
1072+
// Setting this field also places restrictions on several other fields:
1073+
// - `data_format` must be empty.
1074+
// - `validate_only` must be false.
1075+
// - `params` must match the `param_types` set in the `PrepareQueryRequest`.
1076+
bytes prepared_query = 9;
1077+
1078+
// Requested data format for the response.
1079+
//
1080+
// If `prepared_query` is set, then the `data_format` is fixed by the
1081+
// `PrepareQueryRequest`, and a non-empty `data_format` in the
1082+
// `ExecuteQueryRequest` will be rejected with `INVALID_ARGUMENT`.
10471083
oneof data_format {
10481084
// Protocol buffer format as described by ProtoSchema and ProtoRows
10491085
// messages.
1050-
ProtoFormat proto_format = 4;
1086+
ProtoFormat proto_format = 4 [deprecated = true];
10511087
}
10521088

10531089
// Optional. If this request is resuming a previously interrupted query
@@ -1067,17 +1103,21 @@ message ExecuteQueryRequest {
10671103
//
10681104
// For example, if
10691105
// `params["firstName"] = bytes_value: "foo" type {bytes_type {}}`
1070-
// then `@firstName` will be replaced with googlesql bytes value "foo" in the
1071-
// query string during query evaluation.
1106+
// then `@firstName` will be replaced with googlesql bytes value "foo" in the
1107+
// query string during query evaluation.
10721108
//
1073-
// In case of Value.kind is not set, it will be set to corresponding null
1074-
// value in googlesql.
1075-
// `params["firstName"] = type {string_type {}}`
1076-
// then `@firstName` will be replaced with googlesql null string.
1109+
// If `Value.kind` is not set, the value is treated as a NULL value of the
1110+
// given type. For example, if
1111+
// `params["firstName"] = type {string_type {}}`
1112+
// then `@firstName` will be replaced with googlesql null string.
10771113
//
1078-
// Value.type should always be set and no inference of type will be made from
1079-
// Value.kind. If Value.type is not set, we will return INVALID_ARGUMENT
1080-
// error.
1114+
// If `query` is set, any empty `Value.type` in the map will be rejected with
1115+
// `INVALID_ARGUMENT`.
1116+
//
1117+
// If `prepared_query` is set, any empty `Value.type` in the map will be
1118+
// inferred from the `param_types` in the `PrepareQueryRequest`. Any non-empty
1119+
// `Value.type` must match the corresponding `param_types` entry, or be
1120+
// rejected with `INVALID_ARGUMENT`.
10811121
map<string, Value> params = 7 [(google.api.field_behavior) = REQUIRED];
10821122
}
10831123

@@ -1100,3 +1140,63 @@ message ExecuteQueryResponse {
11001140
PartialResultSet results = 2;
11011141
}
11021142
}
1143+
1144+
// Request message for Bigtable.PrepareQuery
1145+
message PrepareQueryRequest {
1146+
// Required. The unique name of the instance against which the query should be
1147+
// executed.
1148+
// Values are of the form `projects/<project>/instances/<instance>`
1149+
string instance_name = 1 [
1150+
(google.api.field_behavior) = REQUIRED,
1151+
(google.api.resource_reference) = {
1152+
type: "bigtableadmin.googleapis.com/Instance"
1153+
}
1154+
];
1155+
1156+
// Optional. This value specifies routing for preparing the query. Note that
1157+
// this `app_profile_id` is only used for preparing the query. The actual
1158+
// query execution will use the app profile specified in the
1159+
// `ExecuteQueryRequest`. If not specified, the `default` application profile
1160+
// will be used.
1161+
string app_profile_id = 2 [(google.api.field_behavior) = OPTIONAL];
1162+
1163+
// Required. The query string.
1164+
string query = 3 [(google.api.field_behavior) = REQUIRED];
1165+
1166+
// Required. Requested data format for the response. Note that the selected
1167+
// data format is binding for all `ExecuteQuery` rpcs that use the prepared
1168+
// query.
1169+
oneof data_format {
1170+
// Protocol buffer format as described by ProtoSchema and ProtoRows
1171+
// messages.
1172+
ProtoFormat proto_format = 4;
1173+
}
1174+
1175+
// Required. `param_types` is a map of parameter identifier strings to their
1176+
// `Type`s.
1177+
//
1178+
// In the query string, a parameter placeholder consists of the `@`
1179+
// character followed by the parameter name (for example,
1180+
// `@firstName`).
1181+
//
1182+
// For example, if param_types["firstName"] = Bytes then @firstName will be a
1183+
// query parameter of type Bytes. The specific `Value` to be used for the
1184+
// query execution must be sent in `ExecuteQueryRequest` in the `params` map.
1185+
map<string, Type> param_types = 6 [(google.api.field_behavior) = REQUIRED];
1186+
}
1187+
1188+
// Response message for Bigtable.PrepareQuery
1189+
message PrepareQueryResponse {
1190+
// Structure of rows in the response stream of `ExecuteQueryResponse` for the
1191+
// returned `prepared_query`.
1192+
ResultSetMetadata metadata = 1;
1193+
1194+
// A serialized prepared query. Clients should treat this as an opaque
1195+
// blob of bytes to send in `ExecuteQueryRequest`.
1196+
bytes prepared_query = 2;
1197+
1198+
// The time at which the prepared query token becomes invalid.
1199+
// A token may become invalid early due to changes in the data being read, but
1200+
// it provides a guideline to refresh query plans asynchronously.
1201+
google.protobuf.Timestamp valid_until = 3;
1202+
}

google/bigtable/v2/data.proto

Lines changed: 89 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -724,54 +724,115 @@ message ProtoRows {
724724
repeated Value values = 2;
725725
}
726726

727-
// Batch of serialized ProtoRows.
727+
// A part of a serialized `ProtoRows` message.
728728
message ProtoRowsBatch {
729-
// Merge partial results by concatenating these bytes, then parsing the
730-
// overall value as a `ProtoRows` message.
729+
// Part of a serialized `ProtoRows` message.
730+
// A complete, parseable ProtoRows message is constructed by
731+
// concatenating `batch_data` from multiple `ProtoRowsBatch` messages. The
732+
// `PartialResultSet` that contains the last part has `batch_checksum`
733+
// set.
731734
bytes batch_data = 1;
732735
}
733736

734737
// A partial result set from the streaming query API.
735-
// CBT client will buffer partial_rows from result_sets until it gets a
736-
// resumption_token.
738+
// Cloud Bigtable clients buffer partial results received in this message until
739+
// a `resume_token` is received.
740+
//
741+
// The pseudocode below describes how to buffer and parse a stream of
742+
// `PartialResultSet` messages.
743+
//
744+
// Having:
745+
// - queue of row results waiting to be returned `queue`
746+
// - extensible buffer of bytes `buffer`
747+
// - a place to keep track of the most recent `resume_token`
748+
// for each PartialResultSet `p` received {
749+
// if p.reset {
750+
// ensure `queue` is empty
751+
// ensure `buffer` is empty
752+
// }
753+
// if p.estimated_batch_size != 0 {
754+
// (optional) ensure `buffer` is sized to at least `p.estimated_batch_size`
755+
// }
756+
// if `p.proto_rows_batch` is set {
757+
// append `p.proto_rows_batch.batch_data` to `buffer`
758+
// }
759+
// if p.batch_checksum is set and `buffer` is not empty {
760+
// validate the checksum matches the contents of `buffer`
761+
// (see comments on `batch_checksum`)
762+
// parse `buffer` as `ProtoRows` message, clearing `buffer`
763+
// add parsed rows to end of `queue`
764+
// }
765+
// if p.resume_token is set {
766+
// release results in `queue`
767+
// save `p.resume_token` in `resume_token`
768+
// }
769+
// }
737770
message PartialResultSet {
738-
// Partial Rows in one of the supported formats. It may require many
739-
// PartialResultSets to stream a batch of rows that can decoded on the client.
740-
// The client should buffer partial_rows until it gets a `resume_token`,
741-
// at which point the batch is complete and can be decoded and yielded to the
742-
// user. Each sub-message documents the appropriate way to combine results.
771+
// Some rows of the result set in one of the supported formats.
772+
//
773+
// Multiple `PartialResultSet` messages may be sent to represent a complete
774+
// response. The client should buffer data constructed from the fields in
775+
// `partial_rows` until a non-empty `resume_token` is received. Each
776+
// sub-message documents the appropriate way to combine results.
743777
oneof partial_rows {
744778
// Partial rows in serialized ProtoRows format.
745779
ProtoRowsBatch proto_rows_batch = 3;
746780
}
747781

782+
// CRC32C checksum of concatenated `partial_rows` data for the current batch.
783+
//
784+
// When present, the buffered data from `partial_rows` forms a complete
785+
// parseable message of the appropriate type.
786+
//
787+
// The client should mark the end of a parseable message and prepare to
788+
// receive a new one starting from the next `PartialResultSet` message.
789+
// Clients must verify the checksum of the serialized batch before yielding it
790+
// to the caller.
791+
//
792+
// This does NOT mean the values can be yielded to the callers since a
793+
// `resume_token` is required to safely do so.
794+
//
795+
// If `resume_token` is non-empty and any data has been received since the
796+
// last one, this field is guaranteed to be non-empty. In other words, clients
797+
// may assume that a batch will never cross a `resume_token` boundary.
798+
optional uint32 batch_checksum = 6;
799+
748800
// An opaque token sent by the server to allow query resumption and signal
749-
// the client to accumulate `partial_rows` since the last non-empty
750-
// `resume_token`. On resumption, the resumed query will return the remaining
751-
// rows for this query.
801+
// that the buffered values constructed from received `partial_rows` can be
802+
// yielded to the caller. Clients can provide this token in a subsequent
803+
// request to resume the result stream from the current point.
804+
//
805+
// When `resume_token` is non-empty, the buffered values received from
806+
// `partial_rows` since the last non-empty `resume_token` can be yielded to
807+
// the callers, provided that the client keeps the value of `resume_token` and
808+
// uses it on subsequent retries.
752809
//
753-
// If there is a batch in progress, a non-empty `resume_token`
754-
// means that that the batch of `partial_rows` will be complete after merging
755-
// the `partial_rows` from this response. The client must only yield
756-
// completed batches to the application, and must ensure that any future
757-
// retries send the latest token to avoid returning duplicate data.
810+
// A `resume_token` may be sent without information in `partial_rows` to
811+
// checkpoint the progress of a sparse query. Any previous `partial_rows` data
812+
// should still be yielded in this case, and the new `resume_token` should be
813+
// saved for future retries as normal.
758814
//
759-
// The server may set 'resume_token' without a 'partial_rows'. If there is a
760-
// batch in progress the client should yield it.
815+
// A `resume_token` will only be sent on a boundary where there is either no
816+
// ongoing result batch, or `batch_checksum` is also populated.
761817
//
762818
// The server will also send a sentinel `resume_token` when last batch of
763819
// `partial_rows` is sent. If the client retries the ExecuteQueryRequest with
764820
// the sentinel `resume_token`, the server will emit it again without any
765-
// `partial_rows`, then return OK.
821+
// data in `partial_rows`, then return OK.
766822
bytes resume_token = 5;
767823

768-
// Estimated size of a new batch. The server will always set this when
769-
// returning the first `partial_rows` of a batch, and will not set it at any
770-
// other time.
824+
// If `true`, any data buffered since the last non-empty `resume_token` must
825+
// be discarded before the other parts of this message, if any, are handled.
826+
bool reset = 7;
827+
828+
// Estimated size of the buffer required to hold the next batch of results.
829+
//
830+
// This value will be sent with the first `partial_rows` of a batch. That is,
831+
// on the first `partial_rows` received in a stream, on the first message
832+
// after a `batch_checksum` message, and any time `reset` is true.
771833
//
772-
// The client can use this estimate to allocate an initial buffer for the
773-
// batched results. This helps minimize the number of allocations required,
774-
// though the buffer size may still need to be increased if the estimate is
775-
// too low.
834+
// The client can use this estimate to allocate a buffer for the next batch of
835+
// results. This helps minimize the number of allocations required, though the
836+
// buffer size may still need to be increased if the estimate is too low.
776837
int32 estimated_batch_size = 4;
777838
}

0 commit comments

Comments
 (0)