feat: Add PrepareQuery api and update ExecuteQuery to support it

Google APIs · copybara-github · commit 9513189365a4 · 2025-03-06T13:46:27.000-08:00
docs: Update ExecuteQuery API docs to reflect changes

PiperOrigin-RevId: 734273312
diff --git a/google/bigtable/v2/bigtable.proto b/google/bigtable/v2/bigtable.proto
@@ -23,6 +23,7 @@ import "google/api/resource.proto";
 import "google/api/routing.proto";
 import "google/bigtable/v2/data.proto";
 import "google/bigtable/v2/request_stats.proto";
+import "google/bigtable/v2/types.proto";
 import "google/protobuf/duration.proto";
 import "google/protobuf/timestamp.proto";
 import "google/protobuf/wrappers.proto";
@@ -275,7 +276,24 @@ service Bigtable {
     option (google.api.method_signature) = "table_name,app_profile_id";
   }
 
-  // Executes a BTQL query against a particular Cloud Bigtable instance.
+  // Prepares a GoogleSQL query for execution on a particular Bigtable instance.
+  rpc PrepareQuery(PrepareQueryRequest) returns (PrepareQueryResponse) {
+    option (google.api.http) = {
+      post: "/v2/{instance_name=projects/*/instances/*}:prepareQuery"
+      body: "*"
+    };
+    option (google.api.routing) = {
+      routing_parameters {
+        field: "instance_name"
+        path_template: "{name=projects/*/instances/*}"
+      }
+      routing_parameters { field: "app_profile_id" }
+    };
+    option (google.api.method_signature) = "instance_name,query";
+    option (google.api.method_signature) = "instance_name,query,app_profile_id";
+  }
+
+  // Executes a SQL query against a particular Bigtable instance.
   rpc ExecuteQuery(ExecuteQueryRequest) returns (stream ExecuteQueryResponse) {
     option (google.api.http) = {
       post: "/v2/{instance_name=projects/*/instances/*}:executeQuery"
@@ -1041,13 +1059,31 @@ message ExecuteQueryRequest {
   string app_profile_id = 2 [(google.api.field_behavior) = OPTIONAL];
 
   // Required. The query string.
-  string query = 3 [(google.api.field_behavior) = REQUIRED];
+  // Deprecated: use `prepared_query` instead.
+  // Exactly one of `query` and `prepared_query` is required. Setting both
+  // or neither is an `INVALID_ARGUMENT`.
+  string query = 3 [deprecated = true, (google.api.field_behavior) = REQUIRED];
 
-  // Required. Requested data format for the response.
+  // A prepared query that was returned from `PrepareQueryResponse`.
+  //
+  // Exactly one of `query` and `prepared_query` is required. Setting both
+  // or neither is an `INVALID_ARGUMENT`.
+  //
+  // Setting this field also places restrictions on several other fields:
+  // - `data_format` must be empty.
+  // - `validate_only` must be false.
+  // - `params` must match the `param_types` set in the `PrepareQueryRequest`.
+  bytes prepared_query = 9;
+
+  // Requested data format for the response.
+  //
+  // If `prepared_query` is set, then the `data_format` is fixed by the
+  // `PrepareQueryRequest`, and a non-empty `data_format` in the
+  // `ExecuteQueryRequest` will be rejected with `INVALID_ARGUMENT`.
   oneof data_format {
     // Protocol buffer format as described by ProtoSchema and ProtoRows
     // messages.
-    ProtoFormat proto_format = 4;
+    ProtoFormat proto_format = 4 [deprecated = true];
   }
 
   // Optional. If this request is resuming a previously interrupted query
@@ -1067,17 +1103,21 @@ message ExecuteQueryRequest {
   //
   // For example, if
   // `params["firstName"] = bytes_value: "foo" type {bytes_type {}}`
-  //  then `@firstName` will be replaced with googlesql bytes value "foo" in the
-  //  query string during query evaluation.
+  // then `@firstName` will be replaced with googlesql bytes value "foo" in the
+  // query string during query evaluation.
   //
-  // In case of Value.kind is not set, it will be set to corresponding null
-  // value in googlesql.
-  //  `params["firstName"] =  type {string_type {}}`
-  //  then `@firstName` will be replaced with googlesql null string.
+  // If `Value.kind` is not set, the value is treated as a NULL value of the
+  // given type. For example, if
+  // `params["firstName"] = type {string_type {}}`
+  // then `@firstName` will be replaced with googlesql null string.
   //
-  // Value.type should always be set and no inference of type will be made from
-  // Value.kind. If Value.type is not set, we will return INVALID_ARGUMENT
-  // error.
+  // If `query` is set, any empty `Value.type` in the map will be rejected with
+  // `INVALID_ARGUMENT`.
+  //
+  // If `prepared_query` is set, any empty `Value.type` in the map will be
+  // inferred from the `param_types` in the `PrepareQueryRequest`. Any non-empty
+  // `Value.type` must match the corresponding `param_types` entry, or be
+  // rejected with `INVALID_ARGUMENT`.
   map<string, Value> params = 7 [(google.api.field_behavior) = REQUIRED];
 }
 
@@ -1100,3 +1140,63 @@ message ExecuteQueryResponse {
     PartialResultSet results = 2;
   }
 }
+
+// Request message for Bigtable.PrepareQuery
+message PrepareQueryRequest {
+  // Required. The unique name of the instance against which the query should be
+  // executed.
+  // Values are of the form `projects/<project>/instances/<instance>`
+  string instance_name = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "bigtableadmin.googleapis.com/Instance"
+    }
+  ];
+
+  // Optional. This value specifies routing for preparing the query. Note that
+  // this `app_profile_id` is only used for preparing the query. The actual
+  // query execution will use the app profile specified in the
+  // `ExecuteQueryRequest`. If not specified, the `default` application profile
+  // will be used.
+  string app_profile_id = 2 [(google.api.field_behavior) = OPTIONAL];
+
+  // Required. The query string.
+  string query = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. Requested data format for the response. Note that the selected
+  // data format is binding for all `ExecuteQuery` rpcs that use the prepared
+  // query.
+  oneof data_format {
+    // Protocol buffer format as described by ProtoSchema and ProtoRows
+    // messages.
+    ProtoFormat proto_format = 4;
+  }
+
+  // Required. `param_types` is a map of parameter identifier strings to their
+  // `Type`s.
+  //
+  // In the query string, a parameter placeholder consists of the
+  // `@` character followed by the parameter name (for example,
+  // `@firstName`).
+  //
+  // For example, if param_types["firstName"] = Bytes then @firstName will be a
+  // query parameter of type Bytes. The specific `Value` to be used for the
+  // query execution must be sent in `ExecuteQueryRequest` in the `params` map.
+  map<string, Type> param_types = 6 [(google.api.field_behavior) = REQUIRED];
+}
+
+// Response message for Bigtable.PrepareQuery
+message PrepareQueryResponse {
+  // Structure of rows in the response stream of `ExecuteQueryResponse` for the
+  // returned `prepared_query`.
+  ResultSetMetadata metadata = 1;
+
+  // A serialized prepared query. Clients should treat this as an opaque
+  // blob of bytes to send in `ExecuteQueryRequest`.
+  bytes prepared_query = 2;
+
+  // The time at which the prepared query token becomes invalid.
+  // A token may become invalid early due to changes in the data being read, but
+  // this time provides a guideline for refreshing query plans asynchronously.
+  google.protobuf.Timestamp valid_until = 3;
+}
diff --git a/google/bigtable/v2/data.proto b/google/bigtable/v2/data.proto
@@ -724,54 +724,115 @@ message ProtoRows {
   repeated Value values = 2;
 }
 
-// Batch of serialized ProtoRows.
+// A part of a serialized `ProtoRows` message.
 message ProtoRowsBatch {
-  // Merge partial results by concatenating these bytes, then parsing the
-  // overall value as a `ProtoRows` message.
+  // Part of a serialized `ProtoRows` message.
+  // A complete, parseable ProtoRows message is constructed by
+  // concatenating `batch_data` from multiple `ProtoRowsBatch` messages. The
+  // `PartialResultSet` that contains the last part has its `batch_checksum`
+  // field set.
   bytes batch_data = 1;
 }
 
 // A partial result set from the streaming query API.
-// CBT client will buffer partial_rows from result_sets until it gets a
-// resumption_token.
+// Cloud Bigtable clients buffer partial results received in this message until
+// a `resume_token` is received.
+//
+// The pseudocode below describes how to buffer and parse a stream of
+// `PartialResultSet` messages.
+//
+// Having:
+// - queue of row results waiting to be returned `queue`
+// - extensible buffer of bytes `buffer`
+// - a place to keep track of the most recent `resume_token`
+// for each PartialResultSet `p` received {
+//   if p.reset {
+//     ensure `queue` is empty
+//     ensure `buffer` is empty
+//   }
+//   if p.estimated_batch_size != 0 {
+//     (optional) ensure `buffer` is sized to at least `p.estimated_batch_size`
+//   }
+//   if `p.proto_rows_batch` is set {
+//     append `p.proto_rows_batch.batch_data` to `buffer`
+//   }
+//   if p.batch_checksum is set and `buffer` is not empty {
+//     validate the checksum matches the contents of `buffer`
+//     (see comments on `batch_checksum`)
+//     parse `buffer` as `ProtoRows` message, clearing `buffer`
+//     add parsed rows to end of `queue`
+//   }
+//   if p.resume_token is set {
+//     release results in `queue`
+//     save `p.resume_token` in `resume_token`
+//   }
+// }
 message PartialResultSet {
-  // Partial Rows in one of the supported formats. It may require many
-  // PartialResultSets to stream a batch of rows that can decoded on the client.
-  // The client should buffer partial_rows until it gets a `resume_token`,
-  // at which point the batch is complete and can be decoded and yielded to the
-  // user. Each sub-message documents the appropriate way to combine results.
+  // Some rows of the result set in one of the supported formats.
+  //
+  // Multiple `PartialResultSet` messages may be sent to represent a complete
+  // response. The client should buffer data constructed from the fields in
+  // `partial_rows` until a non-empty `resume_token` is received. Each
+  // sub-message documents the appropriate way to combine results.
   oneof partial_rows {
     // Partial rows in serialized ProtoRows format.
     ProtoRowsBatch proto_rows_batch = 3;
   }
 
+  // CRC32C checksum of concatenated `partial_rows` data for the current batch.
+  //
+  // When present, the buffered data from `partial_rows` forms a complete
+  // parseable message of the appropriate type.
+  //
+  // The client should mark the end of a parseable message and prepare to
+  // receive a new one starting from the next `PartialResultSet` message.
+  // Clients must verify the checksum of the serialized batch before yielding it
+  // to the caller.
+  //
+  // A checksum alone does NOT mean the values can be yielded to the caller; a
+  // non-empty `resume_token` is required to safely do so.
+  //
+  // If `resume_token` is non-empty and any data has been received since the
+  // last one, this field is guaranteed to be non-empty. In other words, clients
+  // may assume that a batch will never cross a `resume_token` boundary.
+  optional uint32 batch_checksum = 6;
+
   // An opaque token sent by the server to allow query resumption and signal
-  // the client to accumulate `partial_rows` since the last non-empty
-  // `resume_token`. On resumption, the resumed query will return the remaining
-  // rows for this query.
+  // that the buffered values constructed from received `partial_rows` can be
+  // yielded to the caller. Clients can provide this token in a subsequent
+  // request to resume the result stream from the current point.
+  //
+  // When `resume_token` is non-empty, the buffered values received from
+  // `partial_rows` since the last non-empty `resume_token` can be yielded to
+  // the callers, provided that the client keeps the value of `resume_token` and
+  // uses it on subsequent retries.
   //
-  // If there is a batch in progress, a non-empty `resume_token`
-  // means that that the batch of `partial_rows` will be complete after merging
-  // the `partial_rows` from this response. The client must only yield
-  // completed batches to the application, and must ensure that any future
-  // retries send the latest token to avoid returning duplicate data.
+  // A `resume_token` may be sent without information in `partial_rows` to
+  // checkpoint the progress of a sparse query. Any previous `partial_rows` data
+  // should still be yielded in this case, and the new `resume_token` should be
+  // saved for future retries as normal.
   //
-  // The server may set 'resume_token' without a 'partial_rows'. If there is a
-  // batch in progress the client should yield it.
+  // A `resume_token` will only be sent on a boundary where there is either no
+  // ongoing result batch, or `batch_checksum` is also populated.
   //
   // The server will also send a sentinel `resume_token` when last batch of
   // `partial_rows` is sent. If the client retries the ExecuteQueryRequest with
   // the sentinel `resume_token`, the server will emit it again without any
-  // `partial_rows`, then return OK.
+  // data in `partial_rows`, then return OK.
   bytes resume_token = 5;
 
-  // Estimated size of a new batch. The server will always set this when
-  // returning the first `partial_rows` of a batch, and will not set it at any
-  // other time.
+  // If `true`, any data buffered since the last non-empty `resume_token` must
+  // be discarded before the other parts of this message, if any, are handled.
+  bool reset = 7;
+
+  // Estimated size of the buffer required to hold the next batch of results.
+  //
+  // This value will be sent with the first `partial_rows` of a batch. That is,
+  // on the first `partial_rows` received in a stream, on the first message
+  // after a `batch_checksum` message, and any time `reset` is true.
   //
-  // The client can use this estimate to allocate an initial buffer for the
-  // batched results. This helps minimize the number of allocations required,
-  // though the buffer size may still need to be increased if the estimate is
-  // too low.
+  // The client can use this estimate to allocate a buffer for the next batch of
+  // results. This helps minimize the number of allocations required, though the
+  // buffer size may still need to be increased if the estimate is too low.
   int32 estimated_batch_size = 4;
 }