Skip to content

Commit f681f79

Browse files
Google APIs copybara-github
authored and committed
feat: publish the Cloud Bigtable ExecuteQuery API
The ExecuteQuery API will allow users to query Bigtable using SQL PiperOrigin-RevId: 650660213
1 parent dbc1a3c commit f681f79

File tree

4 files changed

+513
-1
lines changed

4 files changed

+513
-1
lines changed

google/bigtable/v2/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ proto_library(
4646
"feature_flags.proto",
4747
"request_stats.proto",
4848
"response_params.proto",
49+
"types.proto",
4950
],
5051
deps = [
5152
"//google/api:annotations_proto",
@@ -54,6 +55,7 @@ proto_library(
5455
"//google/api:resource_proto",
5556
"//google/api:routing_proto",
5657
"//google/rpc:status_proto",
58+
"//google/type:date_proto",
5759
"@com_google_protobuf//:duration_proto",
5860
"@com_google_protobuf//:timestamp_proto",
5961
"@com_google_protobuf//:wrappers_proto",
@@ -124,6 +126,7 @@ go_proto_library(
124126
deps = [
125127
"//google/api:annotations_go_proto",
126128
"//google/rpc:status_go_proto",
129+
"//google/type:date_go_proto",
127130
],
128131
)
129132

google/bigtable/v2/bigtable.proto

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,23 @@ service Bigtable {
274274
option (google.api.method_signature) = "table_name";
275275
option (google.api.method_signature) = "table_name,app_profile_id";
276276
}
277+
278+
// Executes a BTQL query against a particular Cloud Bigtable instance.
279+
rpc ExecuteQuery(ExecuteQueryRequest) returns (stream ExecuteQueryResponse) {
280+
option (google.api.http) = {
281+
post: "/v2/{instance_name=projects/*/instances/*}:executeQuery"
282+
body: "*"
283+
};
284+
option (google.api.routing) = {
285+
routing_parameters {
286+
field: "instance_name"
287+
path_template: "{name=projects/*/instances/*}"
288+
}
289+
routing_parameters { field: "app_profile_id" }
290+
};
291+
option (google.api.method_signature) = "instance_name,query";
292+
option (google.api.method_signature) = "instance_name,query,app_profile_id";
293+
}
277294
}
278295

279296
// Request message for Bigtable.ReadRows.
@@ -1006,3 +1023,80 @@ message ReadChangeStreamResponse {
10061023
CloseStream close_stream = 3;
10071024
}
10081025
}
1026+
1027+
// Request message for Bigtable.ExecuteQuery
1028+
message ExecuteQueryRequest {
1029+
// Required. The unique name of the instance against which the query should be
1030+
// executed.
1031+
// Values are of the form `projects/<project>/instances/<instance>`
1032+
string instance_name = 1 [
1033+
(google.api.field_behavior) = REQUIRED,
1034+
(google.api.resource_reference) = {
1035+
type: "bigtableadmin.googleapis.com/Instance"
1036+
}
1037+
];
1038+
1039+
// Optional. This value specifies routing for replication. If not specified,
1040+
// the `default` application profile will be used.
1041+
string app_profile_id = 2 [(google.api.field_behavior) = OPTIONAL];
1042+
1043+
// Required. The query string.
1044+
string query = 3 [(google.api.field_behavior) = REQUIRED];
1045+
1046+
// Required. Requested data format for the response.
1047+
oneof data_format {
1048+
// Protocol buffer format as described by ProtoSchema and ProtoRows
1049+
// messages.
1050+
ProtoFormat proto_format = 4;
1051+
}
1052+
1053+
// Optional. If this request is resuming a previously interrupted query
1054+
// execution, `resume_token` should be copied from the last
1055+
// PartialResultSet yielded before the interruption. Doing this
1056+
// enables the query execution to resume where the last one left
1057+
// off.
1058+
// The rest of the request parameters must exactly match the
1059+
// request that yielded this token. Otherwise the request will fail.
1060+
bytes resume_token = 8 [(google.api.field_behavior) = OPTIONAL];
1061+
1062+
// Required. params contains string type keys and Bigtable type values that
1063+
// bind to placeholders in the query string. In the query string, a parameter
1064+
// placeholder consists of the
1065+
// `@` character followed by the parameter name (for example, `@firstName`) in
1066+
// the query string.
1067+
//
1068+
// For example, if
1069+
// `params["firstName"] = bytes_value: "foo" type {bytes_type {}}`
1070+
// then `@firstName` will be replaced with googlesql bytes value "foo" in the
1071+
// query string during query evaluation.
1072+
//
1073+
// If Value.kind is not set, it will be set to the corresponding null
1074+
// value in googlesql.
1075+
// `params["firstName"] = type {string_type {}}`
1076+
// then `@firstName` will be replaced with googlesql null string.
1077+
//
1078+
// Value.type should always be set and no inference of type will be made from
1079+
// Value.kind. If Value.type is not set, we will return INVALID_ARGUMENT
1080+
// error.
1081+
map<string, Value> params = 7 [(google.api.field_behavior) = REQUIRED];
1082+
}
1083+
1084+
// Response message for Bigtable.ExecuteQuery
1085+
message ExecuteQueryResponse {
1086+
// The first response streamed from the server is of type `ResultSetMetadata`
1087+
// and includes information about the columns and types of the result set.
1088+
// From there on, we stream `PartialResultSet` messages with no additional
1089+
// information. `PartialResultSet` will contain `resume_token` to restart the
1090+
// response if query interrupts. In case of resumption with `resume_token`,
1091+
// the server will not resend the ResultSetMetadata.
1092+
oneof response {
1093+
// Structure of rows in this response stream. The first (and only the first)
1094+
// response streamed from the server will be of this type.
1095+
ResultSetMetadata metadata = 1;
1096+
1097+
// A partial result set with row data potentially including additional
1098+
// instructions on how recent past and future partial responses should be
1099+
// interpreted.
1100+
PartialResultSet results = 2;
1101+
}
1102+
}

google/bigtable/v2/data.proto

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ syntax = "proto3";
1717
package google.bigtable.v2;
1818

1919
import "google/api/field_behavior.proto";
20+
import "google/bigtable/v2/types.proto";
21+
import "google/protobuf/timestamp.proto";
22+
import "google/type/date.proto";
2023

2124
option csharp_namespace = "Google.Cloud.Bigtable.V2";
2225
option go_package = "google.golang.org/genproto/googleapis/bigtable/v2;bigtable";
@@ -92,6 +95,21 @@ message Cell {
9295
// value (which may be of a more complex type). See the documentation of the
9396
// `Type` message for more details.
9497
message Value {
98+
// The verified `Type` of this `Value`, if it cannot be inferred.
99+
//
100+
// Read results will never specify the encoding for `type` since the value
101+
// will already have been decoded by the server. Furthermore, the `type` will
102+
// be omitted entirely if it can be inferred from a previous response. The
103+
// exact semantics for inferring `type` will vary, and are therefore
104+
// documented separately for each read method.
105+
//
106+
// When using composite types (Struct, Array, Map) only the outermost `Value`
107+
// will specify the `type`. This top-level `type` will define the types for
108+
// any nested `Struct' fields, `Array` elements, or `Map` key/value pairs.
109+
// If a nested `Value` provides a `type` on write, the request will be
110+
// rejected with INVALID_ARGUMENT.
111+
Type type = 7;
112+
95113
// Options for transporting values within the protobuf type system. A given
96114
// `kind` may support more than one `type` and vice versa. On write, this is
97115
// roughly analogous to a GoogleSQL literal.
@@ -107,12 +125,42 @@ message Value {
107125
// The `type` field must be omitted.
108126
int64 raw_timestamp_micros = 9;
109127

128+
// Represents a typed value transported as a byte sequence.
129+
bytes bytes_value = 2;
130+
131+
// Represents a typed value transported as a string.
132+
string string_value = 3;
133+
110134
// Represents a typed value transported as an integer.
111-
// Default type for writes: `Int64`
112135
int64 int_value = 6;
136+
137+
// Represents a typed value transported as a boolean.
138+
bool bool_value = 10;
139+
140+
// Represents a typed value transported as a floating point number.
141+
double float_value = 11;
142+
143+
// Represents a typed value transported as a timestamp.
144+
google.protobuf.Timestamp timestamp_value = 12;
145+
146+
// Represents a typed value transported as a date.
147+
google.type.Date date_value = 13;
148+
149+
// Represents a typed value transported as a sequence of values.
150+
// To differentiate between `Struct`, `Array`, and `Map`, the outermost
151+
// `Value` must provide an explicit `type` on write. This `type` will
152+
// apply recursively to the nested `Struct` fields, `Array` elements,
153+
// or `Map` key/value pairs, which *must not* supply their own `type`.
154+
ArrayValue array_value = 4;
113155
}
114156
}
115157

158+
// `ArrayValue` is an ordered list of `Value`.
159+
message ArrayValue {
160+
// The ordered elements in the array.
161+
repeated Value values = 1;
162+
}
163+
116164
// Specifies a contiguous range of rows.
117165
message RowRange {
118166
// The row key at which to start the range.
@@ -609,3 +657,84 @@ message StreamContinuationToken {
609657
// An encoded position in the stream to restart reading from.
610658
string token = 2;
611659
}
660+
661+
// Protocol buffers format descriptor, as described by Messages ProtoSchema and
662+
// ProtoRows
663+
message ProtoFormat {}
664+
665+
// Describes a column in a Bigtable Query Language result set.
666+
message ColumnMetadata {
667+
// The name of the column.
668+
string name = 1;
669+
670+
// The type of the column.
671+
Type type = 2;
672+
}
673+
674+
// ResultSet schema in proto format
675+
message ProtoSchema {
676+
// The columns in the result set.
677+
repeated ColumnMetadata columns = 1;
678+
}
679+
680+
// Describes the structure of a Bigtable result set.
681+
message ResultSetMetadata {
682+
// The schema of the ResultSet, contains ordered list of column names
683+
// with types
684+
oneof schema {
685+
// Schema in proto format
686+
ProtoSchema proto_schema = 1;
687+
}
688+
}
689+
690+
// Batch of serialized ProtoRows.
691+
message ProtoRowsBatch {
692+
// Merge partial results by concatenating these bytes, then parsing the
693+
// overall value as a `ProtoRows` message.
694+
bytes batch_data = 1;
695+
}
696+
697+
// A partial result set from the streaming query API.
698+
// CBT client will buffer partial_rows from result_sets until it gets a
699+
// resumption_token.
700+
message PartialResultSet {
701+
// Partial Rows in one of the supported formats. It may require many
702+
// PartialResultSets to stream a batch of rows that can decoded on the client.
703+
// The client should buffer partial_rows until it gets a `resume_token`,
704+
// at which point the batch is complete and can be decoded and yielded to the
705+
// user. Each sub-message documents the appropriate way to combine results.
706+
oneof partial_rows {
707+
// Partial rows in serialized ProtoRows format.
708+
ProtoRowsBatch proto_rows_batch = 3;
709+
}
710+
711+
// An opaque token sent by the server to allow query resumption and signal
712+
// the client to accumulate `partial_rows` since the last non-empty
713+
// `resume_token`. On resumption, the resumed query will return the remaining
714+
// rows for this query.
715+
//
716+
// If there is a batch in progress, a non-empty `resume_token`
717+
// means that the batch of `partial_rows` will be complete after merging
718+
// the `partial_rows` from this response. The client must only yield
719+
// completed batches to the application, and must ensure that any future
720+
// retries send the latest token to avoid returning duplicate data.
721+
//
722+
// The server may set `resume_token` without a `partial_rows`. If there is a
723+
// batch in progress, the client should yield it.
724+
//
725+
// The server will also send a sentinel `resume_token` when last batch of
726+
// `partial_rows` is sent. If the client retries the ExecuteQueryRequest with
727+
// the sentinel `resume_token`, the server will emit it again without any
728+
// `partial_rows`, then return OK.
729+
bytes resume_token = 5;
730+
731+
// Estimated size of a new batch. The server will always set this when
732+
// returning the first `partial_rows` of a batch, and will not set it at any
733+
// other time.
734+
//
735+
// The client can use this estimate to allocate an initial buffer for the
736+
// batched results. This helps minimize the number of allocations required,
737+
// though the buffer size may still need to be increased if the estimate is
738+
// too low.
739+
int32 estimated_batch_size = 4;
740+
}

0 commit comments

Comments
 (0)