Skip to content

Commit 66fc31d

Browse files
Google APIs and copybara-github
authored and committed
feat: Publish new bigtable APIs for types and aggregates
Bigtable aggregates will allow users to configure column families whose cells accumulate values via an aggregation function rather than simply overwrite them. PiperOrigin-RevId: 613716423
1 parent 8e2fbae commit 66fc31d

5 files changed

Lines changed: 221 additions & 1 deletion

File tree

google/bigtable/admin/v2/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ proto_library(
4646
"common.proto",
4747
"instance.proto",
4848
"table.proto",
49+
"types.proto",
4950
],
5051
deps = [
5152
"//google/api:annotations_proto",

google/bigtable/admin/v2/bigtable_table_admin.proto

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,12 @@ message ModifyColumnFamiliesRequest {
771771
// family exists.
772772
bool drop = 4;
773773
}
774+
775+
// Optional. A mask specifying which fields (e.g. `gc_rule`) in the `update`
776+
// mod should be updated; ignored for other modification types. If unset or
777+
// empty, we treat it as updating `gc_rule` to be backward compatible.
778+
google.protobuf.FieldMask update_mask = 6
779+
[(google.api.field_behavior) = OPTIONAL];
774780
}
775781

776782
// Required. The unique name of the table whose families should be modified.

google/bigtable/admin/v2/table.proto

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package google.bigtable.admin.v2;
1818

1919
import "google/api/field_behavior.proto";
2020
import "google/api/resource.proto";
21+
import "google/bigtable/admin/v2/types.proto";
2122
import "google/protobuf/duration.proto";
2223
import "google/protobuf/timestamp.proto";
2324
import "google/rpc/status.proto";
@@ -268,6 +269,18 @@ message ColumnFamily {
268269
// so it's possible for reads to return a cell even if it matches the active
269270
// GC expression for its family.
270271
GcRule gc_rule = 1;
272+
273+
// The type of data stored in each of this family's cell values, including its
274+
// full encoding. If omitted, the family only serves raw untyped bytes.
275+
//
276+
// For now, only the `Aggregate` type is supported.
277+
//
278+
// `Aggregate` can only be set at family creation and is immutable afterwards.
279+
//
280+
//
281+
// If `value_type` is `Aggregate`, written data must be compatible with:
282+
// * `value_type.input_type` for `AddToCell` mutations
283+
Type value_type = 3;
271284
}
272285

273286
// Rule for determining which cells to delete during garbage collection.
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
syntax = "proto3";
16+
17+
package google.bigtable.admin.v2;
18+
19+
import "google/api/field_behavior.proto";
20+
21+
option csharp_namespace = "Google.Cloud.Bigtable.Admin.V2";
22+
option go_package = "google.golang.org/genproto/googleapis/bigtable/admin/v2;admin";
23+
option java_multiple_files = true;
24+
option java_outer_classname = "TypesProto";
25+
option java_package = "com.google.bigtable.admin.v2";
26+
option php_namespace = "Google\\Cloud\\Bigtable\\Admin\\V2";
27+
option ruby_package = "Google::Cloud::Bigtable::Admin::V2";
28+
29+
// `Type` represents the type of data that is written to, read from, or stored
30+
// in Bigtable. It is heavily based on the GoogleSQL standard to help maintain
31+
// familiarity and consistency across products and features.
32+
//
33+
// For compatibility with Bigtable's existing untyped APIs, each `Type` includes
34+
// an `Encoding` which describes how to convert to/from the underlying data.
35+
// This might involve composing a series of steps into an "encoding chain," for
36+
// example to convert from INT64 -> STRING -> raw bytes. In most cases, a "link"
37+
// in the encoding chain will be based on an existing GoogleSQL conversion
38+
// function like `CAST`.
39+
//
40+
// Each link in the encoding chain also defines the following properties:
41+
// * Natural sort: Does the encoded value sort consistently with the original
42+
// typed value? Note that Bigtable will always sort data based on the raw
43+
// encoded value, *not* the decoded type.
44+
// - Example: STRING values sort in the same order as their UTF-8 encodings.
45+
// - Counterexample: Encoding INT64 to a fixed-width STRING does *not*
46+
// preserve sort order when dealing with negative numbers.
47+
// INT64(1) > INT64(-1), but STRING("-00001") > STRING("00001").
48+
// - The overall encoding chain sorts naturally if *every* link does.
49+
// * Self-delimiting: If we concatenate two encoded values, can we always tell
50+
// where the first one ends and the second one begins?
51+
// - Example: If we encode INT64s to fixed-width STRINGs, the first value
52+
// will always contain exactly N digits, possibly preceded by a sign.
53+
// - Counterexample: If we concatenate two UTF-8 encoded STRINGs, we have
54+
// no way to tell where the first one ends.
55+
// - The overall encoding chain is self-delimiting if *any* link is.
56+
// * Compatibility: Which other systems have matching encoding schemes? For
57+
// example, does this encoding have a GoogleSQL equivalent? HBase? Java?
58+
message Type {
59+
// Bytes
60+
// Values of type `Bytes` are stored in `Value.bytes_value`.
61+
message Bytes {
62+
// Rules used to convert to/from lower level types.
63+
message Encoding {
64+
// Leaves the value "as-is"
65+
// * Natural sort? Yes
66+
// * Self-delimiting? No
67+
// * Compatibility? N/A
68+
message Raw {}
69+
70+
// Which encoding to use.
71+
oneof encoding {
72+
// Use `Raw` encoding.
73+
Raw raw = 1;
74+
}
75+
}
76+
77+
// The encoding to use when converting to/from lower level types.
78+
Encoding encoding = 1;
79+
}
80+
81+
// Int64
82+
// Values of type `Int64` are stored in `Value.int_value`.
83+
message Int64 {
84+
// Rules used to convert to/from lower level types.
85+
message Encoding {
86+
// Encodes the value as an 8-byte big-endian two's complement `Bytes`
87+
// value.
88+
// * Natural sort? No (positive values only)
89+
// * Self-delimiting? Yes
90+
// * Compatibility?
91+
// - BigQuery Federation `BINARY` encoding
92+
// - HBase `Bytes.toBytes`
93+
// - Java `ByteBuffer.putLong()` with `ByteOrder.BIG_ENDIAN`
94+
message BigEndianBytes {
95+
// The underlying `Bytes` type, which may be able to encode further.
96+
Bytes bytes_type = 1;
97+
}
98+
99+
// Which encoding to use.
100+
oneof encoding {
101+
// Use `BigEndianBytes` encoding.
102+
BigEndianBytes big_endian_bytes = 1;
103+
}
104+
}
105+
106+
// The encoding to use when converting to/from lower level types.
107+
Encoding encoding = 1;
108+
}
109+
110+
// A value that combines incremental updates into a summarized value.
111+
//
112+
// Data is never directly written or read using type `Aggregate`. Writes will
113+
// provide either the `input_type` or `state_type`, and reads will always
114+
// return the `state_type`.
115+
message Aggregate {
116+
// Computes the sum of the input values.
117+
// Allowed input: `Int64`
118+
// State: same as input
119+
message Sum {}
120+
121+
// Type of the inputs that are accumulated by this `Aggregate`, which must
122+
// specify a full encoding.
123+
// Use `AddToCell` mutations to accumulate new inputs.
124+
Type input_type = 1;
125+
126+
// Output only. Type that holds the internal accumulator state for the
127+
// `Aggregate`. This is a function of the `input_type` and `aggregator`
128+
// chosen, and will always specify a full encoding.
129+
Type state_type = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
130+
131+
// Which aggregator function to use. The configured types must match.
132+
oneof aggregator {
133+
// Sum aggregator.
134+
Sum sum = 4;
135+
}
136+
}
137+
138+
// The kind of type that this represents.
139+
oneof kind {
140+
// Bytes
141+
Bytes bytes_type = 1;
142+
143+
// Int64
144+
Int64 int64_type = 5;
145+
146+
// Aggregate
147+
Aggregate aggregate_type = 6;
148+
}
149+
}

google/bigtable/v2/data.proto

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2022 Google LLC
1+
// Copyright 2023 Google LLC
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -16,6 +16,8 @@ syntax = "proto3";
1616

1717
package google.bigtable.v2;
1818

19+
import "google/api/field_behavior.proto";
20+
1921
option csharp_namespace = "Google.Cloud.Bigtable.V2";
2022
option go_package = "google.golang.org/genproto/googleapis/bigtable/v2;bigtable";
2123
option java_multiple_files = true;
@@ -85,6 +87,32 @@ message Cell {
8587
repeated string labels = 3;
8688
}
8789

90+
// `Value` represents a dynamically typed value.
91+
// The typed fields in `Value` are used as a transport encoding for the actual
92+
// value (which may be of a more complex type). See the documentation of the
93+
// `Type` message for more details.
94+
message Value {
95+
// Options for transporting values within the protobuf type system. A given
96+
// `kind` may support more than one `type` and vice versa. On write, this is
97+
// roughly analogous to a GoogleSQL literal.
98+
//
99+
// The value is `NULL` if none of the fields in `kind` is set. If `type` is
100+
// also omitted on write, we will infer it based on the schema.
101+
oneof kind {
102+
// Represents a raw byte sequence with no type information.
103+
// The `type` field must be omitted.
104+
bytes raw_value = 8;
105+
106+
// Represents a raw cell timestamp with no type information.
107+
// The `type` field must be omitted.
108+
int64 raw_timestamp_micros = 9;
109+
110+
// Represents a typed value transported as an integer.
111+
// Default type for writes: `Int64`
112+
int64 int_value = 6;
113+
}
114+
}
115+
88116
// Specifies a contiguous range of rows.
89117
message RowRange {
90118
// The row key at which to start the range.
@@ -463,6 +491,26 @@ message Mutation {
463491
bytes value = 4;
464492
}
465493

494+
// A Mutation which incrementally updates a cell in an `Aggregate` family.
495+
message AddToCell {
496+
// The name of the `Aggregate` family into which new data should be added.
497+
// This must be a family with a `value_type` of `Aggregate`.
498+
// Format: `[-_.a-zA-Z0-9]+`
499+
string family_name = 1;
500+
501+
// The qualifier of the column into which new data should be added. This
502+
// must be a `raw_value`.
503+
Value column_qualifier = 2;
504+
505+
// The timestamp of the cell to which new data should be added. This must
506+
// be a `raw_timestamp_micros` that matches the table's `granularity`.
507+
Value timestamp = 3;
508+
509+
// The input value to be accumulated into the specified cell. This must be
510+
// compatible with the family's `value_type.input_type`.
511+
Value input = 4;
512+
}
513+
466514
// A Mutation which deletes cells from the specified column, optionally
467515
// restricting the deletions to a given timestamp range.
468516
message DeleteFromColumn {
@@ -493,6 +541,9 @@ message Mutation {
493541
// Set a cell's value.
494542
SetCell set_cell = 1;
495543

544+
// Incrementally updates an `Aggregate` cell.
545+
AddToCell add_to_cell = 5;
546+
496547
// Deletes cells from a column.
497548
DeleteFromColumn delete_from_column = 2;
498549

0 commit comments

Comments
 (0)