|
| 1 | +// Copyright 2025 Google LLC |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +import { |
| 16 | + IMetricsHandler, |
| 17 | + OnAttemptCompleteMetrics, |
| 18 | + OnOperationCompleteMetrics, |
| 19 | +} from './metrics-handler'; |
| 20 | +import * as Resources from '@opentelemetry/resources'; |
| 21 | +import * as ResourceUtil from '@google-cloud/opentelemetry-resource-util'; |
| 22 | +import {MetricExporter} from '@google-cloud/opentelemetry-cloud-monitoring-exporter'; |
| 23 | +import { |
| 24 | + OnAttemptCompleteAttributes, |
| 25 | + OnOperationCompleteAttributes, |
| 26 | +} from '../../common/client-side-metrics-attributes'; |
| 27 | +import {View} from '@opentelemetry/sdk-metrics'; |
| 28 | +const { |
| 29 | + Aggregation, |
| 30 | + ExplicitBucketHistogramAggregation, |
| 31 | + MeterProvider, |
| 32 | + Histogram, |
| 33 | + Counter, |
| 34 | + PeriodicExportingMetricReader, |
| 35 | +} = require('@opentelemetry/sdk-metrics'); |
| 36 | + |
/**
 * The OpenTelemetry instruments that the GCP metrics handler records into.
 * Each member holds the instrument for one Bigtable client-side metric.
 */
interface Metrics {
  // Instruments recorded when a whole operation completes.
  operationLatencies: typeof Histogram;
  firstResponseLatencies: typeof Histogram;
  retryCount: typeof Counter;

  // Instruments recorded when a single attempt completes.
  attemptLatencies: typeof Histogram;
  serverLatencies: typeof Histogram;
  connectivityErrorCount: typeof Histogram;

  // Blocking-latency instruments.
  applicationBlockingLatencies: typeof Histogram;
  clientBlockingLatencies: typeof Histogram;
}
| 51 | + |
/**
 * A metrics handler implementation that uses OpenTelemetry to export metrics to Google Cloud Monitoring.
 * This handler records metrics such as operation latency, attempt latency, retry count, and more,
 * associating them with relevant attributes for detailed analysis in Cloud Monitoring.
 */
export class GCPMetricsHandler implements IMetricsHandler {
  // Set on the first call to initialize() so the instruments are built once.
  private initialized = false;
  // Populated by initialize(); undefined until the first metric is recorded.
  private otelMetrics?: Metrics;

  /**
   * Initializes the OpenTelemetry metrics instruments if they haven't been already.
   * Creates the views, a MeterProvider with a PeriodicExportingMetricReader that
   * exports through the Cloud Monitoring MetricExporter, and the histogram and
   * counter instruments for the Bigtable client metrics.
   * @param {string} [projectId] The Google Cloud project ID handed to the
   * exporter. The recording methods of this class call initialize() without it,
   * in which case `undefined` is forwarded and the exporter is expected to
   * resolve the project from the environment.
   */
  private initialize(projectId?: string) {
    if (this.initialized) {
      return;
    }
    this.initialized = true;

    const sumAggregation = Aggregation.Sum();
    // Bucket boundaries for the latency distributions.
    const histogramAggregation = new ExplicitBucketHistogramAggregation([
      0, 0.01, 0.05, 0.1, 0.3, 0.6, 0.8, 1, 2, 3, 4, 5, 6, 8, 10, 13, 16, 20,
      25, 30, 40, 50, 65, 80, 100, 130, 160, 200, 250, 300, 400, 500, 650,
      800, 1000, 2000, 5000, 10000, 20000, 50000, 100000,
    ]);
    // NOTE(review): 'application_latencies' and 'throttling_latencies' do not
    // match any instrument name created below
    // ('application_blocking_latencies' / 'client_blocking_latencies'), so
    // those two views currently select nothing. Confirm which names are the
    // intended ones before renaming either side.
    const viewList = [
      'operation_latencies',
      'first_response_latencies',
      'attempt_latencies',
      'retry_count',
      'server_latencies',
      'connectivity_error_count',
      'application_latencies',
      'throttling_latencies',
    ].map(
      name =>
        new View({
          instrumentName: name,
          name,
          // Latency metrics are distributions and need the bucketed histogram;
          // the remaining (count) metrics are plain sums. The previous
          // condition tested a non-empty string, which is always truthy, so
          // every view was given the sum aggregation.
          aggregation: name.endsWith('latencies')
            ? histogramAggregation
            : sumAggregation,
        })
    );
    const meterProvider = new MeterProvider({
      views: viewList,
      resource: new Resources.Resource({
        'service.name': 'bigtable-metrics',
      }).merge(new ResourceUtil.GcpDetectorSync().detect()),
      readers: [
        // Register the exporter
        new PeriodicExportingMetricReader({
          // Export metrics every 100 seconds (100_000 ms).
          exportIntervalMillis: 100_000,
          exporter: new MetricExporter({
            projectId,
          }),
        }),
      ],
    });
    const meter = meterProvider.getMeter('bigtable.googleapis.com');
    this.otelMetrics = {
      operationLatencies: meter.createHistogram('operation_latencies', {
        description:
          "The total end-to-end latency across all RPC attempts associated with a Bigtable operation. This metric measures an operation's round trip from the client to Bigtable and back to the client and includes all retries.",
        unit: 'ms',
      }),
      attemptLatencies: meter.createHistogram('attempt_latencies', {
        description:
          'The latencies of a client RPC attempt. Under normal circumstances, this value is identical to operation_latencies. If the client receives transient errors, however, then operation_latencies is the sum of all attempt_latencies and the exponential delays.',
        unit: 'ms',
      }),
      retryCount: meter.createCounter('retry_count', {
        description:
          'A counter that records the number of attempts that an operation required to complete. Under normal circumstances, this value is empty.',
      }),
      applicationBlockingLatencies: meter.createHistogram(
        'application_blocking_latencies',
        {
          description:
            'The time from when the client receives the response to a request until the application reads the response. This metric is most relevant for ReadRows requests. The start and stop times for this metric depend on the way that you send the read request; see Application blocking latencies timer examples for details.',
          unit: 'ms',
        }
      ),
      firstResponseLatencies: meter.createHistogram(
        'first_response_latencies',
        {
          description:
            'Latencies from when a client sends a request and receives the first row of the response.',
          unit: 'ms',
        }
      ),
      serverLatencies: meter.createHistogram('server_latencies', {
        description:
          'Latencies between the time when the Google frontend receives an RPC and when it sends the first byte of the response.',
        unit: 'ms',
      }),
      // NOTE(review): this is a count but is created as a histogram and
      // written with record(); confirm whether a counter was intended.
      connectivityErrorCount: meter.createHistogram(
        'connectivity_error_count',
        {
          description:
            "The number of requests that failed to reach Google's network. In normal cases, this number is 0. When the number is not 0, it can indicate connectivity issues between the application and the Google network.",
        }
      ),
      clientBlockingLatencies: meter.createHistogram(
        'client_blocking_latencies',
        {
          description:
            'Latencies introduced when the client blocks the sending of more requests to the server because of too many pending requests in a bulk operation.',
          unit: 'ms',
        }
      ),
    };
  }

  /**
   * Records metrics for a completed Bigtable operation.
   * This method records the operation latency, retry count, and first response
   * latency, associating them with the provided attributes. The instruments are
   * created lazily on the first call.
   * @param {OnOperationCompleteMetrics} metrics Metrics related to the completed operation.
   * @param {OnOperationCompleteAttributes} attributes Attributes associated with the completed operation.
   */
  onOperationComplete(
    metrics: OnOperationCompleteMetrics,
    attributes: OnOperationCompleteAttributes
  ) {
    this.initialize();
    this.otelMetrics?.operationLatencies.record(
      metrics.operationLatency,
      attributes
    );
    this.otelMetrics?.retryCount.add(metrics.retryCount, attributes);
    this.otelMetrics?.firstResponseLatencies.record(
      metrics.firstResponseLatency,
      attributes
    );
  }

  /**
   * Records metrics for a completed attempt of a Bigtable operation.
   * This method records attempt latency, connectivity error count, and server
   * latency, along with the provided attributes. The instruments are created
   * lazily on the first call.
   * @param {OnAttemptCompleteMetrics} metrics Metrics related to the completed attempt.
   * @param {OnAttemptCompleteAttributes} attributes Attributes associated with the completed attempt.
   */
  onAttemptComplete(
    metrics: OnAttemptCompleteMetrics,
    attributes: OnAttemptCompleteAttributes
  ) {
    this.initialize();
    this.otelMetrics?.attemptLatencies.record(
      metrics.attemptLatency,
      attributes
    );
    this.otelMetrics?.connectivityErrorCount.record(
      metrics.connectivityErrorCount,
      attributes
    );
    this.otelMetrics?.serverLatencies.record(metrics.serverLatency, attributes);
  }
}
0 commit comments