Skip to content
This repository was archived by the owner on Jan 21, 2026. It is now read-only.

Commit 0072e5f

Browse files
authored
feat: emit an error log on potential memory leak scenario (#870)
1 parent c19850d commit 0072e5f

5 files changed

Lines changed: 109 additions & 25 deletions

File tree

src/config.ts

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,6 @@ export type CLSMechanism =
2424

2525
/** Available configuration options. */
2626
export interface Config {
27-
/**
28-
* The trace context propagation mechanism to use. The following options are
29-
* available:
30-
* - 'async-hooks' uses an implementation of CLS on top of the Node core
31-
* `async_hooks` module in Node 8+. This option should not be used if the
32-
* Node binary version requirements are not met.
33-
* - 'async-listener' uses an implementation of CLS on top of the
34-
* `continuation-local-storage` module.
35-
* - 'auto' behaves like 'async-hooks' on Node 8+, and 'async-listener'
36-
* otherwise.
37-
* - 'none' disables CLS completely.
38-
* - 'singular' allows one root span to exist at a time. This option is meant
39-
* to be used internally by Google Cloud Functions, or in any other
40-
* environment where it is guaranteed that only one request is being served
41-
* at a time.
42-
* The 'auto' mechanism is used by default if this configuration option is
43-
* not explicitly set.
44-
*/
45-
clsMechanism?: CLSMechanism;
46-
4727
/**
4828
* Log levels: 0=disabled, 1=error, 2=warn, 3=info, 4=debug
4929
* The value of GCLOUD_TRACE_LOGLEVEL takes precedence over this value.
@@ -70,6 +50,43 @@ export interface Config {
7050
*/
7151
rootSpanNameOverride?: string|((name: string) => string);
7252

53+
/**
54+
* The trace context propagation mechanism to use. The following options are
55+
* available:
56+
* - 'async-hooks' uses an implementation of CLS on top of the Node core
57+
* `async_hooks` module in Node 8+. This option should not be used if the
58+
* Node binary version requirements are not met.
59+
* - 'async-listener' uses an implementation of CLS on top of the
60+
* `continuation-local-storage` module.
61+
* - 'auto' behaves like 'async-hooks' on Node 8+, and 'async-listener'
62+
* otherwise.
63+
* - 'none' disables CLS completely.
64+
* - 'singular' allows one root span to exist at a time. This option is meant
65+
* to be used internally by Google Cloud Functions, or in any other
66+
* environment where it is guaranteed that only one request is being served
67+
* at a time.
68+
* The 'auto' mechanism is used by default if this configuration option is
69+
* not explicitly set.
70+
*/
71+
clsMechanism?: CLSMechanism;
72+
73+
/**
74+
* The number of local spans per trace to allow before emitting an error log.
75+
* An unexpectedly large number of spans per trace may suggest a memory leak.
76+
* This value should be 1-2x the estimated maximum number of RPCs made on
77+
* behalf of a single incoming request.
78+
*/
79+
spansPerTraceSoftLimit?: number;
80+
81+
/**
82+
* The maximum number of local spans per trace to allow in total. Creating
83+
* more spans in a single trace will cause the agent to log an error, and such
84+
* spans will be dropped. (This limit does not apply when using a RootSpan
85+
* instance to create child spans.)
86+
* This value should be greater than spansPerTraceSoftLimit.
87+
*/
88+
spansPerTraceHardLimit?: number;
89+
7390
/**
7491
* The maximum number of characters reported on a label value. This value
7592
* cannot exceed 16383, the maximum value accepted by the service.
@@ -197,11 +214,13 @@ export interface Config {
197214
* user-provided value will be used to extend the default value.
198215
*/
199216
export const defaultConfig = {
200-
clsMechanism: 'auto' as CLSMechanism,
201217
logLevel: 1,
202218
enabled: true,
203219
enhancedDatabaseReporting: false,
204220
rootSpanNameOverride: (name: string) => name,
221+
clsMechanism: 'auto' as CLSMechanism,
222+
spansPerTraceSoftLimit: 200,
223+
spansPerTraceHardLimit: 1000,
205224
maximumLabelValueSize: 512,
206225
plugins: {
207226
// enable all by default

src/trace-api.ts

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ export interface StackdriverTracerConfig extends
3737
enhancedDatabaseReporting: boolean;
3838
ignoreContextHeader: boolean;
3939
rootSpanNameOverride: (path: string) => string;
40+
spansPerTraceSoftLimit: number;
41+
spansPerTraceHardLimit: number;
4042
}
4143

4244
interface IncomingTraceContext {
@@ -236,12 +238,50 @@ export class StackdriverTracer implements Tracer {
236238
// with continuously growing number of child spans. The second case
237239
// seems to have some value, but isn't representable. The user probably
238240
// needs a custom outer span that encompasses the entirety of work.
239-
this.logger!.warn(`TraceApi#createChildSpan: [${
241+
this.logger!.error(`TraceApi#createChildSpan: [${
240242
this.pluginName}] Creating phantom child span [${
241243
options.name}] because root span [${
242244
rootSpan.span.name}] was already closed.`);
243245
return UNCORRELATED_CHILD_SPAN;
244246
}
247+
if (rootSpan.trace.spans.length >= this.config!.spansPerTraceHardLimit) {
248+
// As in the previous case, a root span with a large number of child
249+
// spans suggests a memory leak stemming from context confusion. This
250+
// is likely due to userspace task queues or Promise implementations.
251+
this.logger!.error(`TraceApi#createChildSpan: [${
252+
this.pluginName}] Creating phantom child span [${
253+
options.name}] because the trace with root span [${
254+
rootSpan.span.name}] has reached a limit of ${
255+
this.config!
256+
.spansPerTraceHardLimit} spans. This is likely a memory leak.`);
257+
this.logger!.error([
258+
'TraceApi#createChildSpan: Please see',
259+
'https://github.com/googleapis/cloud-trace-nodejs/wiki',
260+
'for details and suggested actions.'
261+
].join(' '));
262+
return UNCORRELATED_CHILD_SPAN;
263+
}
264+
if (rootSpan.trace.spans.length === this.config!.spansPerTraceSoftLimit) {
265+
// As with the hard limit check above, a root span with a large number of child
266+
// spans suggests a memory leak stemming from context confusion. This
267+
// is likely due to userspace task queues or Promise implementations.
268+
269+
// Note that since child spans can be created by users directly on a
270+
// RootSpanData instance, this block might be skipped because it only
271+
// checks equality -- this is OK because no automatic tracing plugin
272+
// uses the RootSpanData API directly.
273+
this.logger!.warn(`TraceApi#createChildSpan: [${
274+
this.pluginName}] Adding child span [${
275+
options.name}] will cause the trace with root span [${
276+
rootSpan.span.name}] to contain more than ${
277+
this.config!
278+
.spansPerTraceSoftLimit} spans. This is likely a memory leak.`);
279+
this.logger!.error([
280+
'TraceApi#createChildSpan: Please see',
281+
'https://github.com/googleapis/cloud-trace-nodejs/wiki',
282+
'for details and suggested actions.'
283+
].join(' '));
284+
}
245285
// Create a new child span and return it.
246286
const childContext = rootSpan.createChildSpan({
247287
name: options.name,

test/plugins/common.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@ shimmer.wrap(trace, 'start', function(original) {
6969
enhancedDatabaseReporting: false,
7070
ignoreContextHeader: false,
7171
rootSpanNameOverride: (name: string) => name,
72-
samplingRate: 0
72+
samplingRate: 0,
73+
spansPerTraceSoftLimit: Infinity,
74+
spansPerTraceHardLimit: Infinity
7375
}, new TestLogger());
7476
testTraceAgent.policy = new TracingPolicy.TraceAllPolicy();
7577
return result;

test/test-plugin-loader.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ describe('Trace Plugin Loader', () => {
4848
enhancedDatabaseReporting: false,
4949
ignoreContextHeader: false,
5050
rootSpanNameOverride: (name: string) => name,
51-
projectId: '0'
51+
projectId: '0',
52+
spansPerTraceSoftLimit: Infinity,
53+
spansPerTraceHardLimit: Infinity
5254
},
5355
config),
5456
logger);

test/test-trace-api.ts

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ describe('Trace Interface', () => {
3939
enhancedDatabaseReporting: false,
4040
ignoreContextHeader: false,
4141
rootSpanNameOverride: (name: string) => name,
42-
samplingRate: 0
42+
samplingRate: 0,
43+
spansPerTraceSoftLimit: Infinity,
44+
spansPerTraceHardLimit: Infinity
4345
},
4446
config),
4547
logger);
@@ -155,6 +157,25 @@ describe('Trace Interface', () => {
155157
});
156158
});
157159

160+
it('should warn when the spans per trace soft limit has been exceeded',
161+
() => {
162+
const tracer = createTraceAgent(
163+
null, {spansPerTraceSoftLimit: 10, spansPerTraceHardLimit: 20});
164+
tracer.runInRootSpan({name: 'root'}, (rootSpan) => {
165+
for (let i = 0; i < 10; i++) {
166+
tracer.createChildSpan({name: `span-${i}`}).endSpan();
167+
}
168+
assert.strictEqual(logger.getNumLogsWith('warn', '[span-9]'), 1);
169+
for (let i = 0; i < 9; i++) {
170+
tracer.createChildSpan({name: `span-${i + 10}`}).endSpan();
171+
}
172+
const child = tracer.createChildSpan({name: `span-19`});
173+
assert.ok(!tracer.isRealSpan(child));
174+
assert.strictEqual(logger.getNumLogsWith('error', '[span-19]'), 1);
175+
rootSpan.endSpan();
176+
});
177+
});
178+
158179
it('should return null context id when one does not exist', () => {
159180
const traceAPI = createTraceAgent();
160181
assert.strictEqual(traceAPI.getCurrentContextId(), null);

0 commit comments

Comments
 (0)