Skip to content

Commit 154b90a

Browse files
authored
Add coordinated sampling for Exception Debugging (#6974)
if the first stack level generates a snapshot after sampling we keep continuing to generate snapshot for next levels by forcing sampling (not calling the sampler). For each evaluation of exception probe we query the state by throwable instance and based on this state (list of snapshots) we take the decision keep sampling or drop)
1 parent 0059a39 commit 154b90a

8 files changed

Lines changed: 90 additions & 33 deletions

File tree

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,9 @@ public void handleException(Throwable t, AgentSpan span) {
6565
}
6666
processSnapshotsAndSetTags(t, span, state, innerMostException);
6767
} else {
68-
exceptionProbeManager.createProbesForException(
69-
fingerprint, innerMostException.getStackTrace());
70-
AgentTaskScheduler.INSTANCE.execute(() -> applyExceptionConfiguration(fingerprint));
68+
if (exceptionProbeManager.createProbesForException(innerMostException.getStackTrace())) {
69+
AgentTaskScheduler.INSTANCE.execute(() -> applyExceptionConfiguration(fingerprint));
70+
}
7171
}
7272
}
7373

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/ExceptionProbeManager.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ public ClassNameFiltering getClassNameFiltering() {
3737
return classNameFiltering;
3838
}
3939

40-
public void createProbesForException(String fingerprint, StackTraceElement[] stackTraceElements) {
40+
public boolean createProbesForException(StackTraceElement[] stackTraceElements) {
41+
boolean created = false;
4142
for (StackTraceElement stackTraceElement : stackTraceElements) {
4243
if (stackTraceElement.isNativeMethod() || stackTraceElement.getLineNumber() < 0) {
4344
// Skip native methods and lines without line numbers
@@ -54,8 +55,10 @@ public void createProbesForException(String fingerprint, StackTraceElement[] sta
5455
null,
5556
String.valueOf(stackTraceElement.getLineNumber()));
5657
ExceptionProbe probe = createMethodProbe(this, where);
58+
created = true;
5759
probes.putIfAbsent(probe.getId(), probe);
5860
}
61+
return created;
5962
}
6063

6164
void addFingerprint(String fingerprint) {
@@ -103,7 +106,7 @@ public ThrowableState getSateByThrowable(Throwable throwable) {
103106

104107
public static class ThrowableState {
105108
private final String exceptionId;
106-
private final List<Snapshot> snapshots = new ArrayList<>();
109+
private List<Snapshot> snapshots;
107110

108111
private ThrowableState(String exceptionId) {
109112
this.exceptionId = exceptionId;
@@ -117,7 +120,14 @@ public List<Snapshot> getSnapshots() {
117120
return snapshots;
118121
}
119122

123+
public boolean isSampling() {
124+
return snapshots != null && !snapshots.isEmpty();
125+
}
126+
120127
public void addSnapshot(Snapshot snapshot) {
128+
if (snapshots == null) {
129+
snapshots = new ArrayList<>();
130+
}
121131
snapshots.add(snapshot);
122132
}
123133
}

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/probe/ExceptionProbe.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.datadog.debugger.probe;
22

3+
import static com.datadog.debugger.util.ExceptionHelper.getInnerMostThrowable;
34
import static java.util.Collections.emptyList;
45

56
import com.datadog.debugger.el.ProbeCondition;
@@ -66,12 +67,25 @@ public void evaluate(
6667
return;
6768
}
6869
Throwable throwable = context.getCapturedThrowable().getThrowable();
70+
Throwable innerMostThrowable = getInnerMostThrowable(throwable);
6971
String fingerprint =
70-
Fingerprinter.fingerprint(throwable, exceptionProbeManager.getClassNameFiltering());
72+
Fingerprinter.fingerprint(
73+
innerMostThrowable, exceptionProbeManager.getClassNameFiltering());
7174
if (exceptionProbeManager.shouldCaptureException(fingerprint)) {
7275
LOGGER.debug("Capturing exception matching fingerprint: {}", fingerprint);
7376
// capture only on uncaught exception matching the fingerprint
7477
ExceptionProbeStatus exceptionStatus = (ExceptionProbeStatus) status;
78+
ExceptionProbeManager.ThrowableState state =
79+
exceptionProbeManager.getSateByThrowable(innerMostThrowable);
80+
if (state != null) {
81+
// Already unwinding the exception
82+
if (!state.isSampling()) {
83+
// skip snapshot because no snapshot from previous stack level
84+
return;
85+
}
86+
// Force for coordinated sampling
87+
exceptionStatus.setForceSampling(true);
88+
}
7589
exceptionStatus.setCapture(true);
7690
super.evaluate(context, status, methodLocation);
7791
}

dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/probe/LogProbe.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,11 +404,15 @@ public void evaluate(
404404
}
405405

406406
private void sample(LogStatus logStatus, MethodLocation methodLocation) {
407+
if (logStatus.isForceSampling()) {
408+
return;
409+
}
407410
// sample only once and when we need to evaluate
408411
if (!MethodLocation.isSame(methodLocation, evaluateAt)) {
409412
return;
410413
}
411414
boolean sampled = ProbeRateLimiter.tryProbe(id);
415+
LOGGER.debug("Probe[{}] sampled={}", probeId.getId(), sampled);
412416
logStatus.setSampled(sampled);
413417
if (!sampled) {
414418
DebuggerAgent.getSink().skipSnapshot(id, DebuggerContext.SkipCause.RATE);
@@ -586,6 +590,7 @@ public static class LogStatus extends CapturedContext.Status {
586590
private boolean hasLogTemplateErrors;
587591
private boolean hasConditionErrors;
588592
private boolean sampled = true;
593+
private boolean forceSampling;
589594
private String message;
590595

591596
public LogStatus(ProbeImplementation probeImplementation) {
@@ -654,6 +659,14 @@ public void setSampled(boolean sampled) {
654659
public boolean isSampled() {
655660
return sampled;
656661
}
662+
663+
public boolean isForceSampling() {
664+
return forceSampling;
665+
}
666+
667+
public void setForceSampling(boolean forceSampling) {
668+
this.forceSampling = forceSampling;
669+
}
657670
}
658671

659672
@Generated

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/agent/CapturedSnapshotTest.java

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
import datadog.trace.agent.tooling.TracerInstaller;
4949
import datadog.trace.api.Config;
5050
import datadog.trace.api.interceptor.MutableSpan;
51-
import datadog.trace.api.sampling.Sampler;
5251
import datadog.trace.bootstrap.debugger.*;
5352
import datadog.trace.bootstrap.debugger.el.ValueReferences;
5453
import datadog.trace.bootstrap.debugger.util.Redaction;
@@ -2071,8 +2070,8 @@ private void doSamplingTest(TestMethod testRun, int expectedGlobalCount, int exp
20712070
} finally {
20722071
ProbeRateLimiter.setSamplerSupplier(null);
20732072
}
2074-
assertEquals(expectedGlobalCount, globalSampler.callCount);
2075-
assertEquals(expectedProbeCount, probeSampler.callCount);
2073+
assertEquals(expectedGlobalCount, globalSampler.getCallCount());
2074+
assertEquals(expectedProbeCount, probeSampler.getCallCount());
20762075
}
20772076

20782077
@Test
@@ -2537,27 +2536,6 @@ public void instrumentationResult(ProbeDefinition definition, InstrumentationRes
25372536
}
25382537
}
25392538

2540-
static class MockSampler implements Sampler {
2541-
2542-
int callCount;
2543-
2544-
@Override
2545-
public boolean sample() {
2546-
callCount++;
2547-
return true;
2548-
}
2549-
2550-
@Override
2551-
public boolean keep() {
2552-
return false;
2553-
}
2554-
2555-
@Override
2556-
public boolean drop() {
2557-
return false;
2558-
}
2559-
}
2560-
25612539
static class KotlinHelper {
25622540
public static Class<?> compileAndLoad(
25632541
String className, String sourceFileName, List<File> outputFilesToDelete) {
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package com.datadog.debugger.agent;
2+
3+
import datadog.trace.api.sampling.Sampler;
4+
5+
public class MockSampler implements Sampler {
6+
7+
private int callCount;
8+
9+
@Override
10+
public boolean sample() {
11+
callCount++;
12+
return true;
13+
}
14+
15+
@Override
16+
public boolean keep() {
17+
return false;
18+
}
19+
20+
@Override
21+
public boolean drop() {
22+
return false;
23+
}
24+
25+
public int getCallCount() {
26+
return callCount;
27+
}
28+
}

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeInstrumentationTest.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.datadog.debugger.agent.DebuggerAgentHelper;
1919
import com.datadog.debugger.agent.DebuggerTransformer;
2020
import com.datadog.debugger.agent.JsonSnapshotSerializer;
21+
import com.datadog.debugger.agent.MockSampler;
2122
import com.datadog.debugger.probe.ExceptionProbe;
2223
import com.datadog.debugger.sink.DebuggerSink;
2324
import com.datadog.debugger.sink.ProbeStatusSink;
@@ -30,6 +31,7 @@
3031
import datadog.trace.api.interceptor.MutableSpan;
3132
import datadog.trace.bootstrap.debugger.DebuggerContext;
3233
import datadog.trace.bootstrap.debugger.ProbeLocation;
34+
import datadog.trace.bootstrap.debugger.ProbeRateLimiter;
3335
import datadog.trace.core.CoreTracer;
3436
import java.lang.instrument.ClassFileTransformer;
3537
import java.lang.instrument.Instrumentation;
@@ -63,19 +65,26 @@ public class ExceptionProbeInstrumentationTest {
6365
"org.joor.",
6466
"com.datadog.debugger.exception.")
6567
.collect(Collectors.toSet()));
68+
private MockSampler probeSampler;
69+
private MockSampler globalSampler;
6670

6771
@BeforeEach
6872
public void before() {
6973
CoreTracer tracer = CoreTracer.builder().build();
7074
TracerInstaller.forceInstallGlobalTracer(tracer);
7175
tracer.addTraceInterceptor(traceInterceptor);
76+
probeSampler = new MockSampler();
77+
globalSampler = new MockSampler();
78+
ProbeRateLimiter.setSamplerSupplier(rate -> rate < 101 ? probeSampler : globalSampler);
79+
ProbeRateLimiter.setGlobalSnapshotRate(1000);
7280
}
7381

7482
@AfterEach
7583
public void after() {
7684
if (currentTransformer != null) {
7785
instr.removeTransformer(currentTransformer);
7886
}
87+
ProbeRateLimiter.setSamplerSupplier(null);
7988
}
8089

8190
@Test
@@ -123,6 +132,8 @@ public void instrumentAndCaptureSnapshots() throws Exception {
123132
assertEquals(snapshot0.getExceptionId(), span.getTags().get(DD_DEBUG_ERROR_EXCEPTION_ID));
124133
assertEquals(Boolean.TRUE, span.getTags().get(ERROR_DEBUG_INFO_CAPTURED));
125134
assertEquals(snapshot0.getId(), span.getTags().get(String.format(SNAPSHOT_ID_TAG_FMT, 0)));
135+
assertEquals(1, probeSampler.getCallCount());
136+
assertEquals(1, globalSampler.getCallCount());
126137
}
127138

128139
@Test
@@ -200,6 +211,9 @@ public void recursive() throws Exception {
200211
assertEquals("3", getValue(snapshot2.getCaptures().getReturn().getArguments().get("n")));
201212
Snapshot snapshot9 = listener.snapshots.get(9);
202213
assertEquals("10", getValue(snapshot9.getCaptures().getReturn().getArguments().get("n")));
214+
// sampling happens only once ont he first snapshot then forced for coordinated sampling
215+
assertEquals(1, probeSampler.getCallCount());
216+
assertEquals(1, globalSampler.getCallCount());
203217
}
204218

205219
private static void assertExceptionMsg(String expectedMsg, Snapshot snapshot) {

dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeManagerTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public void instrumentStackTrace() {
2121
ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering);
2222
RuntimeException exception = new RuntimeException("test");
2323
String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
24-
exceptionProbeManager.createProbesForException(fingerprint, exception.getStackTrace());
24+
exceptionProbeManager.createProbesForException(exception.getStackTrace());
2525
assertFalse(exceptionProbeManager.getProbes().isEmpty());
2626
}
2727

@@ -42,7 +42,7 @@ void instrumentSingleFrame() {
4242

4343
String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
4444
assertEquals("d2e9d63e304d95f6435d77bf4d0d387521591e550be21d432339a14ee1cb40", fingerprint);
45-
exceptionProbeManager.createProbesForException(fingerprint, exception.getStackTrace());
45+
exceptionProbeManager.createProbesForException(exception.getStackTrace());
4646
assertEquals(1, exceptionProbeManager.getProbes().size());
4747
ExceptionProbe exceptionProbe = exceptionProbeManager.getProbes().iterator().next();
4848
assertEquals(
@@ -67,7 +67,7 @@ void filterAllFrames() {
6767
ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering);
6868
String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
6969
assertEquals("7a1e5e1bcc64ee26801d1471245eff6b6e8d7c61d0ea36fe85f3f75d79e42c", fingerprint);
70-
exceptionProbeManager.createProbesForException("", exception.getStackTrace());
70+
exceptionProbeManager.createProbesForException(exception.getStackTrace());
7171
assertEquals(0, exceptionProbeManager.getProbes().size());
7272
}
7373
}

0 commit comments

Comments
 (0)