
Commit 2cd7686

gh-138122: Validate base frame before caching in remote debugging frame cache
The frame cache in the remote debugging module was storing frame chains without validating that they reached the base frame. This could happen when a frame chain was interrupted or when the process state changed during reading, resulting in incomplete stacks being cached. Subsequent samples that hit the cache would then produce flamegraphs that didn't reach the bottom of the call stack.

The fix passes base_frame_addr through to process_frame_chain(), which already has validation logic to ensure the frame walk terminates at the expected sentinel frame. By enabling this validation in the caching code path and tracking whether we have confirmed reaching the base frame, we now only store complete frame chains in the cache. When extending from cached data, we trust that the cached frames were already validated at storage time, maintaining the invariant that cached stacks are always complete.

An integration test using deeply nested generators that oscillate the stack depth is added to verify that all sampled stacks contain the entry point function. This catches regressions where incomplete stacks might be cached and returned.
1 parent 790a46a commit 2cd7686
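
For readers skimming the diff, the caching invariant is small enough to show in isolation. The following is a minimal standalone sketch of the idea only: the Frame struct, walk_chain() and should_cache() are illustrative stand-ins invented for this example, not the module's actual types or APIs. The real check lives in frame_cache_store() and process_frame_chain() in the diff below.

/*
 * Minimal standalone sketch (not the _remote_debugging code): a frame chain
 * is only cached if the walk actually reached the known base (sentinel) frame.
 */
#include <stdint.h>
#include <stdio.h>

typedef struct Frame {
    uintptr_t addr;            /* remote address of this frame */
    const struct Frame *prev;  /* next-outer frame, NULL past the base */
} Frame;

/* Walk the chain, reporting the last frame address visited. The walk may
 * stop early, e.g. on a read error or a frame budget. */
static void
walk_chain(const Frame *f, size_t max_frames, uintptr_t *out_last_addr)
{
    size_t n = 0;
    *out_last_addr = 0;
    while (f != NULL && n < max_frames) {
        *out_last_addr = f->addr;
        f = f->prev;
        n++;
    }
}

/* Mirror of the new validation: cache only if the walk ended at the base
 * frame. Returns 1 = would cache, 0 = incomplete stack, skip caching. */
static int
should_cache(uintptr_t base_frame_addr, uintptr_t last_frame_visited)
{
    if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) {
        return 0;  /* incomplete stack - graceful degradation, don't cache */
    }
    return 1;
}

int
main(void)
{
    /* Fake three-frame chain: leaf -> middle -> base (sentinel). */
    Frame base = { 0x1000, NULL };
    Frame mid  = { 0x2000, &base };
    Frame leaf = { 0x3000, &mid };
    uintptr_t last = 0;

    /* A complete walk reaches the base frame, so it may be cached. */
    walk_chain(&leaf, 16, &last);
    printf("complete walk:  cache=%d\n", should_cache(base.addr, last));

    /* A truncated walk (budget of 1) never reaches the base frame,
     * so the chain must not be cached. */
    walk_chain(&leaf, 1, &last);
    printf("truncated walk: cache=%d\n", should_cache(base.addr, last));
    return 0;
}

The design choice mirrored here is graceful degradation: an incomplete walk is still reported to the caller, it is simply never stored in the cache.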

6 files changed: +152 −17 lines

Lib/test/test_profiling/test_sampling_profiler/test_integration.py

Lines changed: 95 additions & 0 deletions
@@ -863,3 +863,98 @@ def test_async_aware_running_sees_only_cpu_task(self):
         self.assertGreater(cpu_percentage, 90.0,
             f"cpu_leaf should dominate samples in 'running' mode, "
             f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})")
+
+
+def _generate_deep_generators_script(chain_depth=20, recurse_depth=150):
+    """Generate a script with deep nested generators for stress testing."""
+    lines = [
+        'import sys',
+        'sys.setrecursionlimit(5000)',
+        '',
+    ]
+    # Generate chain of yield-from functions
+    for i in range(chain_depth - 1):
+        lines.extend([
+            f'def deep_yield_chain_{i}(n):',
+            f'    yield ("L{i}", n)',
+            f'    yield from deep_yield_chain_{i + 1}(n)',
+            '',
+        ])
+    # Last chain function calls recursive_diver
+    lines.extend([
+        f'def deep_yield_chain_{chain_depth - 1}(n):',
+        f'    yield ("L{chain_depth - 1}", n)',
+        f'    yield from recursive_diver(n, {chain_depth})',
+        '',
+        'def recursive_diver(n, depth):',
+        '    yield (f"DIVE_{depth}", n)',
+        f'    if depth < {recurse_depth}:',
+        '        yield from recursive_diver(n, depth + 1)',
+        '    else:',
+        '        for i in range(5):',
+        '            yield (f"BOTTOM_{depth}", i)',
+        '',
+        'def oscillating_generator(iterations=1000):',
+        '    for i in range(iterations):',
+        '        yield ("OSCILLATE", i)',
+        '        yield from deep_yield_chain_0(i)',
+        '',
+        'def run_forever():',
+        '    while True:',
+        '        for _ in oscillating_generator(10):',
+        '            pass',
+        '',
+        '_test_sock.sendall(b"working")',
+        'run_forever()',
+    ])
+    return '\n'.join(lines)
+
+
+@requires_remote_subprocess_debugging()
+class TestDeepGeneratorFrameCache(unittest.TestCase):
+    """Test frame cache consistency with deep oscillating generator stacks."""
+
+    def test_all_stacks_share_same_base_frame(self):
+        """Verify all sampled stacks reach the entry point function.
+
+        When profiling deep generators that oscillate up and down the call
+        stack, every sample should include the entry point function
+        (run_forever) in its call chain. If the frame cache stores
+        incomplete stacks, some samples will be missing this base function,
+        causing broken flamegraphs.
+        """
+        script = _generate_deep_generators_script()
+        with test_subprocess(script, wait_for_working=True) as subproc:
+            collector = CollapsedStackCollector(sample_interval_usec=1, skip_idle=False)
+
+            with (
+                io.StringIO() as captured_output,
+                mock.patch("sys.stdout", captured_output),
+            ):
+                profiling.sampling.sample.sample(
+                    subproc.process.pid,
+                    collector,
+                    duration_sec=2,
+                )
+
+            samples_with_entry_point = 0
+            samples_without_entry_point = 0
+            total_samples = 0
+
+            for (call_tree, _thread_id), count in collector.stack_counter.items():
+                total_samples += count
+                if call_tree:
+                    has_entry_point = call_tree and call_tree[0][2] == "<module>"
+                    if has_entry_point:
+                        samples_with_entry_point += count
+                    else:
+                        samples_without_entry_point += count
+
+            self.assertGreater(total_samples, 100,
+                f"Expected at least 100 samples, got {total_samples}")
+
+            self.assertEqual(samples_without_entry_point, 0,
+                f"Found {samples_without_entry_point}/{total_samples} samples "
+                f"missing the entry point function 'run_forever'. This indicates "
+                f"incomplete stacks are being returned, likely due to frame cache "
+                f"storing partial stack traces.")
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+Fix incomplete stack traces in the Tachyon profiler's frame cache when
+profiling code with deeply nested generators. The frame cache now validates
+that stack traces reach the base frame before caching, preventing broken
+flamegraphs. Patch by Pablo Galindo.

Modules/_remote_debugging/_remote_debugging.h

Lines changed: 7 additions & 2 deletions
@@ -429,7 +429,8 @@ extern int process_frame_chain(
     int *stopped_at_cached_frame,
     uintptr_t *frame_addrs,
     Py_ssize_t *num_addrs,
-    Py_ssize_t max_addrs
+    Py_ssize_t max_addrs,
+    uintptr_t *out_last_frame_addr
 );
 
 /* Frame cache functions */
@@ -447,18 +448,22 @@ extern int frame_cache_lookup_and_extend(
     Py_ssize_t *num_addrs,
     Py_ssize_t max_addrs);
 // Returns: 1 = stored, 0 = not stored (graceful), -1 = error
+// Only stores complete stacks that reach base_frame_addr
 extern int frame_cache_store(
     RemoteUnwinderObject *unwinder,
     uint64_t thread_id,
     PyObject *frame_list,
     const uintptr_t *addrs,
-    Py_ssize_t num_addrs);
+    Py_ssize_t num_addrs,
+    uintptr_t base_frame_addr,
+    uintptr_t last_frame_visited);
 
 extern int collect_frames_with_cache(
     RemoteUnwinderObject *unwinder,
     uintptr_t frame_addr,
     StackChunkList *chunks,
     PyObject *frame_info,
+    uintptr_t base_frame_addr,
     uintptr_t gc_frame,
     uintptr_t last_profiled_frame,
     uint64_t thread_id);
Modules/_remote_debugging/frame_cache.c

Lines changed: 15 additions & 1 deletion
@@ -194,19 +194,33 @@ frame_cache_lookup_and_extend(
 }
 
 // Store frame list with addresses in cache
+// Only stores complete stacks that reach base_frame_addr (validation done internally)
 // Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1 = error
 int
 frame_cache_store(
     RemoteUnwinderObject *unwinder,
     uint64_t thread_id,
     PyObject *frame_list,
     const uintptr_t *addrs,
-    Py_ssize_t num_addrs)
+    Py_ssize_t num_addrs,
+    uintptr_t base_frame_addr,
+    uintptr_t last_frame_visited)
 {
     if (!unwinder->frame_cache || thread_id == 0) {
         return 0;
     }
 
+    // Validate we have a complete stack before caching.
+    // Only cache if last_frame_visited matches base_frame_addr (the sentinel
+    // at the bottom of the stack). Note: we use last_frame_visited rather than
+    // addrs[num_addrs-1] because the base frame is visited but not added to the
+    // addrs array (it returns frame==NULL from is_frame_valid due to
+    // owner==FRAME_OWNED_BY_INTERPRETER).
+    if (base_frame_addr != 0 && last_frame_visited != base_frame_addr) {
+        // Incomplete stack - don't cache (graceful degradation)
+        return 0;
+    }
+
     // Clamp to max frames
     if (num_addrs > FRAME_CACHE_MAX_FRAMES) {
         num_addrs = FRAME_CACHE_MAX_FRAMES;

Modules/_remote_debugging/frames.c

Lines changed: 29 additions & 12 deletions
@@ -265,18 +265,22 @@ process_frame_chain(
     int *stopped_at_cached_frame,
     uintptr_t *frame_addrs,          // optional: C array to receive frame addresses
     Py_ssize_t *num_addrs,           // in/out: current count / updated count
-    Py_ssize_t max_addrs)            // max capacity of frame_addrs array
+    Py_ssize_t max_addrs,            // max capacity of frame_addrs array
+    uintptr_t *out_last_frame_addr)  // optional: receives last frame address visited
 {
     uintptr_t frame_addr = initial_frame_addr;
     uintptr_t prev_frame_addr = 0;
     uintptr_t last_frame_addr = 0;  // Track last frame visited for validation
     const size_t MAX_FRAMES = 1024 + 512;
     size_t frame_count = 0;
 
-    // Initialize output flag
+    // Initialize output parameters
     if (stopped_at_cached_frame) {
         *stopped_at_cached_frame = 0;
     }
+    if (out_last_frame_addr) {
+        *out_last_frame_addr = 0;
+    }
 
     // Quick check: if current_frame == last_profiled_frame, entire stack is unchanged
     if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) {
@@ -390,6 +394,11 @@
         return -1;
     }
 
+    // Set output parameter for caller (needed for cache validation)
+    if (out_last_frame_addr) {
+        *out_last_frame_addr = last_frame_addr;
+    }
+
     return 0;
 }
 
@@ -537,6 +546,7 @@ collect_frames_with_cache(
     uintptr_t frame_addr,
     StackChunkList *chunks,
     PyObject *frame_info,
+    uintptr_t base_frame_addr,
    uintptr_t gc_frame,
     uintptr_t last_profiled_frame,
     uint64_t thread_id)
@@ -551,11 +561,13 @@
     uintptr_t addrs[FRAME_CACHE_MAX_FRAMES];
     Py_ssize_t num_addrs = 0;
     Py_ssize_t frames_before = PyList_GET_SIZE(frame_info);
+    uintptr_t last_frame_visited = 0;
 
     int stopped_at_cached = 0;
-    if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame,
+    if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, base_frame_addr, gc_frame,
                             last_profiled_frame, &stopped_at_cached,
-                            addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
+                            addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
+                            &last_frame_visited) < 0) {
         return -1;
     }
 
@@ -575,23 +587,28 @@
             // Cache miss - continue walking from last_profiled_frame to get the rest
             STATS_INC(unwinder, frame_cache_misses);
             Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info);
-            if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame,
-                                    0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) {
+            if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, base_frame_addr, gc_frame,
+                                    0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES,
+                                    &last_frame_visited) < 0) {
                 return -1;
             }
             STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk);
         } else {
-            // Partial cache hit
+            // Partial cache hit - cache was validated when stored, so we trust it
             STATS_INC(unwinder, frame_cache_partial_hits);
             STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache);
         }
-    } else if (last_profiled_frame == 0) {
-        // No cache involvement (no last_profiled_frame or cache disabled)
-        STATS_INC(unwinder, frame_cache_misses);
+    } else {
+        if (last_profiled_frame == 0) {
+            // No cache involvement (no last_profiled_frame or cache disabled)
+            STATS_INC(unwinder, frame_cache_misses);
+        }
    }
 
-    // Store in cache (frame_cache_store handles truncation if num_addrs > FRAME_CACHE_MAX_FRAMES)
-    if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) {
+    // Store in cache - frame_cache_store validates internally that we have a
+    // complete stack (reached base_frame_addr) before actually storing
+    if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs,
+                          base_frame_addr, last_frame_visited) < 0) {
         return -1;
     }
 
Modules/_remote_debugging/threads.c

Lines changed: 2 additions & 2 deletions
@@ -430,7 +430,7 @@ unwind_stack_for_thread(
         uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts,
                 unwinder->debug_offsets.thread_state.last_profiled_frame);
         if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info,
-                                      gc_frame, last_profiled_frame, tid) < 0) {
+                                      base_frame_addr, gc_frame, last_profiled_frame, tid) < 0) {
            set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames");
            goto error;
        }
@@ -444,7 +444,7 @@
    } else {
        // No caching - process entire frame chain with base_frame validation
        if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info,
-                                base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0) < 0) {
+                                base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0, NULL) < 0) {
            set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
            goto error;
        }
