-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Labels
Description
Description
The debugger crashes (inside the DAC, libmscordaccore.so) when running !gcroot or !clrstack -gc against Linux amd64 coredumps. This also means managed heaps cannot be analyzed correctly in Visual Studio, PerfView, and other memory diagnostic tools.
We hit the crash here:
(lldb) bt
* thread #1, name = 'dotnet', stop reason = SIGSEGV: address not mapped to object (fault address=0x7fff3d208a40)
* frame #0: 0x000075047c2adf32 libc.so.6`__syscall_cancel_arch at syscall_cancel.S:56
frame #1: 0x000075047c2a239c libc.so.6`__internal_syscall_cancel(a1=<unavailable>, a2=<unavailable>, a3=<unavailable>, a4=<unavailable>, a5=0, a6=0, nr=61) at cancellation.c:49:12
frame #2: 0x000075047c2a23e4 libc.so.6`__syscall_cancel(a1=<unavailable>, a2=<unavailable>, a3=<unavailable>, a4=<unavailable>, a5=0, a6=0, nr=61) at cancellation.c:75:16
frame #3: 0x000075047c31267f libc.so.6`__GI___wait4(pid=<unavailable>, stat_loc=<unavailable>, options=<unavailable>, usage=<unavailable>) at wait4.c:30:10
frame #4: 0x000075047bf1486c libcoreclr.so`___lldb_unnamed_symbol_514460 + 1036
frame #5: 0x000075047bf15b39 libcoreclr.so`___lldb_unnamed_symbol_515240 + 2297
frame #6: 0x000075047bef5e01 libcoreclr.so`___lldb_unnamed_symbol_4f5ce0 + 289
frame #7: 0x000075047bef528d libcoreclr.so`___lldb_unnamed_symbol_4f50c0 + 461
frame #8: 0x000075047c24d2d0 libc.so.6`__restore_rt
frame #9: 0x000074f41ec6cf6c libmscordaccore.so`TGcInfoDecoder<AMD64GcInfoEncoding>::GetStackSlot(this=0x00007ffc7934fe58, spOffset=-216, spBase=GC_FRAMEREG_REL, pRD=0x00007ffc79350570) at gcinfodecoder.cpp:2203:32
frame #10: 0x000074f41ec6d29c libmscordaccore.so`TGcInfoDecoder<AMD64GcInfoEncoding>::ReportStackSlotToGC(this=0x00007ffc7934fe58, spOffset=-216, spBase=GC_FRAMEREG_REL, gcFlags=0, pRD=0x00007ffc79350570, flags=2, pCallBack=(libmscordaccore.so`DacStackReferenceWalker::GCEnumCallbackFunc(void*, __DPtr<Object>*, unsigned int, _DAC_SLOT_LOCATION) at daccess.cpp:7856), hCallBack=0x00007ffc79352e40) at gcinfodecoder.cpp:2244:26
frame #11: 0x000074f41ec6beb9 libmscordaccore.so`TGcInfoDecoder<AMD64GcInfoEncoding>::ReportSlotToGC(this=0x00007ffc7934fe58, slotDecoder=0x00007ffc7934fbd8, slotIndex=5, pRD=0x00007ffc79350570, reportScratchSlots=true, inputFlags=2, pCallBack=(libmscordaccore.so`DacStackReferenceWalker::GCEnumCallbackFunc(void*, __DPtr<Object>*, unsigned int, _DAC_SLOT_LOCATION) at daccess.cpp:7856), hCallBack=0x00007ffc79352e40) at gcinfodecoder.h:774:17
frame #12: 0x000074f41ec6bfda libmscordaccore.so`TGcInfoDecoder<AMD64GcInfoEncoding>::ReportUntrackedSlots(this=0x00007ffc7934fe58, slotDecoder=0x00007ffc7934fbd8, pRD=0x00007ffc79350570, inputFlags=2, pCallBack=(libmscordaccore.so`DacStackReferenceWalker::GCEnumCallbackFunc(void*, __DPtr<Object>*, unsigned int, _DAC_SLOT_LOCATION) at daccess.cpp:7856), hCallBack=0x00007ffc79352e40) at gcinfodecoder.cpp:1139:9
frame #13: 0x000074f41ec67845 libmscordaccore.so`TGcInfoDecoder<AMD64GcInfoEncoding>::EnumerateLiveSlots(this=0x00007ffc7934fe58, pRD=0x00007ffc79350570, reportScratchSlots=false, inputFlags=2, pCallBack=(libmscordaccore.so`DacStackReferenceWalker::GCEnumCallbackFunc(void*, __DPtr<Object>*, unsigned int, _DAC_SLOT_LOCATION) at daccess.cpp:7856), hCallBack=0x00007ffc79352e40) at gcinfodecoder.cpp:1088:9
frame #14: 0x000074f41ec2bf7f libmscordaccore.so`EECodeManager::EnumGcRefs(this=0x000074f42645f6c0, pRD=0x00007ffc79350570, pCodeInfo=0x00007ffc79350390, flags=2, pCallBack=(libmscordaccore.so`DacStackReferenceWalker::GCEnumCallbackFunc(void*, __DPtr<Object>*, unsigned int, _DAC_SLOT_LOCATION) at daccess.cpp:7856), hCallBack=0x00007ffc79352e40, relOffsetOverride=4294967295) at eetwain.cpp:1179:24
frame #15: 0x000074f41eb4040a libmscordaccore.so`DacStackReferenceWalker::Callback(pCF=0x00007ffc79350368, pData=0x00007ffc79352e40) at daccess.cpp:8000:22
frame #16: 0x000074f41ebd744a libmscordaccore.so`Thread::MakeStackwalkerCallback(this=0x000074f4264576d0, pCF=0x00007ffc79350368, pCallback=(libmscordaccore.so`DacStackReferenceWalker::Callback(CrawlFrame*, void*) at daccess.cpp:7963), pData=0x00007ffc79352e40, uFramesProcessed=2) at stackwalk.cpp:763:27
frame #17: 0x000074f41ebd766a libmscordaccore.so`Thread::StackWalkFramesEx(this=0x000074f4264576d0, pRD=0x00007ffc79350570, pCallback=(libmscordaccore.so`DacStackReferenceWalker::Callback(CrawlFrame*, void*) at daccess.cpp:7963), pData=0x00007ffc79352e40, flags=99584, pStartFrame=PTR_Frame @ 0x00007ffc79350568) at stackwalk.cpp:826:26
frame #18: 0x000074f41ebd81ea libmscordaccore.so`Thread::StackWalkFrames(this=0x000074f4264576d0, pCallback=(libmscordaccore.so`DacStackReferenceWalker::Callback(CrawlFrame*, void*) at daccess.cpp:7963), pData=0x00007ffc79352e40, flags=99584, pStartFrame=PTR_Frame @ 0x00007ffc79352e38) at stackwalk.cpp:901:12
frame #19: 0x000074f41eb3f0cb libmscordaccore.so`DacStackReferenceWalker::WalkStack(this=0x00005c9c5b4b47d0) at daccess.cpp:7822:14
frame #20: 0x000074f41eb3f257 libmscordaccore.so`DacStackReferenceWalker::Next(this=0x00005c9c5b4b47d0, count=4096, stackRefs=0x000074f426417010, pFetched=0x00007ffc79353ec8) at daccess.cpp:7751:9
I have a fix and will open a pull request.
Reproduction Steps
Repro:
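Compile this app in the Debug configuration, then run it with DOTNET_DbgEnableMiniDump=1, DOTNET_DbgMiniDumpType=4 (full dump), and DOTNET_DbgMiniDumpName=crash.dmp set so that it generates a crash dump named "crash.dmp":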
/// <summary>
/// Repro app: builds a small object graph whose nodes are referenced from
/// locals in <c>Main</c>, then throws an unhandled exception so the process
/// crashes while those locals are still on the stack.
/// </summary>
class GCRootCrash
{
    // Static reference to the first SingleRef, the head of the object graph.
    static object TheRoot;

    // Associates the second SingleRef created in Main with the TargetType instance.
    // The type name is fully qualified so the snippet compiles without a separate
    // using directive for System.Runtime.CompilerServices.
    static System.Runtime.CompilerServices.ConditionalWeakTable<SingleRef, TargetType> _dependent =
        new System.Runtime.CompilerServices.ConditionalWeakTable<SingleRef, TargetType>();

    /// <summary>
    /// Warns about missing coredump settings, builds the object graph, and
    /// then throws to crash the process.
    /// </summary>
    /// <exception cref="Exception">Always thrown, on purpose, as the last statement.</exception>
    static void Main()
    {
        // Check that coredump environment variables are set.
        // These are warnings only; the app still runs and crashes.
        if (Environment.GetEnvironmentVariable("DOTNET_DbgEnableMiniDump") != "1")
        {
            Console.Error.WriteLine("WARNING: DOTNET_DbgEnableMiniDump is not set to 1. No coredump will be generated.");
        }

        if (string.IsNullOrEmpty(Environment.GetEnvironmentVariable("DOTNET_DbgMiniDumpName")))
        {
            Console.Error.WriteLine("WARNING: DOTNET_DbgMiniDumpName is not set. Coredump filename may be unpredictable.");
        }

        if (Environment.GetEnvironmentVariable("DOTNET_DbgMiniDumpType") != "4")
        {
            Console.Error.WriteLine("WARNING: DOTNET_DbgMiniDumpType is not set to 4 (Full). Dump may not contain enough data.");
        }

        // Build the same object graph as the GCRoot test target. Objects must be
        // live on the stack so that the DAC needs to walk stack roots.
        // NOTE: the locals and their order are part of the repro; do not refactor
        // them into helpers.
        TargetType target = new TargetType();
        SingleRef s = new SingleRef();
        DoubleRef d = new DoubleRef();
        TripleRef t = new TripleRef();

        // TheRoot -> s -> arr[27] -> d -> { SingleRef -> t, t }
        TheRoot = s;
        object[] arr = new object[42];
        s.Item1 = arr;
        arr[27] = d;
        d.Item1 = new SingleRef() { Item1 = t };
        d.Item2 = t;

        // From here on, s refers to a second SingleRef; the first one stays
        // referenced through TheRoot.
        s = new SingleRef();
        t.Item1 = new SingleRef() { Item1 = s };
        t.Item2 = s;
        t.Item3 = new object();
        _dependent.Add(s, target);

        // Keep locals alive so the JIT doesn't optimize them away.
        // Without these, EnumerateStackRoots finds 0 roots in Release builds.
        GC.KeepAlive(target);
        GC.KeepAlive(s);
        GC.KeepAlive(d);
        GC.KeepAlive(t);
        GC.KeepAlive(arr);

        // Crash to generate a coredump with objects live on the stack.
        throw new Exception("crash to generate coredump");
    }
}
/// <summary>
/// Graph node that holds exactly one outgoing object reference.
/// </summary>
internal class SingleRef
{
    /// <summary>The only reference held by this node; null until assigned.</summary>
    public object Item1;
}
/// <summary>
/// Graph node that holds two outgoing object references.
/// </summary>
internal class DoubleRef
{
    /// <summary>First reference held by this node; null until assigned.</summary>
    public object Item1;

    /// <summary>Second reference held by this node; null until assigned.</summary>
    public object Item2;
}
/// <summary>
/// Graph node that holds three outgoing object references.
/// </summary>
internal class TripleRef
{
    /// <summary>First reference held by this node; null until assigned.</summary>
    public object Item1;

    /// <summary>Second reference held by this node; null until assigned.</summary>
    public object Item2;

    /// <summary>Third reference held by this node; null until assigned.</summary>
    public object Item3;
}
class TargetType { }
Then you can run `dotnet-dump analyze crash.dmp -c "clrstack -gc"` and it will crash. An easier-to-debug app:
// Add Microsoft.Diagnostics.Runtime package
class DacCrash
{
/// <summary>
/// Loads the crash dump named by the first argument and enumerates the stack
/// roots of every live thread, printing progress as it goes.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the dump path.</param>
/// <returns>1 when no dump path was supplied; 0 once enumeration completes.</returns>
static int Main(string[] args)
{
    // Guard: a dump path is mandatory.
    if (args.Length == 0)
    {
        Console.Error.WriteLine("Usage: DacCrash <crash-dump-path>");
        return 1;
    }

    string path = args[0];
    Console.WriteLine($"Loading dump: {path}");

    using DataTarget target = DataTarget.LoadDump(path);
    using ClrRuntime clr = target.ClrVersions.Single().CreateRuntime();
    ClrHeap managedHeap = clr.Heap;

    Console.WriteLine($"Threads: {clr.Threads.Length}");
    Console.WriteLine($"Heap segments: {managedHeap.Segments.Length}");
    Console.WriteLine("Enumerating stack roots (this is where the DAC crash happens)...");

    int total = 0;
    foreach (ClrThread candidate in clr.Threads)
    {
        // Only live threads are walked.
        if (!candidate.IsAlive)
        {
            continue;
        }

        Console.WriteLine($" Thread {candidate.OSThreadId:x} ...");

        // The roots themselves are not inspected; only how many there are.
        total += candidate.EnumerateStackRoots().Count();
    }

    Console.WriteLine($"Done. Found {total} stack roots (no crash occurred).");
    return 0;
}
}
Expected behavior
No crashes.
Actual behavior
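It crashes with SIGSEGV in TGcInfoDecoder<AMD64GcInfoEncoding>::GetStackSlot while the DAC walks stack roots (see the backtrace in the description).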
Regression?
@janvorli, this is a regression from change 1fa1745. @davidwrighton checked in a partial fix in 1a5af14 which handled the x86 case but didn't change amd64/arm64.
Known Workarounds
No response
Configuration
No response
Other information
No response
Reactions are currently unavailable