Skip to content

Commit 75df705

Browse files
fm4vclaude
andcommitted
feat: add coverage load-base offset conversion and debugAddressToSymbol function
Convert coverage addresses to file offsets by subtracting the binary's load base so that dumps are portable across processes with different ASLR bases. This matches how `addressToSymbol` works: the fallback path in `SymbolIndex::findSymbol` treats values as raw file offsets when they're not in any mapped object. Add `SymbolIndex::diagnose` to explain why symbol resolution succeeds or fails: returns `"found:<name>"`, `"no_object"`, or `"no_symbol[object=<path>:offset=0x<hex>]"`. Add `debugAddressToSymbol` SQL function backed by `diagnose`, useful for investigating why `coverageCurrent()` produces empty symbols. Co-Authored-By: Claude Sonnet 4.6 (1M context) <[email protected]>
1 parent 34cd334 commit 75df705

File tree

6 files changed

+212
-3
lines changed

6 files changed

+212
-3
lines changed

src/Common/Coverage.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <vector>
1010

1111
#include <Common/IO.h>
12+
#include <Common/SymbolIndex.h>
1213
#include <base/coverage.h>
1314

1415
#include <fmt/format.h>
@@ -35,14 +36,23 @@ void dumpCoverage()
3536

3637
if (const char * coverage_filename_prefix = getenv("CLICKHOUSE_WRITE_COVERAGE")) // NOLINT(concurrency-mt-unsafe)
3738
{
38-
auto dump = [](const std::string & name, auto span)
39+
/// Convert runtime virtual addresses to file offsets by subtracting the binary's load base.
40+
/// This makes the dump portable across processes with different ASLR bases:
41+
/// when the server inserts the dump and calls addressToSymbol(), SymbolIndex::findSymbol()
42+
/// will use the fallback path that treats the value directly as a file offset — the same
43+
/// mechanism used by system.stack_trace (after PR #82809).
44+
uintptr_t load_base = 0;
45+
if (const DB::SymbolIndex::Object * self = DB::SymbolIndex::instance().thisObject())
46+
load_base = reinterpret_cast<uintptr_t>(self->address_begin);
47+
48+
auto dump = [load_base](const std::string & name, auto span)
3949
{
40-
/// Write only non-zeros.
50+
/// Write only non-zeros, converted to file offsets.
4151
std::vector<uintptr_t> data;
4252
data.reserve(span.size());
4353
for (auto addr : span)
4454
if (addr)
45-
data.push_back(addr);
55+
data.push_back(addr - load_base);
4656

4757
int fd = ::open(name.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0400);
4858
if (-1 == fd)

src/Common/SymbolIndex.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#include <Common/MemoryTrackerDebugBlockerInThread.h>
77
#include <Common/SymbolIndex.h>
88

9+
#include <fmt/format.h>
10+
911
#include <algorithm>
1012
#include <optional>
1113

@@ -544,6 +546,28 @@ const SymbolIndex::Object * SymbolIndex::findObject(const void * address) const
544546
return find(address, data.objects);
545547
}
546548

549+
String SymbolIndex::diagnose(const void * address) const
550+
{
551+
/// Mirror findSymbol exactly: if address is not in any mapped object,
552+
/// fall back to treating it as a raw file offset (same as findSymbol does).
553+
const Object * object = findObject(address);
554+
const void * offset = address;
555+
556+
if (object)
557+
offset = reinterpret_cast<const void *>(
558+
reinterpret_cast<uintptr_t>(address) - reinterpret_cast<uintptr_t>(object->address_begin));
559+
560+
const Symbol * symbol = find(offset, data.symbols);
561+
if (!symbol)
562+
{
563+
if (!object)
564+
return "no_object";
565+
return fmt::format("no_symbol[object={}:offset=0x{:x}]", object->name, reinterpret_cast<uintptr_t>(offset));
566+
}
567+
568+
return fmt::format("found:{}", symbol->name);
569+
}
570+
547571
const SymbolIndex::Object * SymbolIndex::thisObject() const
548572
{
549573
return findObject(reinterpret_cast<const void *>(+[]{}));

src/Common/SymbolIndex.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ class SymbolIndex : private boost::noncopyable
4242
const Object * findObject(const void * address) const;
4343
const Object * thisObject() const;
4444

45+
/// For debugging: explain why findSymbol returned nullptr.
46+
/// Returns "found:<name>" (symbol resolved), "no_object" (address not in any mapped binary and not resolvable as a raw file offset),
47+
/// or "no_symbol[object=<name>:offset=0x<hex>]" (in binary but no symbol at that offset).
48+
String diagnose(const void * address) const;
49+
4550
const std::vector<Symbol> & symbols() const { return data.symbols; }
4651
const std::vector<Object> & objects() const { return data.objects; }
4752

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#if defined(__ELF__) && !defined(OS_FREEBSD)
2+
3+
#include <Common/SymbolIndex.h>
4+
#include <Columns/ColumnString.h>
5+
#include <Columns/ColumnsNumber.h>
6+
#include <DataTypes/DataTypeString.h>
7+
#include <Functions/IFunction.h>
8+
#include <Functions/FunctionFactory.h>
9+
#include <Functions/FunctionHelpers.h>
10+
#include <Access/Common/AccessFlags.h>
11+
#include <Interpreters/Context.h>
12+
13+
14+
namespace DB
15+
{
16+
17+
namespace ErrorCodes
18+
{
19+
extern const int ILLEGAL_COLUMN;
20+
}
21+
22+
namespace
23+
{
24+
25+
/// Diagnostic variant of addressToSymbol.
26+
/// Instead of returning empty string on failure, returns:
27+
/// "no_object" — address not in any mapped binary and not resolvable as a raw file offset
28+
/// "no_symbol[object=<path>:offset=0x<hex>]" — in binary but no ELF symbol at that offset
29+
/// "found:<symbol_name>" — resolved successfully (the part after "found:" is the name addressToSymbol returns)
30+
///
31+
/// Use this to categorize why coverageCurrent() produces empty symbols:
32+
///
33+
/// SELECT debugAddressToSymbol(arrayJoin(coverageCurrent())) AS diag,
34+
/// count() AS cnt
35+
/// GROUP BY diag
36+
/// ORDER BY cnt DESC
37+
/// LIMIT 20
38+
class FunctionDebugAddressToSymbol : public IFunction
39+
{
40+
public:
41+
static constexpr auto name = "debugAddressToSymbol";
42+
43+
static FunctionPtr create(ContextPtr context)
44+
{
45+
context->checkAccess(AccessType::addressToSymbol);
46+
return std::make_shared<FunctionDebugAddressToSymbol>();
47+
}
48+
49+
String getName() const override { return name; }
50+
size_t getNumberOfArguments() const override { return 1; }
51+
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
52+
bool useDefaultImplementationForConstants() const override { return true; }
53+
54+
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
55+
{
56+
FunctionArgumentDescriptors mandatory_args{
57+
{"address_of_binary_instruction", &isUInt64, nullptr, "UInt64"}
58+
};
59+
validateFunctionArguments(*this, arguments, mandatory_args);
60+
return std::make_shared<DataTypeString>();
61+
}
62+
63+
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
64+
{
65+
const ColumnPtr & column = arguments[0].column;
66+
const ColumnUInt64 * column_concrete = checkAndGetColumn<ColumnUInt64>(column.get());
67+
68+
if (!column_concrete)
69+
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
70+
"Illegal column {} of argument of function {}", column->getName(), getName());
71+
72+
const typename ColumnVector<UInt64>::Container & data = column_concrete->getData();
73+
auto result_column = ColumnString::create();
74+
75+
const SymbolIndex & symbol_index = SymbolIndex::instance();
76+
77+
for (size_t i = 0; i < input_rows_count; ++i)
78+
{
79+
String diag = symbol_index.diagnose(reinterpret_cast<const void *>(data[i]));
80+
result_column->insertData(diag.data(), diag.size());
81+
}
82+
83+
return result_column;
84+
}
85+
};
86+
87+
}
88+
89+
REGISTER_FUNCTION(DebugAddressToSymbol)
90+
{
91+
factory.registerFunction<FunctionDebugAddressToSymbol>(
92+
FunctionDocumentation
93+
{
94+
.description = R"(
95+
Diagnostic variant of `addressToSymbol`. Returns a string explaining why symbol resolution succeeded or failed:
96+
- `"found:<symbol>"` if resolution succeeded (the part after `found:` is the name `addressToSymbol` returns)
97+
- `"no_object"` if the address is not within any mapped binary's address range and does not resolve as a raw file offset either
98+
- `"no_symbol[object=<path>:offset=0x<hex>]"` if the address maps into a binary but no ELF symbol covers that file offset
99+
100+
Requires `allow_introspection_functions = 1`.
101+
102+
Typical usage to categorize coverage failures:
103+
104+
```sql
105+
SELECT debugAddressToSymbol(arrayJoin(coverageCurrent())) AS diag, count() AS cnt
106+
GROUP BY diag ORDER BY cnt DESC LIMIT 20
107+
```
108+
)",
109+
.category = FunctionDocumentation::Category::Introspection
110+
}
111+
);
112+
}
113+
114+
}
115+
116+
#endif

tests/queries/0_stateless/04039_debug_address_to_symbol.reference

Whitespace-only changes.
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
-- Tags: no-parallel
2+
-- Validates debugAddressToSymbol diagnostic function using system.stack_trace.
3+
-- Does NOT require SANITIZE_COVERAGE.
4+
--
5+
-- debugAddressToSymbol(addr) returns:
6+
-- "no_object" — address not in any mapped binary's address range,
7+
-- and also fails as a raw file offset
8+
-- "no_symbol[object=<path>:offset=0x<hex>]" — in binary but no ELF symbol at offset
9+
-- "found:<symbol>" — resolved successfully
10+
11+
SET allow_introspection_functions = 1;
12+
13+
-- 1. Invalid address → must return 'no_object'
14+
SELECT debugAddressToSymbol(toUInt64(1234)) AS result;
15+
16+
-- 2. Valid stack_trace addresses → addressToSymbol returns non-empty for them,
17+
-- debugAddressToSymbol must return 'found:' prefixed strings (never 'no_object')
18+
SELECT countIf(NOT startsWith(diag, 'found:')) = 0 AS all_valid_addrs_are_found
19+
FROM (
20+
SELECT
21+
addr,
22+
addressToSymbol(addr) AS sym,
23+
debugAddressToSymbol(addr) AS diag
24+
FROM (SELECT arrayJoin(trace) AS addr FROM system.stack_trace LIMIT 1)
25+
WHERE sym != ''
26+
);
27+
28+
-- 3. debugAddressToSymbol 'found:' result must contain the same symbol as addressToSymbol
29+
SELECT countIf(substring(diag, 7) != sym) = 0 AS found_prefix_matches_addressToSymbol
30+
FROM (
31+
SELECT
32+
addressToSymbol(addr) AS sym,
33+
debugAddressToSymbol(addr) AS diag
34+
FROM (SELECT arrayJoin(trace) AS addr FROM system.stack_trace LIMIT 1)
35+
WHERE sym != ''
36+
AND startsWith(debugAddressToSymbol(addr), 'found:')
37+
);
38+
39+
-- 4. Category breakdown of stack_trace addresses (informational)
40+
SELECT
41+
multiIf(
42+
diag = 'no_object', 'no_object',
43+
startsWith(diag, 'no_symbol'), 'no_symbol_in_object',
44+
startsWith(diag, 'found:'), 'found',
45+
'other'
46+
) AS category,
47+
count() AS cnt
48+
FROM (
49+
SELECT debugAddressToSymbol(arrayJoin(trace)) AS diag
50+
FROM system.stack_trace
51+
LIMIT 1
52+
)
53+
GROUP BY category
54+
ORDER BY cnt DESC;

0 commit comments

Comments
 (0)