Skip to content

Commit aec0da1

Browse files
MaskRaykastiglione
andauthored
[utils] Fix DenseMap debugger printers for the packed used-bit array (#201755)
DenseMap no longer use in-band sentinel keys. (#200595 and #201281). Update the GDB pretty printer and LLDB data formatters to test the used bit rather than comparing keys. GDB: advancePastEmptyBuckets relied on DenseMapInfo::getEmptyKey(), which could not be evaluated in GDB and so was disabled, leaving the printer to emit empty and erased buckets. It now walks bucket indices and skips any whose used bit is clear. LLDB: DenseMapSynthetic used a key-uniqueness heuristic to guess which buckets were live, which mishandled a lone erased bucket (hence the former tombstones=1 summary note). It now reads the used array directly, so erased entries are skipped exactly. NumTombstones no longer exists, so drop it from the summary. Written by Claude Opus 4.8 --------- Co-authored-by: Dave Lee <[email protected]>
1 parent a172bbc commit aec0da1

2 files changed

Lines changed: 30 additions & 62 deletions

File tree

llvm/utils/gdb-scripts/prettyprinters.py

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -146,44 +146,34 @@ class DenseMapPrinter:
146146
"Print a DenseMap"
147147

148148
class _iterator:
149-
def __init__(self, key_info_t, begin, end):
150-
self.key_info_t = key_info_t
151-
self.cur = begin
152-
self.end = end
149+
def __init__(self, buckets, used, num_buckets):
150+
self.buckets = buckets
151+
self.used = used
152+
self.num_buckets = num_buckets
153+
self.index = 0
153154
self.advancePastEmptyBuckets()
154155
self.first = True
155156

156157
def __iter__(self):
157158
return self
158159

160+
def isUsed(self, index):
161+
# Occupancy is tracked in a packed 1-bit-per-bucket "used" array of
162+
# uint32_t words, so a bucket is occupied iff its bit is set.
163+
word = self.used[index >> 5]
164+
return (int(word) >> (index & 31)) & 1
165+
159166
def advancePastEmptyBuckets(self):
160-
# disabled until the comments below can be addressed
161-
# keeping as notes/posterity/hints for future contributors
162-
return
163-
n = self.key_info_t.name
164-
is_equal = gdb.parse_and_eval(n + "::isEqual")
165-
empty = gdb.parse_and_eval(n + "::getEmptyKey()")
166-
# the following is invalid, GDB fails with:
167-
# Python Exception <class 'gdb.error'> Attempt to take address of value
168-
# not located in memory.
169-
# because isEqual took parameter (for the unsigned long key I was testing)
170-
# by const ref, and GDB
171-
# It's also not entirely general - we should be accessing the "getFirst()"
172-
# member function, not the 'first' member variable, but I've yet to figure
173-
# out how to find/call member functions (especially (const) overloaded
174-
# ones) on a gdb.Value.
175-
while self.cur != self.end and is_equal(
176-
self.cur.dereference()["first"], empty
177-
):
178-
self.cur = self.cur + 1
167+
while self.index < self.num_buckets and not self.isUsed(self.index):
168+
self.index += 1
179169

180170
def __next__(self):
181-
if self.cur == self.end:
171+
if self.index >= self.num_buckets:
182172
raise StopIteration
183-
cur = self.cur
184-
v = cur.dereference()["first" if self.first else "second"]
173+
bucket = (self.buckets + self.index).dereference()
174+
v = bucket["first" if self.first else "second"]
185175
if not self.first:
186-
self.cur = self.cur + 1
176+
self.index += 1
187177
self.advancePastEmptyBuckets()
188178
self.first = True
189179
else:
@@ -198,9 +188,8 @@ def __init__(self, val):
198188

199189
def children(self):
200190
t = self.val.type.template_argument(3).pointer()
201-
begin = self.val["Buckets"].cast(t)
202-
end = (begin + self.val["NumBuckets"]).cast(t)
203-
return self._iterator(self.val.type.template_argument(2), begin, end)
191+
buckets = self.val["Buckets"].cast(t)
192+
return self._iterator(buckets, self.val["Used"], int(self.val["NumBuckets"]))
204193

205194
def to_string(self):
206195
return "llvm::DenseMap with %d elements" % (self.val["NumEntries"])

llvm/utils/lldbDataFormatters.py

Lines changed: 11 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
from __future__ import annotations
88

9-
import collections
109
from typing import Literal, Optional
1110
import lldb
1211

@@ -448,14 +447,7 @@ def _set_raw_pointer(self, raw_value, min_low_bits):
448447
def DenseMapSummary(valobj: lldb.SBValue, _) -> str:
449448
raw_value = valobj.GetNonSyntheticValue()
450449
num_entries = raw_value.GetChildMemberWithName("NumEntries").unsigned
451-
num_tombstones = raw_value.GetChildMemberWithName("NumTombstones").unsigned
452-
453-
summary = f"size={num_entries}"
454-
if num_tombstones == 1:
455-
# The heuristic to identify valid entries does not handle the case of a
456-
# single tombstone. The summary calls attention to this.
457-
summary = f"tombstones=1, {summary}"
458-
return summary
450+
return f"size={num_entries}"
459451

460452

461453
class DenseMapSynthetic:
@@ -492,31 +484,18 @@ def update(self):
492484
if num_entries == 0:
493485
return
494486

495-
buckets = self.valobj.GetChildMemberWithName("Buckets")
496487
num_buckets = self.valobj.GetChildMemberWithName("NumBuckets").unsigned
497-
498-
# Bucket entries contain one of the following:
499-
# 1. Valid key-value
500-
# 2. Empty key
501-
# 3. Tombstone key (a deleted entry)
502-
#
503-
# NumBuckets is always greater than NumEntries. The empty key, and
504-
# potentially the tombstone key, will occur multiple times. A key that
505-
# is repeated is either the empty key or the tombstone key.
506-
507-
# For each key, collect a list of buckets it appears in.
508-
key_buckets: dict[str, list[int]] = collections.defaultdict(list)
488+
used = self.valobj.GetChildMemberWithName("Used")
489+
490+
# Occupancy is tracked in a packed 1-bit-per-bucket "used" array of
491+
# uint32_t words. A bucket holds a valid entry iff its bit is set;
492+
# empty and erased buckets are clear. Read the whole array in one go
493+
# rather than fetching each word with a separate expression path.
494+
num_words = (num_buckets + 31) // 32
495+
words = used.GetPointeeData(0, num_words).uint32s
509496
for index in range(num_buckets):
510-
bucket = buckets.GetValueForExpressionPath(f"[{index}]")
511-
key = bucket.GetChildAtIndex(0)
512-
key_buckets[str(key.data)].append(index)
513-
514-
# Heuristic: This is not a multi-map, any repeated (non-unique) keys are
515-
# either the the empty key or the tombstone key. Populate child_buckets
516-
# with the indexes of entries containing unique keys.
517-
for indexes in key_buckets.values():
518-
if len(indexes) == 1:
519-
self.child_buckets.append(indexes[0])
497+
if (words[index >> 5] >> (index & 31)) & 1:
498+
self.child_buckets.append(index)
520499

521500

522501
class DenseSetSynthetic:

0 commit comments

Comments
 (0)