Skip to content

Commit 1c2baea

Browse files
committed
[StringMap] Store occupancy in a packed used-bit array
Track bucket occupancy in a packed 1-bit-per-bucket "used" array (uint32 words) instead of testing the bucket pointer for null, mirroring the DenseMap change #201281. The bucket pointers, the parallel hash array, and the used array share one allocation. StringMap buckets are pointers to heap-allocated entries, so the probe loop previously loaded the sparse 8-byte pointer on every step just to test for an empty bucket. The empty test now reads a dense bit, so find-miss and insert probing touch the used array and the 4-byte hash array, deferring the pointer load until a hash match. Iteration scans the used words 32 buckets at a time. Empty buckets become don't-care: the allocation no longer needs zeroing beyond the used words, and the trailing iteration sentinel is gone. instructions:u may rise slightly from the per-insert used-bit writes, but wall time is neutral-to-faster: the find-miss and iteration gains are cache effects an instruction count does not see.
1 parent 6951870 commit 1c2baea

5 files changed

Lines changed: 209 additions & 114 deletions

File tree

llvm/docs/ProgrammersManual.rst

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2384,12 +2384,14 @@ This container guarantees the "``(char*)(&Value+1)``" points to the key string
23842384
for a value.
23852385

23862386
The ``StringMap`` is very fast for several reasons: linear probing is very cache
2387-
efficient for lookups, the hash value of strings in buckets is not recomputed
2388-
when looking up an element, ``StringMap`` rarely has to touch the memory for
2389-
unrelated objects when looking up a value (even when hash collisions happen),
2390-
hash table growth does not recompute the hash values for strings already in the
2391-
table, and each pair in the map is store in a single allocation (the string data
2392-
is stored in the same allocation as the Value of a pair).
2387+
efficient for lookups, bucket occupancy is tracked in a packed 1-bit-per-bucket
2388+
array so probing tests a dense bit rather than loading the sparse bucket pointer,
2389+
the hash value of strings in buckets is not recomputed when looking up an
2390+
element, ``StringMap`` rarely has to touch the memory for unrelated objects when
2391+
looking up a value (even when hash collisions happen), hash table growth does not
2392+
recompute the hash values for strings already in the table, and each pair in the
2393+
map is store in a single allocation (the string data is stored in the same
2394+
allocation as the Value of a pair).
23932395

23942396
``StringMap`` also provides query methods that take byte ranges, so it only ever
23952397
copies a string if a value is inserted into the table.

llvm/include/llvm/ADT/StringMap.h

Lines changed: 107 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515
#define LLVM_ADT_STRINGMAP_H
1616

1717
#include "llvm/ADT/StringMapEntry.h"
18+
#include "llvm/ADT/bit.h"
1819
#include "llvm/ADT/iterator.h"
1920
#include "llvm/Support/AllocatorBase.h"
2021
#include "llvm/Support/Compiler.h"
2122
#include "llvm/Support/PointerLikeTypeTraits.h"
23+
#include <cstdint>
24+
#include <cstring>
2225
#include <initializer_list>
2326
#include <iterator>
2427
#include <type_traits>
@@ -31,15 +34,40 @@ template <typename ValueTy> class StringMapKeyIterator;
3134
/// StringMapImpl - This is the base class of StringMap that is shared among
3235
/// all of its instantiations.
3336
class StringMapImpl {
37+
public:
38+
// Occupancy is tracked in a packed 1-bit-per-bucket "used" array (uint32
39+
// words), like DenseMap. Probing and iteration test a dense bit rather than
40+
// loading the sparse bucket pointer.
41+
using UsedT = uint32_t;
42+
43+
// Number of used words backing NumBuckets buckets.
44+
static constexpr unsigned usedWords(unsigned N) { return (N + 31) / 32; }
45+
static bool isUsed(const UsedT *U, unsigned I) {
46+
return (U[I >> 5] >> (I & 31)) & 1;
47+
}
48+
static void setUsed(UsedT *U, unsigned I) {
49+
U[I >> 5] |= UsedT(1) << (I & 31);
50+
}
51+
static void unsetUsed(UsedT *U, unsigned I) {
52+
U[I >> 5] &= ~(UsedT(1) << (I & 31));
53+
}
54+
3455
protected:
35-
// Array of NumBuckets pointers to entries, null pointers are holes.
36-
// TheTable[NumBuckets] contains a sentinel value for easy iteration. Followed
37-
// by an array of the actual hash values as unsigned integers.
56+
// One allocation holds, in order: NumBuckets entry pointers (occupied slots
57+
// only; empty slots are don't-care), NumBuckets parallel hash values, and the
58+
// packed used-bit array recording which slots are occupied.
3859
StringMapEntryBase **TheTable = nullptr;
3960
unsigned NumBuckets = 0;
4061
unsigned NumItems = 0;
4162
unsigned ItemSize;
4263

64+
unsigned *getHashArray() const {
65+
return reinterpret_cast<unsigned *>(TheTable + NumBuckets);
66+
}
67+
UsedT *getUsed() const {
68+
return reinterpret_cast<UsedT *>(getHashArray() + NumBuckets);
69+
}
70+
4371
protected:
4472
explicit StringMapImpl(unsigned itemSize) : ItemSize(itemSize) {}
4573
StringMapImpl(StringMapImpl &&RHS)
@@ -55,10 +83,10 @@ class StringMapImpl {
5583
LLVM_ABI unsigned RehashTable(unsigned BucketNo = 0);
5684

5785
/// LookupBucketFor - Look up the bucket that the specified string should end
58-
/// up in. If it already exists as a key in the map, the Item pointer for the
59-
/// specified bucket will be non-null. Otherwise, it will be null. In either
60-
/// case, the FullHashValue field of the bucket will be set to the hash value
61-
/// of the string.
86+
/// up in. If it already exists as a key in the map, the bucket's used bit
87+
/// will be set. Otherwise the bucket is empty (used bit clear) and its
88+
/// FullHashValue is set to the hash value of the string so the caller can
89+
/// fill it in.
6290
unsigned LookupBucketFor(StringRef Key) {
6391
return LookupBucketFor(Key, hash(Key));
6492
}
@@ -86,10 +114,6 @@ class StringMapImpl {
86114
/// setup the map as empty.
87115
LLVM_ABI void init(unsigned Size);
88116

89-
iterator_range<StringMapEntryBase **> buckets() {
90-
return make_range(TheTable, TheTable + NumBuckets);
91-
}
92-
93117
public:
94118
[[nodiscard]] unsigned getNumBuckets() const { return NumBuckets; }
95119
[[nodiscard]] unsigned getNumItems() const { return NumItems; }
@@ -150,21 +174,21 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
150174
if (RHS.empty())
151175
return;
152176

153-
// Allocate TheTable of the same size as RHS's TheTable, and set the
154-
// sentinel appropriately (and NumBuckets).
177+
// Allocate TheTable of the same size as RHS's TheTable (and NumBuckets).
155178
init(RHS.NumBuckets);
156-
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1),
157-
*RHSHashTable = (unsigned *)(RHS.TheTable + NumBuckets + 1);
179+
unsigned *HashTable = getHashArray(), *RHSHashTable = RHS.getHashArray();
180+
UsedT *Used = getUsed();
181+
const UsedT *RHSUsed = RHS.getUsed();
158182

159183
NumItems = RHS.NumItems;
160184
// Copy the bucket layout verbatim. Because RHS was built with linear
161185
// probing, preserving each entry's slot keeps the probe-sequence invariant
162-
// intact without re-probing.
186+
// intact without re-probing. Empty slots stay don't-care.
187+
std::memcpy(Used, RHSUsed, usedWords(NumBuckets) * sizeof(UsedT));
163188
for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
164-
StringMapEntryBase *Bucket = RHS.TheTable[I];
165-
if (!Bucket)
189+
if (!isUsed(Used, I))
166190
continue;
167-
191+
StringMapEntryBase *Bucket = RHS.TheTable[I];
168192
TheTable[I] = MapEntryTy::create(
169193
static_cast<MapEntryTy *>(Bucket)->getKey(), getAllocator(),
170194
static_cast<MapEntryTy *>(Bucket)->getValue());
@@ -183,11 +207,10 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
183207
// to default values. This is a copy of clear(), but avoids unnecessary
184208
// work not required in the destructor.
185209
if (!empty()) {
186-
for (StringMapEntryBase *Bucket : buckets()) {
187-
if (Bucket) {
188-
static_cast<MapEntryTy *>(Bucket)->Destroy(getAllocator());
189-
}
190-
}
210+
const UsedT *Used = getUsed();
211+
for (unsigned I = 0, E = NumBuckets; I != E; ++I)
212+
if (isUsed(Used, I))
213+
static_cast<MapEntryTy *>(TheTable[I])->Destroy(getAllocator());
191214
}
192215
}
193216

@@ -201,13 +224,19 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
201224
using const_iterator = StringMapIterBase<ValueTy, true>;
202225
using iterator = StringMapIterBase<ValueTy, false>;
203226

204-
[[nodiscard]] iterator begin() { return iterator(TheTable, NumBuckets != 0); }
205-
[[nodiscard]] iterator end() { return iterator(TheTable + NumBuckets); }
227+
[[nodiscard]] iterator begin() {
228+
return iterator(TheTable, TheTable, getUsed(), NumBuckets, NumBuckets != 0);
229+
}
230+
[[nodiscard]] iterator end() {
231+
return iterator(TheTable + NumBuckets, TheTable, getUsed(), NumBuckets);
232+
}
206233
[[nodiscard]] const_iterator begin() const {
207-
return const_iterator(TheTable, NumBuckets != 0);
234+
return const_iterator(TheTable, TheTable, getUsed(), NumBuckets,
235+
NumBuckets != 0);
208236
}
209237
[[nodiscard]] const_iterator end() const {
210-
return const_iterator(TheTable + NumBuckets);
238+
return const_iterator(TheTable + NumBuckets, TheTable, getUsed(),
239+
NumBuckets);
211240
}
212241

213242
[[nodiscard]] iterator_range<StringMapKeyIterator<ValueTy>> keys() const {
@@ -221,7 +250,7 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
221250
int Bucket = FindKey(Key, FullHashValue);
222251
if (Bucket == -1)
223252
return end();
224-
return iterator(TheTable + Bucket);
253+
return iterator(TheTable + Bucket, TheTable, getUsed(), NumBuckets);
225254
}
226255

227256
[[nodiscard]] const_iterator find(StringRef Key) const {
@@ -233,7 +262,7 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
233262
int Bucket = FindKey(Key, FullHashValue);
234263
if (Bucket == -1)
235264
return end();
236-
return const_iterator(TheTable + Bucket);
265+
return const_iterator(TheTable + Bucket, TheTable, getUsed(), NumBuckets);
237266
}
238267

239268
/// lookup - Return the entry for the specified key, or a default
@@ -301,11 +330,12 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
301330
/// insert it and return true.
302331
bool insert(MapEntryTy *KeyValue) {
303332
unsigned BucketNo = LookupBucketFor(KeyValue->getKey());
304-
StringMapEntryBase *&Bucket = TheTable[BucketNo];
305-
if (Bucket)
333+
UsedT *Used = getUsed();
334+
if (isUsed(Used, BucketNo))
306335
return false; // Already exists in map.
307336

308-
Bucket = KeyValue;
337+
TheTable[BucketNo] = KeyValue;
338+
setUsed(Used, BucketNo);
309339
++NumItems;
310340
assert(NumItems <= NumBuckets);
311341

@@ -366,32 +396,33 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
366396
uint32_t FullHashValue,
367397
ArgsTy &&...Args) {
368398
unsigned BucketNo = LookupBucketFor(Key, FullHashValue);
369-
StringMapEntryBase *&Bucket = TheTable[BucketNo];
370-
if (Bucket)
371-
return {iterator(TheTable + BucketNo), false}; // Already exists in map.
399+
UsedT *Used = getUsed();
400+
if (isUsed(Used, BucketNo)) // Already exists in map.
401+
return {iterator(TheTable + BucketNo, TheTable, Used, NumBuckets), false};
372402

373-
Bucket =
403+
TheTable[BucketNo] =
374404
MapEntryTy::create(Key, getAllocator(), std::forward<ArgsTy>(Args)...);
405+
setUsed(Used, BucketNo);
375406
++NumItems;
376407
assert(NumItems <= NumBuckets);
377408

378409
BucketNo = RehashTable(BucketNo);
379-
return {iterator(TheTable + BucketNo), true};
410+
return {iterator(TheTable + BucketNo, TheTable, getUsed(), NumBuckets),
411+
true};
380412
}
381413

382414
// clear - Empties out the StringMap
383415
void clear() {
384416
if (empty())
385417
return;
386418

387-
// Zap all values, resetting the keys back to non-present, which is safe
388-
// because we're removing all elements.
389-
for (StringMapEntryBase *&Bucket : buckets()) {
390-
if (Bucket) {
391-
static_cast<MapEntryTy *>(Bucket)->Destroy(getAllocator());
392-
}
393-
Bucket = nullptr;
394-
}
419+
// Destroy all values and clear the occupancy bits, marking every bucket
420+
// non-present.
421+
UsedT *Used = getUsed();
422+
for (unsigned I = 0, E = NumBuckets; I != E; ++I)
423+
if (isUsed(Used, I))
424+
static_cast<MapEntryTy *>(TheTable[I])->Destroy(getAllocator());
425+
std::memset(Used, 0, usedWords(NumBuckets) * sizeof(UsedT));
395426

396427
NumItems = 0;
397428
}
@@ -416,7 +447,14 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
416447
};
417448

418449
template <typename ValueTy, bool IsConst> class StringMapIterBase {
450+
using UsedT = StringMapImpl::UsedT;
451+
419452
StringMapEntryBase **Ptr = nullptr;
453+
// The bucket base and the parallel used array map the current bucket back to
454+
// its index so AdvancePastEmptyBuckets can consult the occupancy bits.
455+
StringMapEntryBase **Buckets = nullptr;
456+
const UsedT *Used = nullptr;
457+
unsigned NumBuckets = 0;
420458

421459
public:
422460
using iterator_category = std::forward_iterator_tag;
@@ -428,8 +466,10 @@ template <typename ValueTy, bool IsConst> class StringMapIterBase {
428466

429467
StringMapIterBase() = default;
430468

431-
explicit StringMapIterBase(StringMapEntryBase **Bucket, bool Advance = false)
432-
: Ptr(Bucket) {
469+
explicit StringMapIterBase(StringMapEntryBase **Bucket,
470+
StringMapEntryBase **Buckets, const UsedT *Used,
471+
unsigned NumBuckets, bool Advance = false)
472+
: Ptr(Bucket), Buckets(Buckets), Used(Used), NumBuckets(NumBuckets) {
433473
if (Advance)
434474
AdvancePastEmptyBuckets();
435475
}
@@ -456,7 +496,7 @@ template <typename ValueTy, bool IsConst> class StringMapIterBase {
456496
template <bool ToConst,
457497
typename = typename std::enable_if<!IsConst && ToConst>::type>
458498
operator StringMapIterBase<ValueTy, ToConst>() const {
459-
return StringMapIterBase<ValueTy, ToConst>(Ptr);
499+
return StringMapIterBase<ValueTy, ToConst>(Ptr, Buckets, Used, NumBuckets);
460500
}
461501

462502
friend bool operator==(const StringMapIterBase &LHS,
@@ -470,9 +510,25 @@ template <typename ValueTy, bool IsConst> class StringMapIterBase {
470510
}
471511

472512
private:
513+
// Advance Ptr to the next occupied bucket, scanning the used array a word (32
514+
// buckets) at a time, or to the end if none remain.
473515
void AdvancePastEmptyBuckets() {
474-
while (*Ptr == nullptr)
475-
++Ptr;
516+
size_t I = Ptr - Buckets;
517+
if (I >= NumBuckets) {
518+
Ptr = Buckets + NumBuckets;
519+
return;
520+
}
521+
const unsigned NW = StringMapImpl::usedWords(NumBuckets);
522+
unsigned W = I >> 5;
523+
UsedT Bits = Used[W] & (~UsedT(0) << (I & 31));
524+
while (Bits == 0) {
525+
if (++W == NW) {
526+
Ptr = Buckets + NumBuckets;
527+
return;
528+
}
529+
Bits = Used[W];
530+
}
531+
Ptr = Buckets + ((W << 5) + llvm::countr_zero(Bits));
476532
}
477533
};
478534

0 commit comments

Comments
 (0)