1515#define LLVM_ADT_STRINGMAP_H
1616
1717#include " llvm/ADT/StringMapEntry.h"
18+ #include " llvm/ADT/bit.h"
1819#include " llvm/ADT/iterator.h"
1920#include " llvm/Support/AllocatorBase.h"
2021#include " llvm/Support/Compiler.h"
2122#include " llvm/Support/PointerLikeTypeTraits.h"
23+ #include < cstdint>
24+ #include < cstring>
2225#include < initializer_list>
2326#include < iterator>
2427#include < type_traits>
@@ -31,15 +34,40 @@ template <typename ValueTy> class StringMapKeyIterator;
3134// / StringMapImpl - This is the base class of StringMap that is shared among
3235// / all of its instantiations.
3336class StringMapImpl {
37+ public:
38+ // Occupancy is tracked in a packed 1-bit-per-bucket "used" array (uint32
39+ // words), like DenseMap. Probing and iteration test a dense bit rather than
40+ // loading the sparse bucket pointer.
41+ using UsedT = uint32_t ;
42+
43+ // Number of used words backing NumBuckets buckets.
44+ static constexpr unsigned usedWords (unsigned N) { return (N + 31 ) / 32 ; }
45+ static bool isUsed (const UsedT *U, unsigned I) {
46+ return (U[I >> 5 ] >> (I & 31 )) & 1 ;
47+ }
48+ static void setUsed (UsedT *U, unsigned I) {
49+ U[I >> 5 ] |= UsedT (1 ) << (I & 31 );
50+ }
51+ static void unsetUsed (UsedT *U, unsigned I) {
52+ U[I >> 5 ] &= ~(UsedT (1 ) << (I & 31 ));
53+ }
54+
3455protected:
35- // Array of NumBuckets pointers to entries, null pointers are holes.
36- // TheTable[NumBuckets] contains a sentinel value for easy iteration. Followed
37- // by an array of the actual hash values as unsigned integers .
56+ // One allocation holds, in order: NumBuckets entry pointers (occupied slots
57+ // only; empty slots are don't-care), NumBuckets parallel hash values, and the
58+ // packed used-bit array recording which slots are occupied .
3859 StringMapEntryBase **TheTable = nullptr ;
3960 unsigned NumBuckets = 0 ;
4061 unsigned NumItems = 0 ;
4162 unsigned ItemSize;
4263
64+ unsigned *getHashArray () const {
65+ return reinterpret_cast <unsigned *>(TheTable + NumBuckets);
66+ }
67+ UsedT *getUsed () const {
68+ return reinterpret_cast <UsedT *>(getHashArray () + NumBuckets);
69+ }
70+
4371protected:
4472 explicit StringMapImpl (unsigned itemSize) : ItemSize(itemSize) {}
4573 StringMapImpl (StringMapImpl &&RHS)
@@ -55,10 +83,10 @@ class StringMapImpl {
5583 LLVM_ABI unsigned RehashTable (unsigned BucketNo = 0 );
5684
5785 // / LookupBucketFor - Look up the bucket that the specified string should end
58- // / up in. If it already exists as a key in the map, the Item pointer for the
59- // / specified bucket will be non-null . Otherwise, it will be null. In either
60- // / case, the FullHashValue field of the bucket will be set to the hash value
61- // / of the string .
86+ // / up in. If it already exists as a key in the map, the bucket's used bit
87+ // / will be set . Otherwise the bucket is empty (used bit clear) and its
88+ // / FullHashValue is set to the hash value of the string so the caller can
89+ // / fill it in .
6290 unsigned LookupBucketFor (StringRef Key) {
6391 return LookupBucketFor (Key, hash (Key));
6492 }
@@ -86,10 +114,6 @@ class StringMapImpl {
86114 // / setup the map as empty.
87115 LLVM_ABI void init (unsigned Size);
88116
89- iterator_range<StringMapEntryBase **> buckets () {
90- return make_range (TheTable, TheTable + NumBuckets);
91- }
92-
93117public:
94118 [[nodiscard]] unsigned getNumBuckets () const { return NumBuckets; }
95119 [[nodiscard]] unsigned getNumItems () const { return NumItems; }
@@ -150,21 +174,21 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
150174 if (RHS.empty ())
151175 return ;
152176
153- // Allocate TheTable of the same size as RHS's TheTable, and set the
154- // sentinel appropriately (and NumBuckets).
177+ // Allocate TheTable of the same size as RHS's TheTable (and NumBuckets).
155178 init (RHS.NumBuckets );
156- unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1 ),
157- *RHSHashTable = (unsigned *)(RHS.TheTable + NumBuckets + 1 );
179+ unsigned *HashTable = getHashArray (), *RHSHashTable = RHS.getHashArray ();
180+ UsedT *Used = getUsed ();
181+ const UsedT *RHSUsed = RHS.getUsed ();
158182
159183 NumItems = RHS.NumItems ;
160184 // Copy the bucket layout verbatim. Because RHS was built with linear
161185 // probing, preserving each entry's slot keeps the probe-sequence invariant
162- // intact without re-probing.
186+ // intact without re-probing. Empty slots stay don't-care.
187+ std::memcpy (Used, RHSUsed, usedWords (NumBuckets) * sizeof (UsedT));
163188 for (unsigned I = 0 , E = NumBuckets; I != E; ++I) {
164- StringMapEntryBase *Bucket = RHS.TheTable [I];
165- if (!Bucket)
189+ if (!isUsed (Used, I))
166190 continue ;
167-
191+ StringMapEntryBase *Bucket = RHS. TheTable [I];
168192 TheTable[I] = MapEntryTy::create (
169193 static_cast <MapEntryTy *>(Bucket)->getKey (), getAllocator (),
170194 static_cast <MapEntryTy *>(Bucket)->getValue ());
@@ -183,11 +207,10 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
183207 // to default values. This is a copy of clear(), but avoids unnecessary
184208 // work not required in the destructor.
185209 if (!empty ()) {
186- for (StringMapEntryBase *Bucket : buckets ()) {
187- if (Bucket) {
188- static_cast <MapEntryTy *>(Bucket)->Destroy (getAllocator ());
189- }
190- }
210+ const UsedT *Used = getUsed ();
211+ for (unsigned I = 0 , E = NumBuckets; I != E; ++I)
212+ if (isUsed (Used, I))
213+ static_cast <MapEntryTy *>(TheTable[I])->Destroy (getAllocator ());
191214 }
192215 }
193216
@@ -201,13 +224,19 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
201224 using const_iterator = StringMapIterBase<ValueTy, true >;
202225 using iterator = StringMapIterBase<ValueTy, false >;
203226
204- [[nodiscard]] iterator begin () { return iterator (TheTable, NumBuckets != 0 ); }
205- [[nodiscard]] iterator end () { return iterator (TheTable + NumBuckets); }
227+ [[nodiscard]] iterator begin () {
228+ return iterator (TheTable, TheTable, getUsed (), NumBuckets, NumBuckets != 0 );
229+ }
230+ [[nodiscard]] iterator end () {
231+ return iterator (TheTable + NumBuckets, TheTable, getUsed (), NumBuckets);
232+ }
206233 [[nodiscard]] const_iterator begin () const {
207- return const_iterator (TheTable, NumBuckets != 0 );
234+ return const_iterator (TheTable, TheTable, getUsed (), NumBuckets,
235+ NumBuckets != 0 );
208236 }
209237 [[nodiscard]] const_iterator end () const {
210- return const_iterator (TheTable + NumBuckets);
238+ return const_iterator (TheTable + NumBuckets, TheTable, getUsed (),
239+ NumBuckets);
211240 }
212241
213242 [[nodiscard]] iterator_range<StringMapKeyIterator<ValueTy>> keys () const {
@@ -221,7 +250,7 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
221250 int Bucket = FindKey (Key, FullHashValue);
222251 if (Bucket == -1 )
223252 return end ();
224- return iterator (TheTable + Bucket);
253+ return iterator (TheTable + Bucket, TheTable, getUsed (), NumBuckets );
225254 }
226255
227256 [[nodiscard]] const_iterator find (StringRef Key) const {
@@ -233,7 +262,7 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
233262 int Bucket = FindKey (Key, FullHashValue);
234263 if (Bucket == -1 )
235264 return end ();
236- return const_iterator (TheTable + Bucket);
265+ return const_iterator (TheTable + Bucket, TheTable, getUsed (), NumBuckets );
237266 }
238267
239268 // / lookup - Return the entry for the specified key, or a default
@@ -301,11 +330,12 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
301330 // / insert it and return true.
302331 bool insert (MapEntryTy *KeyValue) {
303332 unsigned BucketNo = LookupBucketFor (KeyValue->getKey ());
304- StringMapEntryBase *&Bucket = TheTable[BucketNo] ;
305- if (Bucket )
333+ UsedT *Used = getUsed () ;
334+ if (isUsed (Used, BucketNo) )
306335 return false ; // Already exists in map.
307336
308- Bucket = KeyValue;
337+ TheTable[BucketNo] = KeyValue;
338+ setUsed (Used, BucketNo);
309339 ++NumItems;
310340 assert (NumItems <= NumBuckets);
311341
@@ -366,32 +396,33 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
366396 uint32_t FullHashValue,
367397 ArgsTy &&...Args) {
368398 unsigned BucketNo = LookupBucketFor (Key, FullHashValue);
369- StringMapEntryBase *&Bucket = TheTable[BucketNo] ;
370- if (Bucket)
371- return {iterator (TheTable + BucketNo), false }; // Already exists in map.
399+ UsedT *Used = getUsed () ;
400+ if (isUsed (Used, BucketNo)) // Already exists in map.
401+ return {iterator (TheTable + BucketNo, TheTable, Used, NumBuckets ), false };
372402
373- Bucket =
403+ TheTable[BucketNo] =
374404 MapEntryTy::create (Key, getAllocator (), std::forward<ArgsTy>(Args)...);
405+ setUsed (Used, BucketNo);
375406 ++NumItems;
376407 assert (NumItems <= NumBuckets);
377408
378409 BucketNo = RehashTable (BucketNo);
379- return {iterator (TheTable + BucketNo), true };
410+ return {iterator (TheTable + BucketNo, TheTable, getUsed (), NumBuckets),
411+ true };
380412 }
381413
382414 // clear - Empties out the StringMap
383415 void clear () {
384416 if (empty ())
385417 return ;
386418
387- // Zap all values, resetting the keys back to non-present, which is safe
388- // because we're removing all elements.
389- for (StringMapEntryBase *&Bucket : buckets ()) {
390- if (Bucket) {
391- static_cast <MapEntryTy *>(Bucket)->Destroy (getAllocator ());
392- }
393- Bucket = nullptr ;
394- }
419+ // Destroy all values and clear the occupancy bits, marking every bucket
420+ // non-present.
421+ UsedT *Used = getUsed ();
422+ for (unsigned I = 0 , E = NumBuckets; I != E; ++I)
423+ if (isUsed (Used, I))
424+ static_cast <MapEntryTy *>(TheTable[I])->Destroy (getAllocator ());
425+ std::memset (Used, 0 , usedWords (NumBuckets) * sizeof (UsedT));
395426
396427 NumItems = 0 ;
397428 }
@@ -416,7 +447,14 @@ class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
416447};
417448
418449template <typename ValueTy, bool IsConst> class StringMapIterBase {
450+ using UsedT = StringMapImpl::UsedT;
451+
419452 StringMapEntryBase **Ptr = nullptr ;
453+ // The bucket base and the parallel used array map the current bucket back to
454+ // its index so AdvancePastEmptyBuckets can consult the occupancy bits.
455+ StringMapEntryBase **Buckets = nullptr ;
456+ const UsedT *Used = nullptr ;
457+ unsigned NumBuckets = 0 ;
420458
421459public:
422460 using iterator_category = std::forward_iterator_tag;
@@ -428,8 +466,10 @@ template <typename ValueTy, bool IsConst> class StringMapIterBase {
428466
429467 StringMapIterBase () = default ;
430468
431- explicit StringMapIterBase (StringMapEntryBase **Bucket, bool Advance = false )
432- : Ptr(Bucket) {
469+ explicit StringMapIterBase (StringMapEntryBase **Bucket,
470+ StringMapEntryBase **Buckets, const UsedT *Used,
471+ unsigned NumBuckets, bool Advance = false )
472+ : Ptr(Bucket), Buckets(Buckets), Used(Used), NumBuckets(NumBuckets) {
433473 if (Advance)
434474 AdvancePastEmptyBuckets ();
435475 }
@@ -456,7 +496,7 @@ template <typename ValueTy, bool IsConst> class StringMapIterBase {
456496 template <bool ToConst,
457497 typename = typename std::enable_if<!IsConst && ToConst>::type>
458498 operator StringMapIterBase<ValueTy, ToConst>() const {
459- return StringMapIterBase<ValueTy, ToConst>(Ptr);
499+ return StringMapIterBase<ValueTy, ToConst>(Ptr, Buckets, Used, NumBuckets );
460500 }
461501
462502 friend bool operator ==(const StringMapIterBase &LHS,
@@ -470,9 +510,25 @@ template <typename ValueTy, bool IsConst> class StringMapIterBase {
470510 }
471511
472512private:
513+ // Advance Ptr to the next occupied bucket, scanning the used array a word (32
514+ // buckets) at a time, or to the end if none remain.
473515 void AdvancePastEmptyBuckets () {
474- while (*Ptr == nullptr )
475- ++Ptr;
516+ size_t I = Ptr - Buckets;
517+ if (I >= NumBuckets) {
518+ Ptr = Buckets + NumBuckets;
519+ return ;
520+ }
521+ const unsigned NW = StringMapImpl::usedWords (NumBuckets);
522+ unsigned W = I >> 5 ;
523+ UsedT Bits = Used[W] & (~UsedT (0 ) << (I & 31 ));
524+ while (Bits == 0 ) {
525+ if (++W == NW) {
526+ Ptr = Buckets + NumBuckets;
527+ return ;
528+ }
529+ Bits = Used[W];
530+ }
531+ Ptr = Buckets + ((W << 5 ) + llvm::countr_zero (Bits));
476532 }
477533};
478534
0 commit comments