Skip to content

Revert "[DenseMap] Replace tombstone deletion with TAOCP 6.4 Algorithm R"#200421

Merged
mstorsjo merged 1 commit into
mainfrom
revert-199615-pr/densemap-backshift
May 29, 2026
Merged

Revert "[DenseMap] Replace tombstone deletion with TAOCP 6.4 Algorithm R"#200421
mstorsjo merged 1 commit into
mainfrom
revert-199615-pr/densemap-backshift

Conversation

@mstorsjo
Copy link
Copy Markdown
Member

Reverts #199615.

That change causes nondeterministic failed assertions.

@llvmorg-github-actions
Copy link
Copy Markdown

@llvm/pr-subscribers-llvm-ir

Author: Martin Storsjö (mstorsjo)

Changes

Reverts llvm/llvm-project#199615.

That change causes nondeterministic failed assertions.


Full diff: https://github.com/llvm/llvm-project/pull/200421.diff

3 Files Affected:

  • (modified) llvm/include/llvm/ADT/DenseMap.h (+119-91)
  • (modified) llvm/lib/IR/Value.cpp (+1-4)
  • (modified) llvm/unittests/ADT/BitVectorTest.cpp (+2-1)
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index b52906a215a7c..081d7e198e94c 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -53,10 +53,6 @@ struct DenseMapPair : std::pair<KeyT, ValueT> {
 
 } // end namespace detail
 
-// Befriended below so DenseMapBase can expose its bucket-relocation callback
-// erase to ValueHandleBase, the only caller that caches bucket pointers.
-class ValueHandleBase;
-
 template <typename KeyT, typename ValueT,
           typename KeyInfoT = DenseMapInfo<KeyT>,
           typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>,
@@ -124,7 +120,7 @@ class DenseMapBase : public DebugEpochBase {
 
   void clear() {
     incrementEpoch();
-    if (getNumEntries() == 0)
+    if (getNumEntries() == 0 && getNumTombstones() == 0)
       return;
 
     // If the capacity of the array is huge, and the # elements used is small,
@@ -140,11 +136,14 @@ class DenseMapBase : public DebugEpochBase {
       for (BucketT &B : buckets())
         B.getFirst() = EmptyKey;
     } else {
+      const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
       unsigned NumEntries = getNumEntries();
       for (BucketT &B : buckets()) {
         if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
-          B.getSecond().~ValueT();
-          --NumEntries;
+          if (!KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
+            B.getSecond().~ValueT();
+            --NumEntries;
+          }
           B.getFirst() = EmptyKey;
         }
       }
@@ -152,6 +151,7 @@ class DenseMapBase : public DebugEpochBase {
       (void)NumEntries;
     }
     setNumEntries(0);
+    setNumTombstones(0);
   }
 
   void shrink_and_clear() {
@@ -325,19 +325,26 @@ class DenseMapBase : public DebugEpochBase {
     return Ret;
   }
 
-  void eraseFromFilledBucket(BucketT *TheBucket) {
-    eraseFromFilledBucket(TheBucket, [](BucketT &) {});
-  }
-
   bool erase(const KeyT &Val) {
     BucketT *TheBucket = doFind(Val);
     if (!TheBucket)
       return false; // not in map.
 
-    eraseFromFilledBucket(TheBucket);
+    incrementEpoch();
+    TheBucket->getSecond().~ValueT();
+    TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+    decrementNumEntries();
+    incrementNumTombstones();
     return true;
   }
-  void erase(iterator I) { eraseFromFilledBucket(&*I); }
+  void erase(iterator I) {
+    BucketT *TheBucket = &*I;
+    incrementEpoch();
+    TheBucket->getSecond().~ValueT();
+    TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+    decrementNumEntries();
+    incrementNumTombstones();
+  }
 
   /// Remove entries that match the given predicate. \p Pred is invoked
   /// with a reference to each live bucket and must not access the map being
@@ -347,22 +354,22 @@ class DenseMapBase : public DebugEpochBase {
   /// into the map are invalidated.
   template <typename Predicate> bool remove_if(Predicate Pred) {
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
-    unsigned NumBuckets = getNumBuckets();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     bool Removed = false;
     for (BucketT &B : buckets()) {
-      if (KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+      if (KeyInfoT::isEqual(B.getFirst(), EmptyKey) ||
+          KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
         continue;
       if (Pred(B)) {
         B.getSecond().~ValueT();
-        B.getFirst() = EmptyKey;
+        B.getFirst() = TombstoneKey;
         decrementNumEntries();
+        incrementNumTombstones();
         Removed = true;
       }
     }
-    if (Removed) {
+    if (Removed)
       incrementEpoch();
-      this->grow(NumBuckets);
-    }
     return Removed;
   }
 
@@ -399,10 +406,12 @@ class DenseMapBase : public DebugEpochBase {
   struct ExactBucketCount {};
 
   void initWithExactBucketCount(unsigned NewNumBuckets) {
-    if (derived().allocateBuckets(NewNumBuckets))
+    if (derived().allocateBuckets(NewNumBuckets)) {
       initEmpty();
-    else
+    } else {
       setNumEntries(0);
+      setNumTombstones(0);
+    }
   }
 
   void destroyAll() {
@@ -416,8 +425,10 @@ class DenseMapBase : public DebugEpochBase {
       return;
 
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     for (BucketT &B : buckets()) {
-      if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+      if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+          !KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
         B.getSecond().~ValueT();
       B.getFirst().~KeyT();
     }
@@ -427,6 +438,7 @@ class DenseMapBase : public DebugEpochBase {
     static_assert(std::is_base_of_v<DenseMapBase, DerivedT>,
                   "Must pass the derived type to this template!");
     setNumEntries(0);
+    setNumTombstones(0);
 
     assert((getNumBuckets() & (getNumBuckets() - 1)) == 0 &&
            "# initial buckets must be a power of two!");
@@ -451,8 +463,10 @@ class DenseMapBase : public DebugEpochBase {
   void moveFrom(DerivedT &Other) {
     // Insert all the old elements.
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     for (BucketT &B : Other.buckets()) {
-      if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
+      if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+          !KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
         // Insert the key/value into the new table.
         BucketT *DestBucket;
         bool FoundVal = LookupBucketFor(B.getFirst(), DestBucket);
@@ -474,6 +488,7 @@ class DenseMapBase : public DebugEpochBase {
     this->destroyAll();
     derived().deallocateBuckets();
     setNumEntries(0);
+    setNumTombstones(0);
     if (!derived().allocateBuckets(other.getNumBuckets())) {
       // The bucket list is empty.  No work to do.
       return;
@@ -483,6 +498,7 @@ class DenseMapBase : public DebugEpochBase {
     assert(getNumBuckets() == other.getNumBuckets());
 
     setNumEntries(other.getNumEntries());
+    setNumTombstones(other.getNumTombstones());
 
     BucketT *Buckets = getBuckets();
     const BucketT *OtherBuckets = other.getBuckets();
@@ -493,66 +509,17 @@ class DenseMapBase : public DebugEpochBase {
              NumBuckets * sizeof(BucketT));
     } else {
       const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+      const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
       for (size_t I = 0; I < NumBuckets; ++I) {
         ::new (&Buckets[I].getFirst()) KeyT(OtherBuckets[I].getFirst());
-        if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey))
+        if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey) &&
+            !KeyInfoT::isEqual(Buckets[I].getFirst(), TombstoneKey))
           ::new (&Buckets[I].getSecond()) ValueT(OtherBuckets[I].getSecond());
       }
     }
   }
 
 private:
-  // ValueHandleBase caches pointers into the bucket array, so it needs the
-  // callback erase below to fix them up as entries shift. It is the only
-  // intended caller; do not add new ones.
-  friend class ValueHandleBase;
-
-  /// Erase the entry at \p TheBucket and close the resulting hole via Knuth
-  /// TAOCP 6.4 Algorithm R. For callers that cache pointers into the bucket
-  /// array, call \p OnMoved per shifted bucket.
-  template <typename OnMovedT>
-  void eraseFromFilledBucket(BucketT *TheBucket, OnMovedT &&OnMoved) {
-    incrementEpoch();
-    TheBucket->getSecond().~ValueT();
-    decrementNumEntries();
-
-    BucketT *BucketsPtr = getBuckets();
-    const unsigned NumBuckets = getNumBuckets();
-    const unsigned Mask = NumBuckets - 1;
-    const KeyT EmptyKey = KeyInfoT::getEmptyKey();
-    unsigned I = static_cast<unsigned>(TheBucket - BucketsPtr);
-    unsigned J = I;
-    while (true) {
-      J = (J + 1) & Mask;
-      BucketT &BJ = BucketsPtr[J];
-      if (KeyInfoT::isEqual(BJ.getFirst(), EmptyKey))
-        break;
-      auto Ideal = KeyInfoT::getHashValue(BJ.getFirst());
-      // If the hole (I) lies on the linear-probe chain from the home bucket
-      // (Ideal) to J, shift J into the hole and make J the new hole.
-      if (((I - Ideal) & Mask) < ((J - Ideal) & Mask)) {
-        BucketT &BI = BucketsPtr[I];
-        BI.getFirst() = std::move(BJ.getFirst());
-        ::new (&BI.getSecond()) ValueT(std::move(BJ.getSecond()));
-        BJ.getSecond().~ValueT();
-        OnMoved(BI);
-        I = J;
-      }
-    }
-    BucketsPtr[I].getFirst() = EmptyKey;
-  }
-
-  /// Erase \p Val and close the resulting hole by potentially shifting other
-  /// entries into it. For callers that cache pointers into the bucket array,
-  /// call \p OnMoved per shifted bucket.
-  template <typename OnMovedT> bool erase(const KeyT &Val, OnMovedT &&OnMoved) {
-    BucketT *TheBucket = doFind(Val);
-    if (!TheBucket)
-      return false;
-    eraseFromFilledBucket(TheBucket, std::forward<OnMovedT>(OnMoved));
-    return true;
-  }
-
   DerivedT &derived() { return *static_cast<DerivedT *>(this); }
   const DerivedT &derived() const {
     return *static_cast<const DerivedT *>(this);
@@ -595,6 +562,14 @@ class DenseMapBase : public DebugEpochBase {
 
   void decrementNumEntries() { setNumEntries(getNumEntries() - 1); }
 
+  unsigned getNumTombstones() const { return derived().getNumTombstones(); }
+
+  void setNumTombstones(unsigned Num) { derived().setNumTombstones(Num); }
+
+  void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); }
+
+  void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); }
+
   const BucketT *getBuckets() const { return derived().getBuckets(); }
 
   BucketT *getBuckets() { return derived().getBuckets(); }
@@ -630,21 +605,37 @@ class DenseMapBase : public DebugEpochBase {
                                   BucketT *TheBucket) {
     incrementEpoch();
 
-    // Grow the table if the load factor would exceed 3/4 after insertion.
-    // Linear probing with gap-closing deletion (Knuth Algorithm R) keeps
-    // every chain compact and bounded by the table's empty-bucket count,
-    // so no tombstone-driven resize is needed.
+    // If the load of the hash table is more than 3/4, or if fewer than 1/8 of
+    // the buckets are empty (meaning that many are filled with tombstones),
+    // grow the table.
+    //
+    // The later case is tricky.  For example, if we had one empty bucket with
+    // tons of tombstones, failing lookups (e.g. for insertion) would have to
+    // probe almost the entire table until it found the empty bucket.  If the
+    // table completely filled with tombstones, no lookup would ever succeed,
+    // causing infinite loops in lookup.
     unsigned NewNumEntries = getNumEntries() + 1;
     unsigned NumBuckets = getNumBuckets();
     if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) {
       this->grow(NumBuckets * 2);
       LookupBucketFor(Lookup, TheBucket);
+    } else if (LLVM_UNLIKELY(NumBuckets -
+                                 (NewNumEntries + getNumTombstones()) <=
+                             NumBuckets / 8)) {
+      this->grow(NumBuckets);
+      LookupBucketFor(Lookup, TheBucket);
     }
     assert(TheBucket);
 
     // Only update the state after we've grown our bucket space appropriately
     // so that when growing buckets we have self-consistent entry count.
     incrementNumEntries();
+
+    // If we are writing over a tombstone, remember this.
+    const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey))
+      decrementNumTombstones();
+
     return TheBucket;
   }
 
@@ -657,6 +648,7 @@ class DenseMapBase : public DebugEpochBase {
 
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
     unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+    unsigned ProbeAmt = 1;
     while (true) {
       const BucketT *Bucket = BucketsPtr + BucketNo;
       if (LLVM_LIKELY(KeyInfoT::isEqual(Val, Bucket->getFirst())))
@@ -664,8 +656,10 @@ class DenseMapBase : public DebugEpochBase {
       if (LLVM_LIKELY(KeyInfoT::isEqual(Bucket->getFirst(), EmptyKey)))
         return nullptr;
 
-      // Hash collision: continue linear probing.
-      BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+      // Otherwise, it's a hash collision or a tombstone, continue quadratic
+      // probing.
+      BucketNo += ProbeAmt++;
+      BucketNo &= NumBuckets - 1;
     }
   }
 
@@ -676,7 +670,7 @@ class DenseMapBase : public DebugEpochBase {
 
   /// Lookup the appropriate bucket for Val, returning it in FoundBucket. If the
   /// bucket contains the key and a value, this returns true, otherwise it
-  /// returns a bucket with an empty marker and returns false.
+  /// returns a bucket with an empty marker or tombstone and returns false.
   template <typename LookupKeyT>
   bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) {
     BucketT *BucketsPtr = getBuckets();
@@ -687,11 +681,16 @@ class DenseMapBase : public DebugEpochBase {
       return false;
     }
 
+    // FoundTombstone - Keep track of whether we find a tombstone while probing.
+    BucketT *FoundTombstone = nullptr;
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     assert(!KeyInfoT::isEqual(Val, EmptyKey) &&
-           "Empty value shouldn't be inserted into map!");
+           !KeyInfoT::isEqual(Val, TombstoneKey) &&
+           "Empty/Tombstone value shouldn't be inserted into map!");
 
     unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+    unsigned ProbeAmt = 1;
     while (true) {
       BucketT *ThisBucket = BucketsPtr + BucketNo;
       // Found Val's bucket?  If so, return it.
@@ -701,14 +700,24 @@ class DenseMapBase : public DebugEpochBase {
       }
 
       // If we found an empty bucket, the key doesn't exist in the set.
-      // Return it as the insertion point.
+      // Insert it and return the default value.
       if (LLVM_LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) {
-        FoundBucket = ThisBucket;
+        // If we've already seen a tombstone while probing, fill it in instead
+        // of the empty bucket we eventually probed to.
+        FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket;
         return false;
       }
 
-      // Hash collision: continue linear probing.
-      BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+      // If this is a tombstone, remember it.  If Val ends up not in the map, we
+      // prefer to return it than something that would require more probing.
+      if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) &&
+          !FoundTombstone)
+        FoundTombstone = ThisBucket; // Remember the first tombstone found.
+
+      // Otherwise, it's a hash collision or a tombstone, continue quadratic
+      // probing.
+      BucketNo += ProbeAmt++;
+      BucketNo &= (NumBuckets - 1);
     }
   }
 
@@ -769,6 +778,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
 
   BucketT *Buckets = nullptr;
   unsigned NumEntries = 0;
+  unsigned NumTombstones = 0;
   unsigned NumBuckets = 0;
 
   explicit DenseMap(unsigned NumBuckets, typename BaseT::ExactBucketCount) {
@@ -821,6 +831,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
   void swapImpl(DenseMap &RHS) {
     std::swap(Buckets, RHS.Buckets);
     std::swap(NumEntries, RHS.NumEntries);
+    std::swap(NumTombstones, RHS.NumTombstones);
     std::swap(NumBuckets, RHS.NumBuckets);
   }
 
@@ -828,6 +839,10 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
 
   void setNumEntries(unsigned Num) { NumEntries = Num; }
 
+  unsigned getNumTombstones() const { return NumTombstones; }
+
+  void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
   BucketT *getBuckets() const { return Buckets; }
 
   unsigned getNumBuckets() const { return NumBuckets; }
@@ -896,6 +911,7 @@ class SmallDenseMap
 
   unsigned Small : 1;
   unsigned NumEntries : 31;
+  unsigned NumTombstones;
 
   struct LargeRep {
     BucketT *Buckets;
@@ -962,8 +978,10 @@ class SmallDenseMap
     unsigned TmpNumEntries = RHS.NumEntries;
     RHS.NumEntries = NumEntries;
     NumEntries = TmpNumEntries;
+    std::swap(NumTombstones, RHS.NumTombstones);
 
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     if (Small && RHS.Small) {
       // If we're swapping inline bucket arrays, we have to cope with some of
       // the tricky bits of DenseMap's storage system: the buckets are not
@@ -972,8 +990,10 @@ class SmallDenseMap
       for (unsigned i = 0, e = InlineBuckets; i != e; ++i) {
         BucketT *LHSB = &getInlineBuckets()[i],
                 *RHSB = &RHS.getInlineBuckets()[i];
-        bool hasLHSValue = !KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey);
-        bool hasRHSValue = !KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey);
+        bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey) &&
+                            !KeyInfoT::isEqual(LHSB->getFirst(), TombstoneKey));
+        bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey) &&
+                            !KeyInfoT::isEqual(RHSB->getFirst(), TombstoneKey));
         if (hasLHSValue && hasRHSValue) {
           // Swap together if we can...
           std::swap(*LHSB, *RHSB);
@@ -1012,7 +1032,8 @@ class SmallDenseMap
               *OldB = &SmallSide.getInlineBuckets()[i];
       ::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst()));
       OldB->getFirst().~KeyT();
-      if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey)) {
+      if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) &&
+          !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) {
         ::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond()));
         OldB->getSecond().~ValueT();
       }
@@ -1032,6 +1053,10 @@ class SmallDenseMap
     NumEntries = Num;
   }
 
+  unsigned getNumTombstones() const { return NumTombstones; }
+
+  void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
   const BucketT *getInlineBuckets() const {
     assert(Small);
     // Note that this cast does not violate aliasing rules as we assert that
@@ -1119,6 +1144,7 @@ class SmallDenseMap
 
     Small = false;
     NumEntries = Other.NumEntries;
+    NumTombstones = Other.NumTombstones;
     *getLargeRep() = std::move(*Other.getLargeRep());
     Other.getLargeRep()->NumBuckets = 0;
     return true;
@@ -1247,8 +1273,10 @@ class DenseMapIterator : DebugEpochBase::HandleBase {
   void AdvancePastEmptyBuckets() {
     assert(Ptr <= End);
     const KeyT Empty = KeyInfoT::getEmptyKey();
+    const KeyT Tombstone = KeyInfoT::getTombstoneKey();
 
-    while (Ptr != End && KeyInfoT::isEqual(Ptr->getFirst(), Empty))
+    while (Ptr != End && (KeyInfoT::isEqual(Ptr->getFirst(), Empty) ||
+                          KeyInfoT::isEqual(Ptr->getFirst(), Tombstone)))
       ++Ptr;
   }
 
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index f5a501897f0bb..074d42d728c95 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -1222,10 +1222,7 @@ void ValueHandleBase::RemoveFromUseList() {
   LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl;
   DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
   if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
-    // TODO: Remove the only user of DenseMap's callback erase.
-    Handles.erase(getValPtr(), [](auto &Bucket) {
-      Bucket.second->setPrevPtr(&Bucket.second);
-    });
+    Handles.erase(getValPtr());
     getValPtr()->HasValueHandle = false;
   }
 }
diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp
index adf788b8f9661..d3200b7722ee3 100644
--- a/llvm/unittests/ADT/BitVectorTest.cpp
+++ b/llvm/unittests/ADT/BitVectorTest.cpp
@@ -1434,7 +1434,8 @@ TYPED_TEST(BitVectorTest, DenseSet) {
 
 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
   TypeParam D;
-  EXPECT_DEATH(Set.insert(D), "Empty value shouldn't be inserted into map!");
+  EXPECT_DEATH(Set.insert(D),
+               "Empty/Tombstone value shouldn't be inserted into map!");
 #endif
 
   EXPECT_EQ(3U, Set.size());

@llvmorg-github-actions
Copy link
Copy Markdown

@llvm/pr-subscribers-llvm-adt

Author: Martin Storsjö (mstorsjo)

Changes

Reverts llvm/llvm-project#199615.

That change causes nondeterministic failed assertions.


Full diff: https://github.com/llvm/llvm-project/pull/200421.diff

3 Files Affected:

  • (modified) llvm/include/llvm/ADT/DenseMap.h (+119-91)
  • (modified) llvm/lib/IR/Value.cpp (+1-4)
  • (modified) llvm/unittests/ADT/BitVectorTest.cpp (+2-1)
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index b52906a215a7c..081d7e198e94c 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -53,10 +53,6 @@ struct DenseMapPair : std::pair<KeyT, ValueT> {
 
 } // end namespace detail
 
-// Befriended below so DenseMapBase can expose its bucket-relocation callback
-// erase to ValueHandleBase, the only caller that caches bucket pointers.
-class ValueHandleBase;
-
 template <typename KeyT, typename ValueT,
           typename KeyInfoT = DenseMapInfo<KeyT>,
           typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>,
@@ -124,7 +120,7 @@ class DenseMapBase : public DebugEpochBase {
 
   void clear() {
     incrementEpoch();
-    if (getNumEntries() == 0)
+    if (getNumEntries() == 0 && getNumTombstones() == 0)
       return;
 
     // If the capacity of the array is huge, and the # elements used is small,
@@ -140,11 +136,14 @@ class DenseMapBase : public DebugEpochBase {
       for (BucketT &B : buckets())
         B.getFirst() = EmptyKey;
     } else {
+      const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
       unsigned NumEntries = getNumEntries();
       for (BucketT &B : buckets()) {
         if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
-          B.getSecond().~ValueT();
-          --NumEntries;
+          if (!KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
+            B.getSecond().~ValueT();
+            --NumEntries;
+          }
           B.getFirst() = EmptyKey;
         }
       }
@@ -152,6 +151,7 @@ class DenseMapBase : public DebugEpochBase {
       (void)NumEntries;
     }
     setNumEntries(0);
+    setNumTombstones(0);
   }
 
   void shrink_and_clear() {
@@ -325,19 +325,26 @@ class DenseMapBase : public DebugEpochBase {
     return Ret;
   }
 
-  void eraseFromFilledBucket(BucketT *TheBucket) {
-    eraseFromFilledBucket(TheBucket, [](BucketT &) {});
-  }
-
   bool erase(const KeyT &Val) {
     BucketT *TheBucket = doFind(Val);
     if (!TheBucket)
       return false; // not in map.
 
-    eraseFromFilledBucket(TheBucket);
+    incrementEpoch();
+    TheBucket->getSecond().~ValueT();
+    TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+    decrementNumEntries();
+    incrementNumTombstones();
     return true;
   }
-  void erase(iterator I) { eraseFromFilledBucket(&*I); }
+  void erase(iterator I) {
+    BucketT *TheBucket = &*I;
+    incrementEpoch();
+    TheBucket->getSecond().~ValueT();
+    TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+    decrementNumEntries();
+    incrementNumTombstones();
+  }
 
   /// Remove entries that match the given predicate. \p Pred is invoked
   /// with a reference to each live bucket and must not access the map being
@@ -347,22 +354,22 @@ class DenseMapBase : public DebugEpochBase {
   /// into the map are invalidated.
   template <typename Predicate> bool remove_if(Predicate Pred) {
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
-    unsigned NumBuckets = getNumBuckets();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     bool Removed = false;
     for (BucketT &B : buckets()) {
-      if (KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+      if (KeyInfoT::isEqual(B.getFirst(), EmptyKey) ||
+          KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
         continue;
       if (Pred(B)) {
         B.getSecond().~ValueT();
-        B.getFirst() = EmptyKey;
+        B.getFirst() = TombstoneKey;
         decrementNumEntries();
+        incrementNumTombstones();
         Removed = true;
       }
     }
-    if (Removed) {
+    if (Removed)
       incrementEpoch();
-      this->grow(NumBuckets);
-    }
     return Removed;
   }
 
@@ -399,10 +406,12 @@ class DenseMapBase : public DebugEpochBase {
   struct ExactBucketCount {};
 
   void initWithExactBucketCount(unsigned NewNumBuckets) {
-    if (derived().allocateBuckets(NewNumBuckets))
+    if (derived().allocateBuckets(NewNumBuckets)) {
       initEmpty();
-    else
+    } else {
       setNumEntries(0);
+      setNumTombstones(0);
+    }
   }
 
   void destroyAll() {
@@ -416,8 +425,10 @@ class DenseMapBase : public DebugEpochBase {
       return;
 
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     for (BucketT &B : buckets()) {
-      if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+      if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+          !KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
         B.getSecond().~ValueT();
       B.getFirst().~KeyT();
     }
@@ -427,6 +438,7 @@ class DenseMapBase : public DebugEpochBase {
     static_assert(std::is_base_of_v<DenseMapBase, DerivedT>,
                   "Must pass the derived type to this template!");
     setNumEntries(0);
+    setNumTombstones(0);
 
     assert((getNumBuckets() & (getNumBuckets() - 1)) == 0 &&
            "# initial buckets must be a power of two!");
@@ -451,8 +463,10 @@ class DenseMapBase : public DebugEpochBase {
   void moveFrom(DerivedT &Other) {
     // Insert all the old elements.
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     for (BucketT &B : Other.buckets()) {
-      if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
+      if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+          !KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
         // Insert the key/value into the new table.
         BucketT *DestBucket;
         bool FoundVal = LookupBucketFor(B.getFirst(), DestBucket);
@@ -474,6 +488,7 @@ class DenseMapBase : public DebugEpochBase {
     this->destroyAll();
     derived().deallocateBuckets();
     setNumEntries(0);
+    setNumTombstones(0);
     if (!derived().allocateBuckets(other.getNumBuckets())) {
       // The bucket list is empty.  No work to do.
       return;
@@ -483,6 +498,7 @@ class DenseMapBase : public DebugEpochBase {
     assert(getNumBuckets() == other.getNumBuckets());
 
     setNumEntries(other.getNumEntries());
+    setNumTombstones(other.getNumTombstones());
 
     BucketT *Buckets = getBuckets();
     const BucketT *OtherBuckets = other.getBuckets();
@@ -493,66 +509,17 @@ class DenseMapBase : public DebugEpochBase {
              NumBuckets * sizeof(BucketT));
     } else {
       const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+      const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
       for (size_t I = 0; I < NumBuckets; ++I) {
         ::new (&Buckets[I].getFirst()) KeyT(OtherBuckets[I].getFirst());
-        if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey))
+        if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey) &&
+            !KeyInfoT::isEqual(Buckets[I].getFirst(), TombstoneKey))
           ::new (&Buckets[I].getSecond()) ValueT(OtherBuckets[I].getSecond());
       }
     }
   }
 
 private:
-  // ValueHandleBase caches pointers into the bucket array, so it needs the
-  // callback erase below to fix them up as entries shift. It is the only
-  // intended caller; do not add new ones.
-  friend class ValueHandleBase;
-
-  /// Erase the entry at \p TheBucket and close the resulting hole via Knuth
-  /// TAOCP 6.4 Algorithm R. For callers that cache pointers into the bucket
-  /// array, call \p OnMoved per shifted bucket.
-  template <typename OnMovedT>
-  void eraseFromFilledBucket(BucketT *TheBucket, OnMovedT &&OnMoved) {
-    incrementEpoch();
-    TheBucket->getSecond().~ValueT();
-    decrementNumEntries();
-
-    BucketT *BucketsPtr = getBuckets();
-    const unsigned NumBuckets = getNumBuckets();
-    const unsigned Mask = NumBuckets - 1;
-    const KeyT EmptyKey = KeyInfoT::getEmptyKey();
-    unsigned I = static_cast<unsigned>(TheBucket - BucketsPtr);
-    unsigned J = I;
-    while (true) {
-      J = (J + 1) & Mask;
-      BucketT &BJ = BucketsPtr[J];
-      if (KeyInfoT::isEqual(BJ.getFirst(), EmptyKey))
-        break;
-      auto Ideal = KeyInfoT::getHashValue(BJ.getFirst());
-      // If the hole (I) lies on the linear-probe chain from the home bucket
-      // (Ideal) to J, shift J into the hole and make J the new hole.
-      if (((I - Ideal) & Mask) < ((J - Ideal) & Mask)) {
-        BucketT &BI = BucketsPtr[I];
-        BI.getFirst() = std::move(BJ.getFirst());
-        ::new (&BI.getSecond()) ValueT(std::move(BJ.getSecond()));
-        BJ.getSecond().~ValueT();
-        OnMoved(BI);
-        I = J;
-      }
-    }
-    BucketsPtr[I].getFirst() = EmptyKey;
-  }
-
-  /// Erase \p Val and close the resulting hole by potentially shifting other
-  /// entries into it. For callers that cache pointers into the bucket array,
-  /// call \p OnMoved per shifted bucket.
-  template <typename OnMovedT> bool erase(const KeyT &Val, OnMovedT &&OnMoved) {
-    BucketT *TheBucket = doFind(Val);
-    if (!TheBucket)
-      return false;
-    eraseFromFilledBucket(TheBucket, std::forward<OnMovedT>(OnMoved));
-    return true;
-  }
-
   DerivedT &derived() { return *static_cast<DerivedT *>(this); }
   const DerivedT &derived() const {
     return *static_cast<const DerivedT *>(this);
@@ -595,6 +562,14 @@ class DenseMapBase : public DebugEpochBase {
 
   void decrementNumEntries() { setNumEntries(getNumEntries() - 1); }
 
+  unsigned getNumTombstones() const { return derived().getNumTombstones(); }
+
+  void setNumTombstones(unsigned Num) { derived().setNumTombstones(Num); }
+
+  void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); }
+
+  void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); }
+
   const BucketT *getBuckets() const { return derived().getBuckets(); }
 
   BucketT *getBuckets() { return derived().getBuckets(); }
@@ -630,21 +605,37 @@ class DenseMapBase : public DebugEpochBase {
                                   BucketT *TheBucket) {
     incrementEpoch();
 
-    // Grow the table if the load factor would exceed 3/4 after insertion.
-    // Linear probing with gap-closing deletion (Knuth Algorithm R) keeps
-    // every chain compact and bounded by the table's empty-bucket count,
-    // so no tombstone-driven resize is needed.
+    // If the load of the hash table is more than 3/4, or if fewer than 1/8 of
+    // the buckets are empty (meaning that many are filled with tombstones),
+    // grow the table.
+    //
+    // The later case is tricky.  For example, if we had one empty bucket with
+    // tons of tombstones, failing lookups (e.g. for insertion) would have to
+    // probe almost the entire table until it found the empty bucket.  If the
+    // table completely filled with tombstones, no lookup would ever succeed,
+    // causing infinite loops in lookup.
     unsigned NewNumEntries = getNumEntries() + 1;
     unsigned NumBuckets = getNumBuckets();
     if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) {
       this->grow(NumBuckets * 2);
       LookupBucketFor(Lookup, TheBucket);
+    } else if (LLVM_UNLIKELY(NumBuckets -
+                                 (NewNumEntries + getNumTombstones()) <=
+                             NumBuckets / 8)) {
+      this->grow(NumBuckets);
+      LookupBucketFor(Lookup, TheBucket);
     }
     assert(TheBucket);
 
     // Only update the state after we've grown our bucket space appropriately
     // so that when growing buckets we have self-consistent entry count.
     incrementNumEntries();
+
+    // If we are writing over a tombstone, remember this.
+    const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey))
+      decrementNumTombstones();
+
     return TheBucket;
   }
 
@@ -657,6 +648,7 @@ class DenseMapBase : public DebugEpochBase {
 
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
     unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+    unsigned ProbeAmt = 1;
     while (true) {
       const BucketT *Bucket = BucketsPtr + BucketNo;
       if (LLVM_LIKELY(KeyInfoT::isEqual(Val, Bucket->getFirst())))
@@ -664,8 +656,10 @@ class DenseMapBase : public DebugEpochBase {
       if (LLVM_LIKELY(KeyInfoT::isEqual(Bucket->getFirst(), EmptyKey)))
         return nullptr;
 
-      // Hash collision: continue linear probing.
-      BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+      // Otherwise, it's a hash collision or a tombstone, continue quadratic
+      // probing.
+      BucketNo += ProbeAmt++;
+      BucketNo &= NumBuckets - 1;
     }
   }
 
@@ -676,7 +670,7 @@ class DenseMapBase : public DebugEpochBase {
 
   /// Lookup the appropriate bucket for Val, returning it in FoundBucket. If the
   /// bucket contains the key and a value, this returns true, otherwise it
-  /// returns a bucket with an empty marker and returns false.
+  /// returns a bucket with an empty marker or tombstone and returns false.
   template <typename LookupKeyT>
   bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) {
     BucketT *BucketsPtr = getBuckets();
@@ -687,11 +681,16 @@ class DenseMapBase : public DebugEpochBase {
       return false;
     }
 
+    // FoundTombstone - Keep track of whether we find a tombstone while probing.
+    BucketT *FoundTombstone = nullptr;
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     assert(!KeyInfoT::isEqual(Val, EmptyKey) &&
-           "Empty value shouldn't be inserted into map!");
+           !KeyInfoT::isEqual(Val, TombstoneKey) &&
+           "Empty/Tombstone value shouldn't be inserted into map!");
 
     unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+    unsigned ProbeAmt = 1;
     while (true) {
       BucketT *ThisBucket = BucketsPtr + BucketNo;
       // Found Val's bucket?  If so, return it.
@@ -701,14 +700,24 @@ class DenseMapBase : public DebugEpochBase {
       }
 
       // If we found an empty bucket, the key doesn't exist in the set.
-      // Return it as the insertion point.
+      // Insert it and return the default value.
       if (LLVM_LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) {
-        FoundBucket = ThisBucket;
+        // If we've already seen a tombstone while probing, fill it in instead
+        // of the empty bucket we eventually probed to.
+        FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket;
         return false;
       }
 
-      // Hash collision: continue linear probing.
-      BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+      // If this is a tombstone, remember it.  If Val ends up not in the map, we
+      // prefer to return it than something that would require more probing.
+      if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) &&
+          !FoundTombstone)
+        FoundTombstone = ThisBucket; // Remember the first tombstone found.
+
+      // Otherwise, it's a hash collision or a tombstone, continue quadratic
+      // probing.
+      BucketNo += ProbeAmt++;
+      BucketNo &= (NumBuckets - 1);
     }
   }
 
@@ -769,6 +778,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
 
   BucketT *Buckets = nullptr;
   unsigned NumEntries = 0;
+  unsigned NumTombstones = 0;
   unsigned NumBuckets = 0;
 
   explicit DenseMap(unsigned NumBuckets, typename BaseT::ExactBucketCount) {
@@ -821,6 +831,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
   void swapImpl(DenseMap &RHS) {
     std::swap(Buckets, RHS.Buckets);
     std::swap(NumEntries, RHS.NumEntries);
+    std::swap(NumTombstones, RHS.NumTombstones);
     std::swap(NumBuckets, RHS.NumBuckets);
   }
 
@@ -828,6 +839,10 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
 
   void setNumEntries(unsigned Num) { NumEntries = Num; }
 
+  unsigned getNumTombstones() const { return NumTombstones; }
+
+  void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
   BucketT *getBuckets() const { return Buckets; }
 
   unsigned getNumBuckets() const { return NumBuckets; }
@@ -896,6 +911,7 @@ class SmallDenseMap
 
   unsigned Small : 1;
   unsigned NumEntries : 31;
+  unsigned NumTombstones;
 
   struct LargeRep {
     BucketT *Buckets;
@@ -962,8 +978,10 @@ class SmallDenseMap
     unsigned TmpNumEntries = RHS.NumEntries;
     RHS.NumEntries = NumEntries;
     NumEntries = TmpNumEntries;
+    std::swap(NumTombstones, RHS.NumTombstones);
 
     const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+    const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
     if (Small && RHS.Small) {
       // If we're swapping inline bucket arrays, we have to cope with some of
       // the tricky bits of DenseMap's storage system: the buckets are not
@@ -972,8 +990,10 @@ class SmallDenseMap
       for (unsigned i = 0, e = InlineBuckets; i != e; ++i) {
         BucketT *LHSB = &getInlineBuckets()[i],
                 *RHSB = &RHS.getInlineBuckets()[i];
-        bool hasLHSValue = !KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey);
-        bool hasRHSValue = !KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey);
+        bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey) &&
+                            !KeyInfoT::isEqual(LHSB->getFirst(), TombstoneKey));
+        bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey) &&
+                            !KeyInfoT::isEqual(RHSB->getFirst(), TombstoneKey));
         if (hasLHSValue && hasRHSValue) {
           // Swap together if we can...
           std::swap(*LHSB, *RHSB);
@@ -1012,7 +1032,8 @@ class SmallDenseMap
               *OldB = &SmallSide.getInlineBuckets()[i];
       ::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst()));
       OldB->getFirst().~KeyT();
-      if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey)) {
+      if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) &&
+          !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) {
         ::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond()));
         OldB->getSecond().~ValueT();
       }
@@ -1032,6 +1053,10 @@ class SmallDenseMap
     NumEntries = Num;
   }
 
+  unsigned getNumTombstones() const { return NumTombstones; }
+
+  void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
   const BucketT *getInlineBuckets() const {
     assert(Small);
     // Note that this cast does not violate aliasing rules as we assert that
@@ -1119,6 +1144,7 @@ class SmallDenseMap
 
     Small = false;
     NumEntries = Other.NumEntries;
+    NumTombstones = Other.NumTombstones;
     *getLargeRep() = std::move(*Other.getLargeRep());
     Other.getLargeRep()->NumBuckets = 0;
     return true;
@@ -1247,8 +1273,10 @@ class DenseMapIterator : DebugEpochBase::HandleBase {
   void AdvancePastEmptyBuckets() {
     assert(Ptr <= End);
     const KeyT Empty = KeyInfoT::getEmptyKey();
+    const KeyT Tombstone = KeyInfoT::getTombstoneKey();
 
-    while (Ptr != End && KeyInfoT::isEqual(Ptr->getFirst(), Empty))
+    while (Ptr != End && (KeyInfoT::isEqual(Ptr->getFirst(), Empty) ||
+                          KeyInfoT::isEqual(Ptr->getFirst(), Tombstone)))
       ++Ptr;
   }
 
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index f5a501897f0bb..074d42d728c95 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -1222,10 +1222,7 @@ void ValueHandleBase::RemoveFromUseList() {
   LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl;
   DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
   if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
-    // TODO: Remove the only user of DenseMap's callback erase.
-    Handles.erase(getValPtr(), [](auto &Bucket) {
-      Bucket.second->setPrevPtr(&Bucket.second);
-    });
+    Handles.erase(getValPtr());
     getValPtr()->HasValueHandle = false;
   }
 }
diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp
index adf788b8f9661..d3200b7722ee3 100644
--- a/llvm/unittests/ADT/BitVectorTest.cpp
+++ b/llvm/unittests/ADT/BitVectorTest.cpp
@@ -1434,7 +1434,8 @@ TYPED_TEST(BitVectorTest, DenseSet) {
 
 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
   TypeParam D;
-  EXPECT_DEATH(Set.insert(D), "Empty value shouldn't be inserted into map!");
+  EXPECT_DEATH(Set.insert(D),
+               "Empty/Tombstone value shouldn't be inserted into map!");
 #endif
 
   EXPECT_EQ(3U, Set.size());

@mstorsjo mstorsjo enabled auto-merge (squash) May 29, 2026 14:46
@eugeneepshteyn
Copy link
Copy Markdown
Contributor

Note: this PR was made after the original DenseMap PR, so it's possible you may need to revert it as well: #200179

@aengelke
Copy link
Copy Markdown
Contributor

No need to revert #200179. This is independent.

@mstorsjo mstorsjo merged commit 6ce211f into main May 29, 2026
12 of 13 checks passed
@mstorsjo mstorsjo deleted the revert-199615-pr/densemap-backshift branch May 29, 2026 15:29
MaskRay added a commit that referenced this pull request May 30, 2026
…0595)

DenseMap uses quadratic probing with lazy deletion: an erased entry
becomes a tombstone, a third bucket state alongside empty and live that
every find/insert must inspect.

Switch to linear probing with backward-shift deletion (Knuth TAOCP 6.4
Algorithm R), similar to the SmallPtrSet change #197637. This removes
the tombstone state entirely.

In exchange, erase now relocates the following live entries to close the
hole, so it invalidates iterators and references other than the erased
one. For callers that cache pointers into the bucket array,
erase(Key, OnMoved) and erase(iterator, OnMoved) fire a callback once
per
shifted bucket, so fix-ups cost O(cluster) rather than O(NumEntries).
ValueHandleBase::RemoveFromUseList uses this to refresh each moved
handle's PrevPtr.

Linear probing is more vulnerable to primary clustering than quadratic
probing, so this relies on the stronger DenseMapInfo<T*>::getHashValue
mixer from #197390.

Operation distribution when compiling CGExpr.cpp/ScalarEvolution.cpp:
62.8% lookups, 34.3% inserts, 2.9% erases. The heaviest DenseMap
specializations have pointer keys and 16-byte key/value pairs.
Alternatives such as Robin Hood hashing, Verstable, and Boost's
unordered_flat_map were evaluated; they are slower and have a larger
code footprint. I believe the current in-band sentinel value approach,
despite the pain (#146595), is the best, or at least very difficult to
beat.

---

This is a pure reland of #199615 (reverted in #200421) after fixing
the PoisoningVH bug by #200540.
Non-core cleanups aided by Claude Opus 4.7.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants