Revert "[DenseMap] Replace tombstone deletion with TAOCP 6.4 Algorithm R"#200421
Merged
Conversation
|
@llvm/pr-subscribers-llvm-ir Author: Martin Storsjö (mstorsjo) ChangesReverts llvm/llvm-project#199615. That change causes nondeterministic failed assertions. 3 Files Affected:
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index b52906a215a7c..081d7e198e94c 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -53,10 +53,6 @@ struct DenseMapPair : std::pair<KeyT, ValueT> {
} // end namespace detail
-// Befriended below so DenseMapBase can expose its bucket-relocation callback
-// erase to ValueHandleBase, the only caller that caches bucket pointers.
-class ValueHandleBase;
-
template <typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>,
@@ -124,7 +120,7 @@ class DenseMapBase : public DebugEpochBase {
void clear() {
incrementEpoch();
- if (getNumEntries() == 0)
+ if (getNumEntries() == 0 && getNumTombstones() == 0)
return;
// If the capacity of the array is huge, and the # elements used is small,
@@ -140,11 +136,14 @@ class DenseMapBase : public DebugEpochBase {
for (BucketT &B : buckets())
B.getFirst() = EmptyKey;
} else {
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
unsigned NumEntries = getNumEntries();
for (BucketT &B : buckets()) {
if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
- B.getSecond().~ValueT();
- --NumEntries;
+ if (!KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
+ B.getSecond().~ValueT();
+ --NumEntries;
+ }
B.getFirst() = EmptyKey;
}
}
@@ -152,6 +151,7 @@ class DenseMapBase : public DebugEpochBase {
(void)NumEntries;
}
setNumEntries(0);
+ setNumTombstones(0);
}
void shrink_and_clear() {
@@ -325,19 +325,26 @@ class DenseMapBase : public DebugEpochBase {
return Ret;
}
- void eraseFromFilledBucket(BucketT *TheBucket) {
- eraseFromFilledBucket(TheBucket, [](BucketT &) {});
- }
-
bool erase(const KeyT &Val) {
BucketT *TheBucket = doFind(Val);
if (!TheBucket)
return false; // not in map.
- eraseFromFilledBucket(TheBucket);
+ incrementEpoch();
+ TheBucket->getSecond().~ValueT();
+ TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+ decrementNumEntries();
+ incrementNumTombstones();
return true;
}
- void erase(iterator I) { eraseFromFilledBucket(&*I); }
+ void erase(iterator I) {
+ BucketT *TheBucket = &*I;
+ incrementEpoch();
+ TheBucket->getSecond().~ValueT();
+ TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+ decrementNumEntries();
+ incrementNumTombstones();
+ }
/// Remove entries that match the given predicate. \p Pred is invoked
/// with a reference to each live bucket and must not access the map being
@@ -347,22 +354,22 @@ class DenseMapBase : public DebugEpochBase {
/// into the map are invalidated.
template <typename Predicate> bool remove_if(Predicate Pred) {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
- unsigned NumBuckets = getNumBuckets();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
bool Removed = false;
for (BucketT &B : buckets()) {
- if (KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+ if (KeyInfoT::isEqual(B.getFirst(), EmptyKey) ||
+ KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
continue;
if (Pred(B)) {
B.getSecond().~ValueT();
- B.getFirst() = EmptyKey;
+ B.getFirst() = TombstoneKey;
decrementNumEntries();
+ incrementNumTombstones();
Removed = true;
}
}
- if (Removed) {
+ if (Removed)
incrementEpoch();
- this->grow(NumBuckets);
- }
return Removed;
}
@@ -399,10 +406,12 @@ class DenseMapBase : public DebugEpochBase {
struct ExactBucketCount {};
void initWithExactBucketCount(unsigned NewNumBuckets) {
- if (derived().allocateBuckets(NewNumBuckets))
+ if (derived().allocateBuckets(NewNumBuckets)) {
initEmpty();
- else
+ } else {
setNumEntries(0);
+ setNumTombstones(0);
+ }
}
void destroyAll() {
@@ -416,8 +425,10 @@ class DenseMapBase : public DebugEpochBase {
return;
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (BucketT &B : buckets()) {
- if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+ if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
B.getSecond().~ValueT();
B.getFirst().~KeyT();
}
@@ -427,6 +438,7 @@ class DenseMapBase : public DebugEpochBase {
static_assert(std::is_base_of_v<DenseMapBase, DerivedT>,
"Must pass the derived type to this template!");
setNumEntries(0);
+ setNumTombstones(0);
assert((getNumBuckets() & (getNumBuckets() - 1)) == 0 &&
"# initial buckets must be a power of two!");
@@ -451,8 +463,10 @@ class DenseMapBase : public DebugEpochBase {
void moveFrom(DerivedT &Other) {
// Insert all the old elements.
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (BucketT &B : Other.buckets()) {
- if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
+ if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
// Insert the key/value into the new table.
BucketT *DestBucket;
bool FoundVal = LookupBucketFor(B.getFirst(), DestBucket);
@@ -474,6 +488,7 @@ class DenseMapBase : public DebugEpochBase {
this->destroyAll();
derived().deallocateBuckets();
setNumEntries(0);
+ setNumTombstones(0);
if (!derived().allocateBuckets(other.getNumBuckets())) {
// The bucket list is empty. No work to do.
return;
@@ -483,6 +498,7 @@ class DenseMapBase : public DebugEpochBase {
assert(getNumBuckets() == other.getNumBuckets());
setNumEntries(other.getNumEntries());
+ setNumTombstones(other.getNumTombstones());
BucketT *Buckets = getBuckets();
const BucketT *OtherBuckets = other.getBuckets();
@@ -493,66 +509,17 @@ class DenseMapBase : public DebugEpochBase {
NumBuckets * sizeof(BucketT));
} else {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (size_t I = 0; I < NumBuckets; ++I) {
::new (&Buckets[I].getFirst()) KeyT(OtherBuckets[I].getFirst());
- if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey))
+ if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(Buckets[I].getFirst(), TombstoneKey))
::new (&Buckets[I].getSecond()) ValueT(OtherBuckets[I].getSecond());
}
}
}
private:
- // ValueHandleBase caches pointers into the bucket array, so it needs the
- // callback erase below to fix them up as entries shift. It is the only
- // intended caller; do not add new ones.
- friend class ValueHandleBase;
-
- /// Erase the entry at \p TheBucket and close the resulting hole via Knuth
- /// TAOCP 6.4 Algorithm R. For callers that cache pointers into the bucket
- /// array, call \p OnMoved per shifted bucket.
- template <typename OnMovedT>
- void eraseFromFilledBucket(BucketT *TheBucket, OnMovedT &&OnMoved) {
- incrementEpoch();
- TheBucket->getSecond().~ValueT();
- decrementNumEntries();
-
- BucketT *BucketsPtr = getBuckets();
- const unsigned NumBuckets = getNumBuckets();
- const unsigned Mask = NumBuckets - 1;
- const KeyT EmptyKey = KeyInfoT::getEmptyKey();
- unsigned I = static_cast<unsigned>(TheBucket - BucketsPtr);
- unsigned J = I;
- while (true) {
- J = (J + 1) & Mask;
- BucketT &BJ = BucketsPtr[J];
- if (KeyInfoT::isEqual(BJ.getFirst(), EmptyKey))
- break;
- auto Ideal = KeyInfoT::getHashValue(BJ.getFirst());
- // If the hole (I) lies on the linear-probe chain from the home bucket
- // (Ideal) to J, shift J into the hole and make J the new hole.
- if (((I - Ideal) & Mask) < ((J - Ideal) & Mask)) {
- BucketT &BI = BucketsPtr[I];
- BI.getFirst() = std::move(BJ.getFirst());
- ::new (&BI.getSecond()) ValueT(std::move(BJ.getSecond()));
- BJ.getSecond().~ValueT();
- OnMoved(BI);
- I = J;
- }
- }
- BucketsPtr[I].getFirst() = EmptyKey;
- }
-
- /// Erase \p Val and close the resulting hole by potentially shifting other
- /// entries into it. For callers that cache pointers into the bucket array,
- /// call \p OnMoved per shifted bucket.
- template <typename OnMovedT> bool erase(const KeyT &Val, OnMovedT &&OnMoved) {
- BucketT *TheBucket = doFind(Val);
- if (!TheBucket)
- return false;
- eraseFromFilledBucket(TheBucket, std::forward<OnMovedT>(OnMoved));
- return true;
- }
-
DerivedT &derived() { return *static_cast<DerivedT *>(this); }
const DerivedT &derived() const {
return *static_cast<const DerivedT *>(this);
@@ -595,6 +562,14 @@ class DenseMapBase : public DebugEpochBase {
void decrementNumEntries() { setNumEntries(getNumEntries() - 1); }
+ unsigned getNumTombstones() const { return derived().getNumTombstones(); }
+
+ void setNumTombstones(unsigned Num) { derived().setNumTombstones(Num); }
+
+ void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); }
+
+ void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); }
+
const BucketT *getBuckets() const { return derived().getBuckets(); }
BucketT *getBuckets() { return derived().getBuckets(); }
@@ -630,21 +605,37 @@ class DenseMapBase : public DebugEpochBase {
BucketT *TheBucket) {
incrementEpoch();
- // Grow the table if the load factor would exceed 3/4 after insertion.
- // Linear probing with gap-closing deletion (Knuth Algorithm R) keeps
- // every chain compact and bounded by the table's empty-bucket count,
- // so no tombstone-driven resize is needed.
+ // If the load of the hash table is more than 3/4, or if fewer than 1/8 of
+ // the buckets are empty (meaning that many are filled with tombstones),
+ // grow the table.
+ //
+ // The later case is tricky. For example, if we had one empty bucket with
+ // tons of tombstones, failing lookups (e.g. for insertion) would have to
+ // probe almost the entire table until it found the empty bucket. If the
+ // table completely filled with tombstones, no lookup would ever succeed,
+ // causing infinite loops in lookup.
unsigned NewNumEntries = getNumEntries() + 1;
unsigned NumBuckets = getNumBuckets();
if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) {
this->grow(NumBuckets * 2);
LookupBucketFor(Lookup, TheBucket);
+ } else if (LLVM_UNLIKELY(NumBuckets -
+ (NewNumEntries + getNumTombstones()) <=
+ NumBuckets / 8)) {
+ this->grow(NumBuckets);
+ LookupBucketFor(Lookup, TheBucket);
}
assert(TheBucket);
// Only update the state after we've grown our bucket space appropriately
// so that when growing buckets we have self-consistent entry count.
incrementNumEntries();
+
+ // If we are writing over a tombstone, remember this.
+ const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey))
+ decrementNumTombstones();
+
return TheBucket;
}
@@ -657,6 +648,7 @@ class DenseMapBase : public DebugEpochBase {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+ unsigned ProbeAmt = 1;
while (true) {
const BucketT *Bucket = BucketsPtr + BucketNo;
if (LLVM_LIKELY(KeyInfoT::isEqual(Val, Bucket->getFirst())))
@@ -664,8 +656,10 @@ class DenseMapBase : public DebugEpochBase {
if (LLVM_LIKELY(KeyInfoT::isEqual(Bucket->getFirst(), EmptyKey)))
return nullptr;
- // Hash collision: continue linear probing.
- BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+ // Otherwise, it's a hash collision or a tombstone, continue quadratic
+ // probing.
+ BucketNo += ProbeAmt++;
+ BucketNo &= NumBuckets - 1;
}
}
@@ -676,7 +670,7 @@ class DenseMapBase : public DebugEpochBase {
/// Lookup the appropriate bucket for Val, returning it in FoundBucket. If the
/// bucket contains the key and a value, this returns true, otherwise it
- /// returns a bucket with an empty marker and returns false.
+ /// returns a bucket with an empty marker or tombstone and returns false.
template <typename LookupKeyT>
bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) {
BucketT *BucketsPtr = getBuckets();
@@ -687,11 +681,16 @@ class DenseMapBase : public DebugEpochBase {
return false;
}
+ // FoundTombstone - Keep track of whether we find a tombstone while probing.
+ BucketT *FoundTombstone = nullptr;
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
assert(!KeyInfoT::isEqual(Val, EmptyKey) &&
- "Empty value shouldn't be inserted into map!");
+ !KeyInfoT::isEqual(Val, TombstoneKey) &&
+ "Empty/Tombstone value shouldn't be inserted into map!");
unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+ unsigned ProbeAmt = 1;
while (true) {
BucketT *ThisBucket = BucketsPtr + BucketNo;
// Found Val's bucket? If so, return it.
@@ -701,14 +700,24 @@ class DenseMapBase : public DebugEpochBase {
}
// If we found an empty bucket, the key doesn't exist in the set.
- // Return it as the insertion point.
+ // Insert it and return the default value.
if (LLVM_LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) {
- FoundBucket = ThisBucket;
+ // If we've already seen a tombstone while probing, fill it in instead
+ // of the empty bucket we eventually probed to.
+ FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket;
return false;
}
- // Hash collision: continue linear probing.
- BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+ // If this is a tombstone, remember it. If Val ends up not in the map, we
+ // prefer to return it than something that would require more probing.
+ if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) &&
+ !FoundTombstone)
+ FoundTombstone = ThisBucket; // Remember the first tombstone found.
+
+ // Otherwise, it's a hash collision or a tombstone, continue quadratic
+ // probing.
+ BucketNo += ProbeAmt++;
+ BucketNo &= (NumBuckets - 1);
}
}
@@ -769,6 +778,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
BucketT *Buckets = nullptr;
unsigned NumEntries = 0;
+ unsigned NumTombstones = 0;
unsigned NumBuckets = 0;
explicit DenseMap(unsigned NumBuckets, typename BaseT::ExactBucketCount) {
@@ -821,6 +831,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
void swapImpl(DenseMap &RHS) {
std::swap(Buckets, RHS.Buckets);
std::swap(NumEntries, RHS.NumEntries);
+ std::swap(NumTombstones, RHS.NumTombstones);
std::swap(NumBuckets, RHS.NumBuckets);
}
@@ -828,6 +839,10 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
void setNumEntries(unsigned Num) { NumEntries = Num; }
+ unsigned getNumTombstones() const { return NumTombstones; }
+
+ void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
BucketT *getBuckets() const { return Buckets; }
unsigned getNumBuckets() const { return NumBuckets; }
@@ -896,6 +911,7 @@ class SmallDenseMap
unsigned Small : 1;
unsigned NumEntries : 31;
+ unsigned NumTombstones;
struct LargeRep {
BucketT *Buckets;
@@ -962,8 +978,10 @@ class SmallDenseMap
unsigned TmpNumEntries = RHS.NumEntries;
RHS.NumEntries = NumEntries;
NumEntries = TmpNumEntries;
+ std::swap(NumTombstones, RHS.NumTombstones);
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
if (Small && RHS.Small) {
// If we're swapping inline bucket arrays, we have to cope with some of
// the tricky bits of DenseMap's storage system: the buckets are not
@@ -972,8 +990,10 @@ class SmallDenseMap
for (unsigned i = 0, e = InlineBuckets; i != e; ++i) {
BucketT *LHSB = &getInlineBuckets()[i],
*RHSB = &RHS.getInlineBuckets()[i];
- bool hasLHSValue = !KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey);
- bool hasRHSValue = !KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey);
+ bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(LHSB->getFirst(), TombstoneKey));
+ bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(RHSB->getFirst(), TombstoneKey));
if (hasLHSValue && hasRHSValue) {
// Swap together if we can...
std::swap(*LHSB, *RHSB);
@@ -1012,7 +1032,8 @@ class SmallDenseMap
*OldB = &SmallSide.getInlineBuckets()[i];
::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst()));
OldB->getFirst().~KeyT();
- if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey)) {
+ if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) {
::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond()));
OldB->getSecond().~ValueT();
}
@@ -1032,6 +1053,10 @@ class SmallDenseMap
NumEntries = Num;
}
+ unsigned getNumTombstones() const { return NumTombstones; }
+
+ void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
const BucketT *getInlineBuckets() const {
assert(Small);
// Note that this cast does not violate aliasing rules as we assert that
@@ -1119,6 +1144,7 @@ class SmallDenseMap
Small = false;
NumEntries = Other.NumEntries;
+ NumTombstones = Other.NumTombstones;
*getLargeRep() = std::move(*Other.getLargeRep());
Other.getLargeRep()->NumBuckets = 0;
return true;
@@ -1247,8 +1273,10 @@ class DenseMapIterator : DebugEpochBase::HandleBase {
void AdvancePastEmptyBuckets() {
assert(Ptr <= End);
const KeyT Empty = KeyInfoT::getEmptyKey();
+ const KeyT Tombstone = KeyInfoT::getTombstoneKey();
- while (Ptr != End && KeyInfoT::isEqual(Ptr->getFirst(), Empty))
+ while (Ptr != End && (KeyInfoT::isEqual(Ptr->getFirst(), Empty) ||
+ KeyInfoT::isEqual(Ptr->getFirst(), Tombstone)))
++Ptr;
}
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index f5a501897f0bb..074d42d728c95 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -1222,10 +1222,7 @@ void ValueHandleBase::RemoveFromUseList() {
LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl;
DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
- // TODO: Remove the only user of DenseMap's callback erase.
- Handles.erase(getValPtr(), [](auto &Bucket) {
- Bucket.second->setPrevPtr(&Bucket.second);
- });
+ Handles.erase(getValPtr());
getValPtr()->HasValueHandle = false;
}
}
diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp
index adf788b8f9661..d3200b7722ee3 100644
--- a/llvm/unittests/ADT/BitVectorTest.cpp
+++ b/llvm/unittests/ADT/BitVectorTest.cpp
@@ -1434,7 +1434,8 @@ TYPED_TEST(BitVectorTest, DenseSet) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
TypeParam D;
- EXPECT_DEATH(Set.insert(D), "Empty value shouldn't be inserted into map!");
+ EXPECT_DEATH(Set.insert(D),
+ "Empty/Tombstone value shouldn't be inserted into map!");
#endif
EXPECT_EQ(3U, Set.size());
|
|
@llvm/pr-subscribers-llvm-adt Author: Martin Storsjö (mstorsjo) ChangesReverts llvm/llvm-project#199615. That change causes nondeterministic failed assertions. 3 Files Affected:
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index b52906a215a7c..081d7e198e94c 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -53,10 +53,6 @@ struct DenseMapPair : std::pair<KeyT, ValueT> {
} // end namespace detail
-// Befriended below so DenseMapBase can expose its bucket-relocation callback
-// erase to ValueHandleBase, the only caller that caches bucket pointers.
-class ValueHandleBase;
-
template <typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>,
@@ -124,7 +120,7 @@ class DenseMapBase : public DebugEpochBase {
void clear() {
incrementEpoch();
- if (getNumEntries() == 0)
+ if (getNumEntries() == 0 && getNumTombstones() == 0)
return;
// If the capacity of the array is huge, and the # elements used is small,
@@ -140,11 +136,14 @@ class DenseMapBase : public DebugEpochBase {
for (BucketT &B : buckets())
B.getFirst() = EmptyKey;
} else {
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
unsigned NumEntries = getNumEntries();
for (BucketT &B : buckets()) {
if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
- B.getSecond().~ValueT();
- --NumEntries;
+ if (!KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
+ B.getSecond().~ValueT();
+ --NumEntries;
+ }
B.getFirst() = EmptyKey;
}
}
@@ -152,6 +151,7 @@ class DenseMapBase : public DebugEpochBase {
(void)NumEntries;
}
setNumEntries(0);
+ setNumTombstones(0);
}
void shrink_and_clear() {
@@ -325,19 +325,26 @@ class DenseMapBase : public DebugEpochBase {
return Ret;
}
- void eraseFromFilledBucket(BucketT *TheBucket) {
- eraseFromFilledBucket(TheBucket, [](BucketT &) {});
- }
-
bool erase(const KeyT &Val) {
BucketT *TheBucket = doFind(Val);
if (!TheBucket)
return false; // not in map.
- eraseFromFilledBucket(TheBucket);
+ incrementEpoch();
+ TheBucket->getSecond().~ValueT();
+ TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+ decrementNumEntries();
+ incrementNumTombstones();
return true;
}
- void erase(iterator I) { eraseFromFilledBucket(&*I); }
+ void erase(iterator I) {
+ BucketT *TheBucket = &*I;
+ incrementEpoch();
+ TheBucket->getSecond().~ValueT();
+ TheBucket->getFirst() = KeyInfoT::getTombstoneKey();
+ decrementNumEntries();
+ incrementNumTombstones();
+ }
/// Remove entries that match the given predicate. \p Pred is invoked
/// with a reference to each live bucket and must not access the map being
@@ -347,22 +354,22 @@ class DenseMapBase : public DebugEpochBase {
/// into the map are invalidated.
template <typename Predicate> bool remove_if(Predicate Pred) {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
- unsigned NumBuckets = getNumBuckets();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
bool Removed = false;
for (BucketT &B : buckets()) {
- if (KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+ if (KeyInfoT::isEqual(B.getFirst(), EmptyKey) ||
+ KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
continue;
if (Pred(B)) {
B.getSecond().~ValueT();
- B.getFirst() = EmptyKey;
+ B.getFirst() = TombstoneKey;
decrementNumEntries();
+ incrementNumTombstones();
Removed = true;
}
}
- if (Removed) {
+ if (Removed)
incrementEpoch();
- this->grow(NumBuckets);
- }
return Removed;
}
@@ -399,10 +406,12 @@ class DenseMapBase : public DebugEpochBase {
struct ExactBucketCount {};
void initWithExactBucketCount(unsigned NewNumBuckets) {
- if (derived().allocateBuckets(NewNumBuckets))
+ if (derived().allocateBuckets(NewNumBuckets)) {
initEmpty();
- else
+ } else {
setNumEntries(0);
+ setNumTombstones(0);
+ }
}
void destroyAll() {
@@ -416,8 +425,10 @@ class DenseMapBase : public DebugEpochBase {
return;
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (BucketT &B : buckets()) {
- if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey))
+ if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(B.getFirst(), TombstoneKey))
B.getSecond().~ValueT();
B.getFirst().~KeyT();
}
@@ -427,6 +438,7 @@ class DenseMapBase : public DebugEpochBase {
static_assert(std::is_base_of_v<DenseMapBase, DerivedT>,
"Must pass the derived type to this template!");
setNumEntries(0);
+ setNumTombstones(0);
assert((getNumBuckets() & (getNumBuckets() - 1)) == 0 &&
"# initial buckets must be a power of two!");
@@ -451,8 +463,10 @@ class DenseMapBase : public DebugEpochBase {
void moveFrom(DerivedT &Other) {
// Insert all the old elements.
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (BucketT &B : Other.buckets()) {
- if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) {
+ if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) {
// Insert the key/value into the new table.
BucketT *DestBucket;
bool FoundVal = LookupBucketFor(B.getFirst(), DestBucket);
@@ -474,6 +488,7 @@ class DenseMapBase : public DebugEpochBase {
this->destroyAll();
derived().deallocateBuckets();
setNumEntries(0);
+ setNumTombstones(0);
if (!derived().allocateBuckets(other.getNumBuckets())) {
// The bucket list is empty. No work to do.
return;
@@ -483,6 +498,7 @@ class DenseMapBase : public DebugEpochBase {
assert(getNumBuckets() == other.getNumBuckets());
setNumEntries(other.getNumEntries());
+ setNumTombstones(other.getNumTombstones());
BucketT *Buckets = getBuckets();
const BucketT *OtherBuckets = other.getBuckets();
@@ -493,66 +509,17 @@ class DenseMapBase : public DebugEpochBase {
NumBuckets * sizeof(BucketT));
} else {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
for (size_t I = 0; I < NumBuckets; ++I) {
::new (&Buckets[I].getFirst()) KeyT(OtherBuckets[I].getFirst());
- if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey))
+ if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(Buckets[I].getFirst(), TombstoneKey))
::new (&Buckets[I].getSecond()) ValueT(OtherBuckets[I].getSecond());
}
}
}
private:
- // ValueHandleBase caches pointers into the bucket array, so it needs the
- // callback erase below to fix them up as entries shift. It is the only
- // intended caller; do not add new ones.
- friend class ValueHandleBase;
-
- /// Erase the entry at \p TheBucket and close the resulting hole via Knuth
- /// TAOCP 6.4 Algorithm R. For callers that cache pointers into the bucket
- /// array, call \p OnMoved per shifted bucket.
- template <typename OnMovedT>
- void eraseFromFilledBucket(BucketT *TheBucket, OnMovedT &&OnMoved) {
- incrementEpoch();
- TheBucket->getSecond().~ValueT();
- decrementNumEntries();
-
- BucketT *BucketsPtr = getBuckets();
- const unsigned NumBuckets = getNumBuckets();
- const unsigned Mask = NumBuckets - 1;
- const KeyT EmptyKey = KeyInfoT::getEmptyKey();
- unsigned I = static_cast<unsigned>(TheBucket - BucketsPtr);
- unsigned J = I;
- while (true) {
- J = (J + 1) & Mask;
- BucketT &BJ = BucketsPtr[J];
- if (KeyInfoT::isEqual(BJ.getFirst(), EmptyKey))
- break;
- auto Ideal = KeyInfoT::getHashValue(BJ.getFirst());
- // If the hole (I) lies on the linear-probe chain from the home bucket
- // (Ideal) to J, shift J into the hole and make J the new hole.
- if (((I - Ideal) & Mask) < ((J - Ideal) & Mask)) {
- BucketT &BI = BucketsPtr[I];
- BI.getFirst() = std::move(BJ.getFirst());
- ::new (&BI.getSecond()) ValueT(std::move(BJ.getSecond()));
- BJ.getSecond().~ValueT();
- OnMoved(BI);
- I = J;
- }
- }
- BucketsPtr[I].getFirst() = EmptyKey;
- }
-
- /// Erase \p Val and close the resulting hole by potentially shifting other
- /// entries into it. For callers that cache pointers into the bucket array,
- /// call \p OnMoved per shifted bucket.
- template <typename OnMovedT> bool erase(const KeyT &Val, OnMovedT &&OnMoved) {
- BucketT *TheBucket = doFind(Val);
- if (!TheBucket)
- return false;
- eraseFromFilledBucket(TheBucket, std::forward<OnMovedT>(OnMoved));
- return true;
- }
-
DerivedT &derived() { return *static_cast<DerivedT *>(this); }
const DerivedT &derived() const {
return *static_cast<const DerivedT *>(this);
@@ -595,6 +562,14 @@ class DenseMapBase : public DebugEpochBase {
void decrementNumEntries() { setNumEntries(getNumEntries() - 1); }
+ unsigned getNumTombstones() const { return derived().getNumTombstones(); }
+
+ void setNumTombstones(unsigned Num) { derived().setNumTombstones(Num); }
+
+ void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); }
+
+ void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); }
+
const BucketT *getBuckets() const { return derived().getBuckets(); }
BucketT *getBuckets() { return derived().getBuckets(); }
@@ -630,21 +605,37 @@ class DenseMapBase : public DebugEpochBase {
BucketT *TheBucket) {
incrementEpoch();
- // Grow the table if the load factor would exceed 3/4 after insertion.
- // Linear probing with gap-closing deletion (Knuth Algorithm R) keeps
- // every chain compact and bounded by the table's empty-bucket count,
- // so no tombstone-driven resize is needed.
+ // If the load of the hash table is more than 3/4, or if fewer than 1/8 of
+ // the buckets are empty (meaning that many are filled with tombstones),
+ // grow the table.
+ //
+ // The later case is tricky. For example, if we had one empty bucket with
+ // tons of tombstones, failing lookups (e.g. for insertion) would have to
+ // probe almost the entire table until it found the empty bucket. If the
+ // table completely filled with tombstones, no lookup would ever succeed,
+ // causing infinite loops in lookup.
unsigned NewNumEntries = getNumEntries() + 1;
unsigned NumBuckets = getNumBuckets();
if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) {
this->grow(NumBuckets * 2);
LookupBucketFor(Lookup, TheBucket);
+ } else if (LLVM_UNLIKELY(NumBuckets -
+ (NewNumEntries + getNumTombstones()) <=
+ NumBuckets / 8)) {
+ this->grow(NumBuckets);
+ LookupBucketFor(Lookup, TheBucket);
}
assert(TheBucket);
// Only update the state after we've grown our bucket space appropriately
// so that when growing buckets we have self-consistent entry count.
incrementNumEntries();
+
+ // If we are writing over a tombstone, remember this.
+ const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey))
+ decrementNumTombstones();
+
return TheBucket;
}
@@ -657,6 +648,7 @@ class DenseMapBase : public DebugEpochBase {
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+ unsigned ProbeAmt = 1;
while (true) {
const BucketT *Bucket = BucketsPtr + BucketNo;
if (LLVM_LIKELY(KeyInfoT::isEqual(Val, Bucket->getFirst())))
@@ -664,8 +656,10 @@ class DenseMapBase : public DebugEpochBase {
if (LLVM_LIKELY(KeyInfoT::isEqual(Bucket->getFirst(), EmptyKey)))
return nullptr;
- // Hash collision: continue linear probing.
- BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+ // Otherwise, it's a hash collision or a tombstone, continue quadratic
+ // probing.
+ BucketNo += ProbeAmt++;
+ BucketNo &= NumBuckets - 1;
}
}
@@ -676,7 +670,7 @@ class DenseMapBase : public DebugEpochBase {
/// Lookup the appropriate bucket for Val, returning it in FoundBucket. If the
/// bucket contains the key and a value, this returns true, otherwise it
- /// returns a bucket with an empty marker and returns false.
+ /// returns a bucket with an empty marker or tombstone and returns false.
template <typename LookupKeyT>
bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) {
BucketT *BucketsPtr = getBuckets();
@@ -687,11 +681,16 @@ class DenseMapBase : public DebugEpochBase {
return false;
}
+ // FoundTombstone - Keep track of whether we find a tombstone while probing.
+ BucketT *FoundTombstone = nullptr;
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
assert(!KeyInfoT::isEqual(Val, EmptyKey) &&
- "Empty value shouldn't be inserted into map!");
+ !KeyInfoT::isEqual(Val, TombstoneKey) &&
+ "Empty/Tombstone value shouldn't be inserted into map!");
unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1);
+ unsigned ProbeAmt = 1;
while (true) {
BucketT *ThisBucket = BucketsPtr + BucketNo;
// Found Val's bucket? If so, return it.
@@ -701,14 +700,24 @@ class DenseMapBase : public DebugEpochBase {
}
// If we found an empty bucket, the key doesn't exist in the set.
- // Return it as the insertion point.
+ // Insert it and return the default value.
if (LLVM_LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) {
- FoundBucket = ThisBucket;
+ // If we've already seen a tombstone while probing, fill it in instead
+ // of the empty bucket we eventually probed to.
+ FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket;
return false;
}
- // Hash collision: continue linear probing.
- BucketNo = (BucketNo + 1) & (NumBuckets - 1);
+ // If this is a tombstone, remember it. If Val ends up not in the map, we
+ // prefer to return it than something that would require more probing.
+ if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) &&
+ !FoundTombstone)
+ FoundTombstone = ThisBucket; // Remember the first tombstone found.
+
+ // Otherwise, it's a hash collision or a tombstone, continue quadratic
+ // probing.
+ BucketNo += ProbeAmt++;
+ BucketNo &= (NumBuckets - 1);
}
}
@@ -769,6 +778,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
BucketT *Buckets = nullptr;
unsigned NumEntries = 0;
+ unsigned NumTombstones = 0;
unsigned NumBuckets = 0;
explicit DenseMap(unsigned NumBuckets, typename BaseT::ExactBucketCount) {
@@ -821,6 +831,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
void swapImpl(DenseMap &RHS) {
std::swap(Buckets, RHS.Buckets);
std::swap(NumEntries, RHS.NumEntries);
+ std::swap(NumTombstones, RHS.NumTombstones);
std::swap(NumBuckets, RHS.NumBuckets);
}
@@ -828,6 +839,10 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>,
void setNumEntries(unsigned Num) { NumEntries = Num; }
+ unsigned getNumTombstones() const { return NumTombstones; }
+
+ void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
BucketT *getBuckets() const { return Buckets; }
unsigned getNumBuckets() const { return NumBuckets; }
@@ -896,6 +911,7 @@ class SmallDenseMap
unsigned Small : 1;
unsigned NumEntries : 31;
+ unsigned NumTombstones;
struct LargeRep {
BucketT *Buckets;
@@ -962,8 +978,10 @@ class SmallDenseMap
unsigned TmpNumEntries = RHS.NumEntries;
RHS.NumEntries = NumEntries;
NumEntries = TmpNumEntries;
+ std::swap(NumTombstones, RHS.NumTombstones);
const KeyT EmptyKey = KeyInfoT::getEmptyKey();
+ const KeyT TombstoneKey = KeyInfoT::getTombstoneKey();
if (Small && RHS.Small) {
// If we're swapping inline bucket arrays, we have to cope with some of
// the tricky bits of DenseMap's storage system: the buckets are not
@@ -972,8 +990,10 @@ class SmallDenseMap
for (unsigned i = 0, e = InlineBuckets; i != e; ++i) {
BucketT *LHSB = &getInlineBuckets()[i],
*RHSB = &RHS.getInlineBuckets()[i];
- bool hasLHSValue = !KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey);
- bool hasRHSValue = !KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey);
+ bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(LHSB->getFirst(), TombstoneKey));
+ bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(RHSB->getFirst(), TombstoneKey));
if (hasLHSValue && hasRHSValue) {
// Swap together if we can...
std::swap(*LHSB, *RHSB);
@@ -1012,7 +1032,8 @@ class SmallDenseMap
*OldB = &SmallSide.getInlineBuckets()[i];
::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst()));
OldB->getFirst().~KeyT();
- if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey)) {
+ if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) &&
+ !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) {
::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond()));
OldB->getSecond().~ValueT();
}
@@ -1032,6 +1053,10 @@ class SmallDenseMap
NumEntries = Num;
}
+ unsigned getNumTombstones() const { return NumTombstones; }
+
+ void setNumTombstones(unsigned Num) { NumTombstones = Num; }
+
const BucketT *getInlineBuckets() const {
assert(Small);
// Note that this cast does not violate aliasing rules as we assert that
@@ -1119,6 +1144,7 @@ class SmallDenseMap
Small = false;
NumEntries = Other.NumEntries;
+ NumTombstones = Other.NumTombstones;
*getLargeRep() = std::move(*Other.getLargeRep());
Other.getLargeRep()->NumBuckets = 0;
return true;
@@ -1247,8 +1273,10 @@ class DenseMapIterator : DebugEpochBase::HandleBase {
void AdvancePastEmptyBuckets() {
assert(Ptr <= End);
const KeyT Empty = KeyInfoT::getEmptyKey();
+ const KeyT Tombstone = KeyInfoT::getTombstoneKey();
- while (Ptr != End && KeyInfoT::isEqual(Ptr->getFirst(), Empty))
+ while (Ptr != End && (KeyInfoT::isEqual(Ptr->getFirst(), Empty) ||
+ KeyInfoT::isEqual(Ptr->getFirst(), Tombstone)))
++Ptr;
}
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index f5a501897f0bb..074d42d728c95 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -1222,10 +1222,7 @@ void ValueHandleBase::RemoveFromUseList() {
LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl;
DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
- // TODO: Remove the only user of DenseMap's callback erase.
- Handles.erase(getValPtr(), [](auto &Bucket) {
- Bucket.second->setPrevPtr(&Bucket.second);
- });
+ Handles.erase(getValPtr());
getValPtr()->HasValueHandle = false;
}
}
diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp
index adf788b8f9661..d3200b7722ee3 100644
--- a/llvm/unittests/ADT/BitVectorTest.cpp
+++ b/llvm/unittests/ADT/BitVectorTest.cpp
@@ -1434,7 +1434,8 @@ TYPED_TEST(BitVectorTest, DenseSet) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
TypeParam D;
- EXPECT_DEATH(Set.insert(D), "Empty value shouldn't be inserted into map!");
+ EXPECT_DEATH(Set.insert(D),
+ "Empty/Tombstone value shouldn't be inserted into map!");
#endif
EXPECT_EQ(3U, Set.size());
|
aengelke
approved these changes
May 29, 2026
Contributor
|
Note: this PR was made after the original DenseMap PR, so it's possible you may need to revert it as well: #200179 |
Contributor
|
No need to revert #200179. This is independent. |
MaskRay
added a commit
that referenced
this pull request
May 30, 2026
…0595) DenseMap uses quadratic probing with lazy deletion: an erased entry becomes a tombstone, a third bucket state alongside empty and live that every find/insert must inspect. Switch to linear probing with backward-shift deletion (Knuth TAOCP 6.4 Algorithm R), similar to the SmallPtrSet change #197637. This removes the tombstone state entirely. In exchange, erase now relocates the following live entries to close the hole, so it invalidates iterators and references other than the erased one. For callers that cache pointers into the bucket array, erase(Key, OnMoved) and erase(iterator, OnMoved) fire a callback once per shifted bucket, so fix-ups cost O(cluster) rather than O(NumEntries). ValueHandleBase::RemoveFromUseList uses this to refresh each moved handle's PrevPtr. Linear probing is more vulnerable to primary clustering than quadratic probing, so this relies on the stronger DenseMapInfo<T*>::getHashValue mixer from #197390. Operation distribution when compiling CGExpr.cpp/ScalarEvolution.cpp: 62.8% lookups, 34.3% inserts, 2.9% erases. The heaviest DenseMap specializations have pointer keys and 16-byte key/value pairs. Alternatives such as Robin Hood hashing, Verstable, and Boost's unordered_flat_map were evaluated; they are slower and have a larger code footprint. I believe the current in-band sentinel value approach, despite the pain (#146595), is the best, or at least very difficult to beat. --- This is a pure reland of #199615 (reverted in #200421) after fixing the PoisoningVH bug by #200540. Non-core cleanups aided by Claude Opus 4.7.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Reverts #199615.
That change causes nondeterministic failed assertions.