Skip to content

Commit e373862

Browse files
committed
Do not use more then 98K of memory for uniqCombined*
uniqCombined() uses hashtable for medium cardinality, and since HashTable resize by the power of 2 (well actually HashTableGrower grows double by the power of 2, hence HashTableGrower::increaseSize() should be overwritten to change this), with 1<<13 (default for uniqCombined) and UInt64 HashValueType, the HashTable will takes: getBufferSizeInBytes() == 131072 While it should be not greater then sizeof(HLL) ~= 98K, so reduce the maximum cardinality for hashtable to 1<<12 with UInt64 HashValueType and to 1<13 with UInt32, overwrite HashTableGrower::increaseSize() and cover this using max_memory_usage. Refs: #7221 (comment) v2: cover uniqCombined() with non-default K
1 parent 15deedb commit e373862

File tree

3 files changed

+71
-3
lines changed

3 files changed

+71
-3
lines changed

dbms/src/AggregateFunctions/AggregateFunctionUniqCombined.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ namespace detail
6666

6767
}
6868

69+
// Unlike HashTableGrower always grows to power of 2.
70+
struct UniqCombinedHashTableGrower : public HashTableGrower<>
71+
{
72+
void increaseSize() { ++size_degree; }
73+
};
6974

7075
template <typename Key, UInt8 K>
7176
struct AggregateFunctionUniqCombinedDataWithKey
@@ -76,7 +81,7 @@ struct AggregateFunctionUniqCombinedDataWithKey
7681
// We want to migrate from |HashSet| to |HyperLogLogCounter| when the sizes in memory become almost equal.
7782
// The size per element in |HashSet| is sizeof(Key)*2 bytes, and the overall size of |HyperLogLogCounter| is 2^K * 6 bits.
7883
// For Key=UInt32 we can calculate: 2^X * 4 * 2 ≤ 2^(K-3) * 6 ⇒ X ≤ K-4.
79-
using Set = CombinedCardinalityEstimator<Key, HashSet<Key, TrivialHash, HashTableGrower<>>, 16, K - 4, K, TrivialHash, Key>;
84+
using Set = CombinedCardinalityEstimator<Key, HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>, 16, K - 5 + (sizeof(Key) == sizeof(UInt32)), K, TrivialHash, Key>;
8085

8186
Set set;
8287
};
@@ -85,9 +90,9 @@ template <typename Key>
8590
struct AggregateFunctionUniqCombinedDataWithKey<Key, 17>
8691
{
8792
using Set = CombinedCardinalityEstimator<Key,
88-
HashSet<Key, TrivialHash, HashTableGrower<>>,
93+
HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>,
8994
16,
90-
13,
95+
12 + (sizeof(Key) == sizeof(UInt32)),
9196
17,
9297
TrivialHash,
9398
Key,
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
UInt32
2+
819200
3+
UInt64
4+
409600
5+
K=16
6+
UInt32
7+
409600
8+
UInt64
9+
204800
10+
K=18
11+
UInt32
12+
1638400
13+
UInt64
14+
819200
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
-- each uniqCombined state should not use > sizeof(HLL) in memory,
2+
-- sizeof(HLL) is (2^K * 6 / 8)
3+
-- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400
4+
5+
-- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements
6+
SELECT 'UInt32';
7+
SET max_memory_usage = 4000000;
8+
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 }
9+
SET max_memory_usage = 9830400;
10+
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k);
11+
12+
-- HashTable for UInt64 (used until (1<<12) elements), hence 4096 elements
13+
SELECT 'UInt64';
14+
SET max_memory_usage = 4000000;
15+
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 }
16+
SET max_memory_usage = 9830400;
17+
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k);
18+
19+
SELECT 'K=16';
20+
21+
-- HashTable for UInt32 (used until (1<<12) elements), hence 4096 elements
22+
SELECT 'UInt32';
23+
SET max_memory_usage = 2000000;
24+
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 }
25+
SET max_memory_usage = 4915200;
26+
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k);
27+
28+
-- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements
29+
SELECT 'UInt64';
30+
SET max_memory_usage = 2000000;
31+
SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k); -- { serverError 241 }
32+
SET max_memory_usage = 4915200;
33+
SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k);
34+
35+
SELECT 'K=18';
36+
37+
-- HashTable for UInt32 (used until (1<<14) elements), hence 16384 elements
38+
SELECT 'UInt32';
39+
SET max_memory_usage = 8000000;
40+
SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k); -- { serverError 241 }
41+
SET max_memory_usage = 19660800;
42+
SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k);
43+
44+
-- HashTable for UInt64 (used until (1<<13) elements), hence 8192 elements
45+
SELECT 'UInt64';
46+
SET max_memory_usage = 8000000;
47+
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 }
48+
SET max_memory_usage = 19660800;
49+
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k);

0 commit comments

Comments
 (0)