Skip to content

Commit fb9e469

Browse files
Backport #100234 to 26.2: Fix shared variant column pointers in ColumnVariant::filter
1 parent 2d73bbf commit fb9e469

File tree

7 files changed

+59
-4
lines changed

7 files changed

+59
-4
lines changed

src/Columns/ColumnVariant.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,10 @@ ColumnPtr ColumnVariant::filter(const Filter & filt, ssize_t result_size_hint) c
933933
/// If we have only NULLs, just filter local_discriminators column.
934934
if (hasOnlyNulls())
935935
{
936-
Columns new_variants(variants.begin(), variants.end());
936+
Columns new_variants;
937+
new_variants.reserve(variants.size());
938+
for (const auto & variant : variants)
939+
new_variants.emplace_back(variant->cloneEmpty());
937940
auto new_discriminators = local_discriminators->filter(filt, result_size_hint);
938941
/// In case of all NULL values offsets doesn't contain any useful values, just resize it.
939942
ColumnPtr new_offsets = offsets->cloneResized(new_discriminators->size());

src/Interpreters/Set.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ void Set::appendSetElements(SetKeyColumns & holder)
280280
{
281281
auto filtered_column = holder.key_columns[i]->filter(holder.filter->getData(), rows);
282282
if (set_elements[i]->empty())
283-
set_elements[i] = filtered_column;
283+
set_elements[i] = IColumn::mutate(std::move(filtered_column));
284284
else
285285
set_elements[i]->insertRangeFrom(*filtered_column, 0, filtered_column->size());
286286
if (transform_null_in && holder.null_map_holder)
@@ -294,6 +294,16 @@ void Set::checkIsCreated() const
294294
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to use set before it has been built.");
295295
}
296296

297+
Columns Set::getSetElements() const
298+
{
299+
checkIsCreated();
300+
Columns result;
301+
result.reserve(set_elements.size());
302+
for (const auto & col : set_elements)
303+
result.push_back(col->getPtr());
304+
return result;
305+
}
306+
297307
ColumnUInt8::Ptr checkDateTimePrecision(const ColumnWithTypeAndName & column_to_cast)
298308
{
299309
// Handle nullable columns

src/Interpreters/Set.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class Set
7878

7979
bool hasExplicitSetElements() const { return fill_set_elements || (!set_elements.empty() && set_elements.front()->size() == data.getTotalRowCount()); }
8080
bool hasSetElements() const { return !set_elements.empty(); }
81-
Columns getSetElements() const { checkIsCreated(); return { set_elements.begin(), set_elements.end() }; }
81+
Columns getSetElements() const;
8282

8383
void checkColumnsNumber(size_t num_key_columns) const;
8484
bool areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const;
@@ -140,7 +140,7 @@ class Set
140140

141141
/// Collected elements of `Set`.
142142
/// It is necessary for the index to work on the primary key in the IN statement.
143-
std::vector<IColumn::WrappedPtr> set_elements;
143+
MutableColumns set_elements;
144144

145145
/** Protects work with the set in the functions `insertFromBlock` and `execute`.
146146
* These functions can be called simultaneously from different threads only when using StorageSet,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
200000 2147483647
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
SET enable_analyzer = 1;
2+
3+
CREATE TABLE test__fuzz_1 (`id` Nullable(UInt64), `d` Dynamic(max_types = 133)) ENGINE = Memory;
4+
5+
INSERT INTO test__fuzz_1 SETTINGS min_insert_block_size_rows = 50000 SELECT number, number FROM numbers(100000) SETTINGS min_insert_block_size_rows = 50000;
6+
7+
INSERT INTO test__fuzz_1 SETTINGS min_insert_block_size_rows = 50000 SELECT number, concat('str_', toString(number)) FROM numbers(100000, 100000) SETTINGS min_insert_block_size_rows = 50000;
8+
9+
INSERT INTO test__fuzz_1 SETTINGS min_insert_block_size_rows = 50000 SELECT number, arrayMap(x -> multiIf((number % 9) = 0, NULL, (number % 9) = 3, concat('str_', toString(number)), number), range((number % 10) + 1)) FROM numbers(200000, 100000) SETTINGS min_insert_block_size_rows = 50000;
10+
11+
INSERT INTO test__fuzz_1 SETTINGS min_insert_block_size_rows = 50000 SELECT number, NULL FROM numbers(300000, 100000) SETTINGS min_insert_block_size_rows = 50000;
12+
13+
INSERT INTO test__fuzz_1 SETTINGS min_insert_block_size_rows = 50000 SELECT number, multiIf((number % 4) = 3, concat('str_', toString(number)), (number % 4) = 2, NULL, (number % 4) = 1, number, arrayMap(x -> multiIf((number % 9) = 0, NULL, (number % 9) = 3, concat('str_', toString(number)), number), range((number % 10) + 1))) FROM numbers(400000, 400000) SETTINGS min_insert_block_size_rows = 50000;
14+
15+
INSERT INTO test__fuzz_1 SETTINGS min_insert_block_size_rows = 50000 SELECT number, if((number % 5) = 1, CAST([range(CAST((number % 10) + 1, 'UInt64'))], 'Array(Array(Dynamic))'), number) FROM numbers(100000, 100000) SETTINGS min_insert_block_size_rows = 50000;
16+
17+
INSERT INTO test__fuzz_1 SETTINGS min_insert_block_size_rows = 50000 SELECT number, if((number % 5) = 1, CAST(CAST(concat('str_', number), 'LowCardinality(String)'), 'Dynamic'), CAST(number, 'Dynamic')) FROM numbers(100000, 100000) SETTINGS min_insert_block_size_rows = 50000;
18+
19+
SELECT count(equals(toInt256(257), intDiv(65536, -2147483649) AS alias144)), toInt128(2147483647) FROM test__fuzz_1 WHERE NOT empty((SELECT d.`Array(Variant(String, UInt64))`));
20+
21+
DROP TABLE test__fuzz_1;
22+

tests/queries/0_stateless/04051_variant_filter_shared_columns_bug.reference

Whitespace-only changes.
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
-- Regression test for ColumnVariant::filter sharing variant column pointers
2+
-- in the hasOnlyNulls() optimization path. The filter was copying ColumnPtr
3+
-- shared pointers instead of cloning, causing two ColumnVariant objects to
4+
-- share the same variant columns. When one was mutated via insertRangeFrom,
5+
-- the other became inconsistent (discriminators_size=0 but variant_sizes>0),
6+
-- leading to a LOGICAL_ERROR in compress().
7+
8+
SET cross_join_min_rows_to_compress = 1;
9+
SET enable_analyzer = 1;
10+
11+
DROP TABLE IF EXISTS test_variant_filter;
12+
CREATE TABLE test_variant_filter (`id` UInt64, `d` Dynamic) ENGINE = Memory;
13+
14+
INSERT INTO test_variant_filter SELECT number, NULL FROM numbers(50000);
15+
INSERT INTO test_variant_filter SELECT number, [number, 'str']::Array(Variant(String, UInt64)) FROM numbers(50000);
16+
17+
SELECT 1 FROM test_variant_filter WHERE NOT empty((SELECT d.`Array(Variant(String, UInt64))`)) FORMAT Null;
18+
19+
DROP TABLE test_variant_filter;

0 commit comments

Comments
 (0)