Skip to content

Commit c2b8a93

Browse files
Backport #87231 to 25.7: Fix "Too large size passed to allocator" UB in JOIN due to mixed const and non-const blocks
1 parent c7799a8 commit c2b8a93

File tree

7 files changed

+85
-75
lines changed

7 files changed

+85
-75
lines changed

src/Interpreters/InterpreterInsertQuery.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -441,15 +441,6 @@ QueryPipeline InterpreterInsertQuery::addInsertToSelectPipeline(ASTInsertQuery &
441441
bool should_squash = shouldAddSquashingForStorage(table, getContext()) && !no_squash && !async_insert;
442442
if (should_squash)
443443
{
444-
/// Squashing cannot work with const and non-const blocks
445-
pipeline.addSimpleTransform([&](const SharedHeader & in_header) -> ProcessorPtr
446-
{
447-
/// Sparse columns will be converted to full in the InsertDependenciesBuilder,
448-
/// and for squashing we don't need to convert column to full since it will do it by itself
449-
bool remove_sparse = false;
450-
return std::make_shared<MaterializingTransform>(in_header, remove_sparse);
451-
});
452-
453444
pipeline.addSimpleTransform(
454445
[&](const SharedHeader & in_header) -> ProcessorPtr
455446
{

src/Interpreters/Squashing.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ Chunk Squashing::squash(std::vector<Chunk> && input_chunks, Chunk::ChunkInfoColl
165165
{
166166
if (!have_same_serialization[i])
167167
{
168-
mutable_columns[i] = recursiveRemoveSparse(std::move(mutable_columns[i]))->assumeMutable();
168+
mutable_columns[i] = recursiveRemoveSparse(std::move(mutable_columns[i]))->convertToFullColumnIfConst()->assumeMutable();
169169
for (auto & column : source_columns_list[i])
170-
column = recursiveRemoveSparse(column);
170+
column = recursiveRemoveSparse(column)->convertToFullColumnIfConst();
171171
}
172172

173173
/// We know all the data we will insert in advance and can make all necessary pre-allocations.

src/Storages/LiveView/StorageLiveView.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -658,11 +658,6 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes)
658658
builder = interpreter.buildQueryPipeline();
659659
}
660660

661-
builder.addSimpleTransform([&](const SharedHeader & cur_header)
662-
{
663-
return std::make_shared<MaterializingTransform>(cur_header);
664-
});
665-
666661
/// Squashing is needed here because the view query can generate a lot of blocks
667662
/// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY
668663
/// and two-level aggregation is triggered).

src/Storages/WindowView/StorageWindowView.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -646,10 +646,6 @@ std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
646646

647647
builder = select.buildQueryPipeline();
648648

649-
builder.addSimpleTransform([&](const SharedHeader & current_header)
650-
{
651-
return std::make_shared<MaterializingTransform>(current_header);
652-
});
653649
builder.addSimpleTransform(
654650
[&](const SharedHeader & current_header)
655651
{

tests/queries/0_stateless/03531_insert_removing_sparse_transform.reference

Lines changed: 47 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,14 @@ digraph
1111
n1[label="ExpressionTransform_1"];
1212
n2[label="ExpressionTransform_2"];
1313
n3[label="CountingTransform_3"];
14-
n4[label="MaterializingTransform_4"];
15-
n5[label="PlanSquashingTransform_5"];
16-
n6[label="DeduplicationToken::AddTokenInfoTransform_6"];
17-
n7[label="ApplySquashingTransform_11"];
18-
n8[label="ConvertingTransform_7"];
19-
n9[label="NestedElementsValidationTransform_8"];
20-
n10[label="RemovingSparseTransform_9"];
21-
n11[label="LogSink_10"];
22-
n12[label="EmptySink_12"];
14+
n4[label="PlanSquashingTransform_4"];
15+
n5[label="DeduplicationToken::AddTokenInfoTransform_5"];
16+
n6[label="ApplySquashingTransform_10"];
17+
n7[label="ConvertingTransform_6"];
18+
n8[label="NestedElementsValidationTransform_7"];
19+
n9[label="RemovingSparseTransform_8"];
20+
n10[label="LogSink_9"];
21+
n11[label="EmptySink_11"];
2322
}
2423
n0 -> n1;
2524
n1 -> n2;
@@ -32,7 +31,6 @@ digraph
3231
n8 -> n9;
3332
n9 -> n10;
3433
n10 -> n11;
35-
n11 -> n12;
3634
}
3735
-- MergeTree support sparse columns - no RemovingSparseTransform
3836
create table t_mt (key Int) engine=MergeTree order by ();
@@ -45,14 +43,13 @@ digraph
4543
n1[label="ExpressionTransform_1"];
4644
n2[label="ExpressionTransform_2"];
4745
n3[label="CountingTransform_3"];
48-
n4[label="MaterializingTransform_4"];
49-
n5[label="PlanSquashingTransform_5"];
50-
n6[label="DeduplicationToken::AddTokenInfoTransform_6"];
51-
n7[label="ApplySquashingTransform_10"];
52-
n8[label="ConvertingTransform_7"];
53-
n9[label="NestedElementsValidationTransform_8"];
54-
n10[label="MergeTreeSink_9"];
55-
n11[label="EmptySink_11"];
46+
n4[label="PlanSquashingTransform_4"];
47+
n5[label="DeduplicationToken::AddTokenInfoTransform_5"];
48+
n6[label="ApplySquashingTransform_9"];
49+
n7[label="ConvertingTransform_6"];
50+
n8[label="NestedElementsValidationTransform_7"];
51+
n9[label="MergeTreeSink_8"];
52+
n10[label="EmptySink_10"];
5653
}
5754
n0 -> n1;
5855
n1 -> n2;
@@ -64,7 +61,6 @@ digraph
6461
n7 -> n8;
6562
n8 -> n9;
6663
n9 -> n10;
67-
n10 -> n11;
6864
}
6965
-- MergeTree pushes to Log, which does not support sparse columns - RemovingSparseTransform added
7066
create materialized view mv to t_log as select * from t_mt;
@@ -77,23 +73,22 @@ digraph
7773
n1[label="ExpressionTransform_1"];
7874
n2[label="ExpressionTransform_2"];
7975
n3[label="CountingTransform_3"];
80-
n4[label="MaterializingTransform_4"];
81-
n5[label="PlanSquashingTransform_5"];
82-
n6[label="DeduplicationToken::AddTokenInfoTransform_6"];
83-
n7[label="ApplySquashingTransform_19"];
84-
n8[label="ConvertingTransform_7"];
85-
n9[label="NestedElementsValidationTransform_8"];
86-
n10[label="MergeTreeSink_9"];
87-
n11[label="Copy_17"];
88-
n12[label="BeginingViewsTransform_10"];
89-
n13[label="ExecutingInnerQueryFromView_13"];
90-
n14[label="CountingTransform_12"];
91-
n15[label="SquashingTransform_11"];
92-
n16[label="NestedElementsValidationTransform_14"];
93-
n17[label="RemovingSparseTransform_15"];
94-
n18[label="LogSink_16"];
95-
n19[label="FinalizingViewsTransform_18"];
96-
n20[label="EmptySink_20"];
76+
n4[label="PlanSquashingTransform_4"];
77+
n5[label="DeduplicationToken::AddTokenInfoTransform_5"];
78+
n6[label="ApplySquashingTransform_18"];
79+
n7[label="ConvertingTransform_6"];
80+
n8[label="NestedElementsValidationTransform_7"];
81+
n9[label="MergeTreeSink_8"];
82+
n10[label="Copy_16"];
83+
n11[label="BeginingViewsTransform_9"];
84+
n12[label="ExecutingInnerQueryFromView_12"];
85+
n13[label="CountingTransform_11"];
86+
n14[label="SquashingTransform_10"];
87+
n15[label="NestedElementsValidationTransform_13"];
88+
n16[label="RemovingSparseTransform_14"];
89+
n17[label="LogSink_15"];
90+
n18[label="FinalizingViewsTransform_17"];
91+
n19[label="EmptySink_19"];
9792
}
9893
n0 -> n1;
9994
n1 -> n2;
@@ -114,7 +109,6 @@ digraph
114109
n16 -> n17;
115110
n17 -> n18;
116111
n18 -> n19;
117-
n19 -> n20;
118112
}
119113
drop table mv;
120114
-- Log does not support sparse columns - RemovingSparseTransform added
@@ -128,23 +122,22 @@ digraph
128122
n1[label="ExpressionTransform_1"];
129123
n2[label="ExpressionTransform_2"];
130124
n3[label="CountingTransform_3"];
131-
n4[label="MaterializingTransform_4"];
132-
n5[label="PlanSquashingTransform_5"];
133-
n6[label="DeduplicationToken::AddTokenInfoTransform_6"];
134-
n7[label="ApplySquashingTransform_19"];
135-
n8[label="ConvertingTransform_7"];
136-
n9[label="NestedElementsValidationTransform_8"];
137-
n10[label="RemovingSparseTransform_9"];
138-
n11[label="LogSink_10"];
139-
n12[label="Copy_17"];
140-
n13[label="BeginingViewsTransform_11"];
141-
n14[label="ExecutingInnerQueryFromView_14"];
142-
n15[label="CountingTransform_13"];
143-
n16[label="SquashingTransform_12"];
144-
n17[label="NestedElementsValidationTransform_15"];
145-
n18[label="MergeTreeSink_16"];
146-
n19[label="FinalizingViewsTransform_18"];
147-
n20[label="EmptySink_20"];
125+
n4[label="PlanSquashingTransform_4"];
126+
n5[label="DeduplicationToken::AddTokenInfoTransform_5"];
127+
n6[label="ApplySquashingTransform_18"];
128+
n7[label="ConvertingTransform_6"];
129+
n8[label="NestedElementsValidationTransform_7"];
130+
n9[label="RemovingSparseTransform_8"];
131+
n10[label="LogSink_9"];
132+
n11[label="Copy_16"];
133+
n12[label="BeginingViewsTransform_10"];
134+
n13[label="ExecutingInnerQueryFromView_13"];
135+
n14[label="CountingTransform_12"];
136+
n15[label="SquashingTransform_11"];
137+
n16[label="NestedElementsValidationTransform_14"];
138+
n17[label="MergeTreeSink_15"];
139+
n18[label="FinalizingViewsTransform_17"];
140+
n19[label="EmptySink_19"];
148141
}
149142
n0 -> n1;
150143
n1 -> n2;
@@ -165,5 +158,4 @@ digraph
165158
n16 -> n17;
166159
n17 -> n18;
167160
n18 -> n19;
168-
n19 -> n20;
169161
}

tests/queries/0_stateless/03630_join_blocks_with_different_constness.reference

Whitespace-only changes.

tests/queries/0_stateless/03630_join_blocks_with_different_constness.sql

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
-- Regression for the case when the JOIN contains const and non-const blocks, which leads to UB:
2+
--
3+
-- Too large size (18446603496615682040) passed to allocator. It indicates an error
4+
WITH
5+
input_1 AS (SELECT number::String AS parent_id, number::String as id, number::String as value FROM numbers_mt(1e6)),
6+
dimensions_1 AS (SELECT number::String AS value_id FROM numbers_mt(1e6)),
7+
dimensions_2 AS (SELECT number::String AS value_id FROM numbers_mt(1e6)),
8+
parents AS
9+
(
10+
SELECT 'foo' AS type, parent_id
11+
FROM input_1
12+
GROUP BY parent_id
13+
),
14+
parents_with_value AS
15+
(
16+
SELECT type, parent_id, t.value
17+
FROM parents
18+
LEFT JOIN input_1 AS t ON t.id = parents.parent_id
19+
),
20+
values AS
21+
(
22+
SELECT 'foo' AS type, '' AS parent_id, value
23+
FROM input_1
24+
),
25+
all AS
26+
(
27+
SELECT * FROM parents_with_value
28+
UNION ALL
29+
SELECT * FROM values
30+
)
31+
SELECT type, value
32+
FROM all
33+
INNER JOIN dimensions_1 AS dim1 ON all.value = dim1.value_id
34+
INNER JOIN dimensions_2 AS dim2 ON all.value = dim2.value_id
35+
FORMAT `Null`
36+
SETTINGS max_block_size=65535, max_joined_block_size_rows=65535, max_threads=32;

0 commit comments

Comments
 (0)