Skip to content

Commit 594879a

Browse files
authored
Merge pull request #85240 from ClickHouse/reorder_any_joins
Allow join sides swapping for ANY join
2 parents b499c8d + 8b4bfbe commit 594879a

13 files changed

+491
-428
lines changed

src/Interpreters/MergeJoin.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,17 +1199,19 @@ void MergeJoin::addConditionJoinColumn(Block & block, JoinTableSide block_side)
11991199

12001200
bool MergeJoin::isSupported(const std::shared_ptr<TableJoin> & table_join)
12011201
{
1202-
auto kind = table_join->kind();
1203-
auto strictness = table_join->strictness();
1202+
return isSupported(table_join->kind(), table_join->strictness()) && table_join->oneDisjunct();
1203+
}
12041204

1205+
bool MergeJoin::isSupported(JoinKind kind, JoinStrictness strictness)
1206+
{
12051207
bool is_any = (strictness == JoinStrictness::Any);
12061208
bool is_all = (strictness == JoinStrictness::All);
12071209
bool is_semi = (strictness == JoinStrictness::Semi);
12081210

12091211
bool all_join = is_all && (isInner(kind) || isLeft(kind) || isRight(kind) || isFull(kind));
12101212
bool special_left = isInnerOrLeft(kind) && (is_any || is_semi);
12111213

1212-
return (all_join || special_left) && table_join->oneDisjunct();
1214+
return all_join || special_left;
12131215
}
12141216

12151217
MergeJoin::RightBlockInfo::RightBlockInfo(std::shared_ptr<Block> block_, size_t block_number_, size_t & skip_, RowBitmaps * bitmaps_)

src/Interpreters/MergeJoin.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#pragma once
22

3-
#include <Common/SharedMutex.h>
4-
#include <Common/CacheBase.h>
53
#include <Core/Block.h>
64
#include <Core/SortDescription.h>
75
#include <Interpreters/IJoin.h>
86
#include <Interpreters/SortedBlocksWriter.h>
97
#include <QueryPipeline/SizeLimits.h>
8+
#include <Common/CacheBase.h>
9+
#include <Common/SharedMutex.h>
1010

1111
namespace DB
1212
{
@@ -50,6 +50,7 @@ class MergeJoin : public IJoin
5050
IBlocksStreamPtr getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
5151

5252
static bool isSupported(const std::shared_ptr<TableJoin> & table_join);
53+
static bool isSupported(JoinKind kind, JoinStrictness strictness);
5354

5455
private:
5556
friend class NotJoinedMerge;

src/Processors/QueryPlan/Optimizations/optimizeJoin.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
1+
#include <Core/Settings.h>
2+
#include <Interpreters/HashJoin/HashJoin.h>
3+
#include <Interpreters/IJoin.h>
4+
#include <Interpreters/MergeJoin.h>
15
#include <Processors/QueryPlan/ExpressionStep.h>
26
#include <Processors/QueryPlan/FilterStep.h>
37
#include <Processors/QueryPlan/ITransformingStep.h>
48
#include <Processors/QueryPlan/JoinStep.h>
59
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
6-
#include <Processors/QueryPlan/Optimizations/actionsDAGUtils.h>
710
#include <Processors/QueryPlan/Optimizations/Utils.h>
11+
#include <Processors/QueryPlan/Optimizations/actionsDAGUtils.h>
12+
#include <Processors/QueryPlan/ReadFromMemoryStorageStep.h>
813
#include <Processors/QueryPlan/ReadFromMergeTree.h>
914
#include <Processors/QueryPlan/SortingStep.h>
1015
#include <Storages/StorageMemory.h>
11-
#include <Processors/QueryPlan/ReadFromMemoryStorageStep.h>
12-
#include <Core/Settings.h>
13-
#include <Interpreters/IJoin.h>
14-
#include <Interpreters/HashJoin/HashJoin.h>
1516

1617
#include <Processors/QueryPlan/JoinStepLogical.h>
1718
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
@@ -20,6 +21,7 @@
2021
#include <Interpreters/TableJoin.h>
2122
#include <Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h>
2223

24+
#include <algorithm>
2325
#include <limits>
2426
#include <memory>
2527
#include <Core/Joins.h>
@@ -383,7 +385,16 @@ optimizeJoinLogical(QueryPlan::Node & node, QueryPlan::Nodes &, const QueryPlanO
383385
/// fixme: USING clause handled specially in join algorithm, so swap breaks it
384386
/// fixme: Swapping for SEMI and ANTI joins should be alright, need to try to enable it and test
385387
const auto & join_info = join_step->getJoinInfo();
386-
if (join_info.expression.is_using || join_info.strictness != JoinStrictness::All)
388+
/// At the time of writing, we're not able to swap inputs for ANY partial merge join, because it only supports ANY inner or left joins, but not right.
389+
const bool partial_merge_join_can_be_selected = std::ranges::any_of(
390+
join_step->getJoinSettings().join_algorithms,
391+
[](JoinAlgorithm alg)
392+
{ return alg == JoinAlgorithm::PARTIAL_MERGE || alg == JoinAlgorithm::PREFER_PARTIAL_MERGE || alg == JoinAlgorithm::AUTO; });
393+
const bool should_worry_about_partial_merge_join = partial_merge_join_can_be_selected
394+
&& (!MergeJoin::isSupported(join_info.kind, join_info.strictness)
395+
|| !MergeJoin::isSupported(reverseJoinKind(join_info.kind), join_info.strictness));
396+
const bool suitable_any_join = join_info.strictness == JoinStrictness::Any && !should_worry_about_partial_merge_join;
397+
if (join_info.expression.is_using || (join_info.strictness != JoinStrictness::All && !suitable_any_join))
387398
return rhs_estimation;
388399

389400
join_step->setSwapInputs();

tests/queries/0_stateless/01655_plan_optimizations.sh

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -223,43 +223,44 @@ $CLICKHOUSE_CLIENT -q "
223223
select number from numbers(5) where number in (select 1 + number from numbers(3))
224224
) where number != 2 settings enable_optimize_predicate_expression=0"
225225

226+
# `query_plan_join_swap_table = 0` below to fix the query plan
226227
echo "> one condition of filter is pushed down before LEFT JOIN"
227228
$CLICKHOUSE_CLIENT --enable_analyzer=0 -q "
228229
explain actions = 1
229230
select number as a, r.b from numbers(4) as l any left join (
230231
select number + 2 as b from numbers(3)
231-
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" |
232+
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0, query_plan_join_swap_table = 0" |
232233
grep -o "Join\|Filter column: notEquals(number, 1)"
233234
echo "> (analyzer) one condition of filter is pushed down before LEFT JOIN"
234235
$CLICKHOUSE_CLIENT --enable_analyzer=1 -q "
235236
explain actions = 1
236237
select number as a, r.b from numbers(4) as l any left join (
237238
select number + 2 as b from numbers(3)
238-
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" |
239+
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0, query_plan_join_swap_table = 0" |
239240
grep -o "Join\|Filter column: notEquals(__table1.number, 1_UInt8)"
240241
$CLICKHOUSE_CLIENT -q "
241242
select number as a, r.b from numbers(4) as l any left join (
242243
select number + 2 as b from numbers(3)
243-
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" | sort
244+
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0, query_plan_join_swap_table = 0" | sort
244245

245246
echo "> one condition of filter is pushed down before INNER JOIN"
246247
$CLICKHOUSE_CLIENT --enable_analyzer=0 -q "
247248
explain actions = 1
248249
select number as a, r.b from numbers(4) as l any inner join (
249250
select number + 2 as b from numbers(3)
250-
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" |
251+
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0, query_plan_join_swap_table = 0" |
251252
grep -o "Join\|Filter column: and(notEquals(number, 1), notEquals(number, 2))\|Filter column: and(notEquals(b, 2), notEquals(b, 1))"
252253
echo "> (analyzer) one condition of filter is pushed down before INNER JOIN"
253254
$CLICKHOUSE_CLIENT --enable_analyzer=1 -q "
254255
explain actions = 1
255256
select number as a, r.b from numbers(4) as l any inner join (
256257
select number + 2 as b from numbers(3)
257-
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" |
258+
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0, query_plan_join_swap_table = 0" |
258259
grep -o "Join\|Filter column: and(notEquals(__table1.number, 1_UInt8), notEquals(__table1.number, 2_UInt8))\|Filter column: and(notEquals(__table2.b, 2_UInt8), notEquals(__table2.b, 1_UInt8))"
259260
$CLICKHOUSE_CLIENT -q "
260261
select number as a, r.b from numbers(4) as l any inner join (
261262
select number + 2 as b from numbers(3)
262-
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0"
263+
) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0, query_plan_join_swap_table = 0"
263264

264265
echo "> filter is pushed down before UNION"
265266
$CLICKHOUSE_CLIENT -q "

tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ SELECT * FROM t1 JOIN t2 ON (t2.key == t2.key2 AND (t1.key == t1.key2 AND t1.key
8888
SELECT * FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t1.id >= t2.id ORDER BY ALL SETTINGS enable_analyzer = 1;
8989

9090
SELECT '--';
91-
SELECT t1.*, t2.* FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND length(t1.key2) == length(t2.key2) AND t1.key != '333';
91+
SELECT t1.*, t2.* FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND length(t1.key2) == length(t2.key2) AND t1.key != '333' ORDER BY ALL;
9292

9393
SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.idd and t22.key == t22.key2 OR t1.id == t22.idd and t1.id == t22.id ORDER BY ALL;
9494
SELECT 't22', * FROM t1 JOIN t22 ON t1.id == t22.idd and t1.id == t22.id OR t1.id == t22.idd and t22.key == t22.key2 ORDER BY ALL;

tests/queries/0_stateless/02967_parallel_replicas_joins_and_analyzer.sql.j2

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ insert into tab1 select number, number, number from numbers(16);
1010
insert into tab2 select number * 2, number * 2 from numbers(8);
1111
insert into tab3 select number * 4, number * 4 from numbers(4);
1212

13+
set query_plan_join_swap_table = 0; -- changes query plan
1314
set enable_analyzer = 1;
1415
set enable_parallel_replicas = 2, max_parallel_replicas = 2, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_local_plan = 1;
1516

0 commit comments

Comments
 (0)