Skip to content

Commit a8015d9

Browse files
Backport #79727 to 25.5: Analyzer: Fix correlated column expression check
1 parent 58864ff commit a8015d9

9 files changed

+161
-46
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#pragma once
2+
3+
#include <Analyzer/IQueryTreeNode.h>
4+
#include <Analyzer/Utils.h>
5+
6+
#include <Analyzer/Resolve/IdentifierResolveScope.h>
7+
8+
#include <unordered_map>
9+
#include <vector>
10+
11+
namespace DB
12+
{
13+
14+
class IQueryTreeNode;
15+
using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;
16+
using QueryTreeNodes = std::vector<QueryTreeNodePtr>;
17+
18+
struct CorrelatedColumnsCollector
19+
{
20+
explicit CorrelatedColumnsCollector(
21+
const QueryTreeNodePtr & expression,
22+
const IdentifierResolveScope * current_scope,
23+
const std::unordered_map<QueryTreeNodePtr, IdentifierResolveScope> & map
24+
)
25+
: node_to_scope_map(map)
26+
, correlated_columns()
27+
{
28+
visitExpression(expression, current_scope);
29+
}
30+
31+
bool has() const { return !correlated_columns.empty(); }
32+
33+
const QueryTreeNodes & get() const { return correlated_columns; }
34+
35+
private:
36+
void visitExpression(const QueryTreeNodePtr & expression, const IdentifierResolveScope * current_scope)
37+
{
38+
QueryTreeNodes nodes_to_process = { expression };
39+
while (!nodes_to_process.empty())
40+
{
41+
auto current = nodes_to_process.back();
42+
nodes_to_process.pop_back();
43+
44+
auto current_node_type = current->getNodeType();
45+
46+
/// Change scope if expression is a QueryNode.
47+
/// Columns in these subqueries can appear from table expressions
48+
/// that are registered in the child scope.
49+
if (current_node_type == QueryTreeNodeType::QUERY)
50+
{
51+
auto it = node_to_scope_map.find(current);
52+
if (it != node_to_scope_map.end() && current != current_scope->scope_node)
53+
{
54+
visitExpression(current, &it->second);
55+
continue;
56+
}
57+
}
58+
59+
if (current_node_type == QueryTreeNodeType::COLUMN)
60+
{
61+
if (checkCorrelatedColumn(current_scope, current))
62+
correlated_columns.push_back(current);
63+
}
64+
65+
for (const auto & child : current->getChildren())
66+
{
67+
if (child)
68+
nodes_to_process.push_back(child);
69+
}
70+
}
71+
}
72+
73+
const std::unordered_map<QueryTreeNodePtr, IdentifierResolveScope> & node_to_scope_map;
74+
QueryTreeNodes correlated_columns;
75+
};
76+
77+
}

src/Analyzer/Resolve/QueryAnalyzer.cpp

Lines changed: 36 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#include <Interpreters/ProcessorsProfileLog.h>
21
#include <Common/FieldVisitorToString.h>
32

43
#include <Columns/ColumnNullable.h>
@@ -21,16 +20,18 @@
2120
#include <Functions/exists.h>
2221
#include <Functions/grouping.h>
2322

24-
#include <TableFunctions/TableFunctionFactory.h>
2523
#include <Formats/FormatFactory.h>
2624

25+
#include <TableFunctions/TableFunctionFactory.h>
26+
2727
#include <Storages/IStorage.h>
2828
#include <Storages/StorageJoin.h>
2929

30-
#include <Interpreters/misc.h>
3130
#include <Interpreters/convertFieldToType.h>
31+
#include <Interpreters/misc.h>
3232
#include <Interpreters/ExternalDictionariesLoader.h>
3333
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
34+
#include <Interpreters/ProcessorsProfileLog.h>
3435

3536
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
3637

@@ -62,6 +63,7 @@
6263
#include <Analyzer/FunctionSecretArgumentsFinderTreeNode.h>
6364
#include <Analyzer/RecursiveCTE.h>
6465

66+
#include <Analyzer/Resolve/CorrelatedColumnsCollector.h>
6567
#include <Analyzer/Resolve/IdentifierResolveScope.h>
6668
#include <Analyzer/Resolve/QueryAnalyzer.h>
6769
#include <Analyzer/Resolve/QueryExpressionsAliasVisitor.h>
@@ -70,10 +72,10 @@
7072
#include <Analyzer/Resolve/TableFunctionsWithClusterAlternativesVisitor.h>
7173
#include <Analyzer/Resolve/TypoCorrection.h>
7274

73-
#include <Planner/PlannerActionsVisitor.h>
74-
7575
#include <Core/Settings.h>
7676

77+
#include <ranges>
78+
7779
namespace ProfileEvents
7880
{
7981
extern const Event ScalarSubqueriesGlobalCacheHit;
@@ -162,7 +164,7 @@ QueryAnalyzer::~QueryAnalyzer() = default;
162164

163165
void QueryAnalyzer::resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context)
164166
{
165-
IdentifierResolveScope scope(node, nullptr /*parent_scope*/);
167+
IdentifierResolveScope & scope = createIdentifierResolveScope(node, /*parent_scope=*/ nullptr);
166168

167169
if (!scope.context)
168170
scope.context = context;
@@ -232,7 +234,7 @@ void QueryAnalyzer::resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & ta
232234

233235
void QueryAnalyzer::resolveConstantExpression(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context)
234236
{
235-
IdentifierResolveScope scope(node, nullptr /*parent_scope*/);
237+
IdentifierResolveScope & scope = createIdentifierResolveScope(node, /*parent_scope=*/ nullptr);
236238

237239
if (!scope.context)
238240
scope.context = context;
@@ -293,6 +295,12 @@ std::optional<JoinTableSide> QueryAnalyzer::getColumnSideFromJoinTree(const Quer
293295
return {};
294296
}
295297

298+
IdentifierResolveScope & QueryAnalyzer::createIdentifierResolveScope(const QueryTreeNodePtr & scope_node, IdentifierResolveScope * parent_scope)
299+
{
300+
auto [it, _] = node_to_scope_map.emplace(scope_node, IdentifierResolveScope{scope_node, parent_scope});
301+
return it->second;
302+
}
303+
296304
ProjectionName QueryAnalyzer::calculateFunctionProjectionName(const QueryTreeNodePtr & function_node, const ProjectionNames & parameters_projection_names,
297305
const ProjectionNames & arguments_projection_names)
298306
{
@@ -1382,31 +1390,16 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
13821390
if (identifier_lookup.isFunctionLookup())
13831391
return resolve_result;
13841392

1385-
QueryTreeNodes nodes_to_process = { resolved_identifier };
1386-
while (!nodes_to_process.empty())
1393+
CorrelatedColumnsCollector correlated_columns_collector{resolved_identifier, identifier_resolve_context.scope_to_resolve_alias_expression, node_to_scope_map};
1394+
if (correlated_columns_collector.has() && !scope.context->getSettingsRef()[Setting::allow_experimental_correlated_subqueries])
13871395
{
1388-
auto current = nodes_to_process.back();
1389-
nodes_to_process.pop_back();
1390-
if (current->getNodeType() == QueryTreeNodeType::COLUMN)
1391-
{
1392-
auto is_correlated_column = checkCorrelatedColumn(&scope, current);
1393-
if (is_correlated_column && !scope.context->getSettingsRef()[Setting::allow_experimental_correlated_subqueries])
1394-
{
1395-
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
1396-
"Resolved identifier '{}' in parent scope to expression '{}' with correlated column '{}'"
1397-
" (Enable 'allow_experimental_correlated_subqueries' setting to allow correlated subqueries execution). In scope {}",
1398-
identifier_lookup.identifier.getFullName(),
1399-
resolved_identifier->formatASTForErrorMessage(),
1400-
current->as<ColumnNode>()->getColumnName(),
1401-
scope.scope_node->formatASTForErrorMessage());
1402-
}
1403-
}
1404-
1405-
for (const auto & child : current->getChildren())
1406-
{
1407-
if (child)
1408-
nodes_to_process.push_back(child);
1409-
}
1396+
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
1397+
"Resolved identifier '{}' in parent scope to expression '{}' with correlated columns '{}'"
1398+
" (Enable 'allow_experimental_correlated_subqueries' setting to allow correlated subqueries execution). In scope {}",
1399+
identifier_lookup.identifier.getFullName(),
1400+
resolved_identifier->formatASTForErrorMessage(),
1401+
fmt::join(correlated_columns_collector.get() | std::views::transform([](const auto & e) { return e->template as<ColumnNode>()->getColumnName(); }), "', '"),
1402+
scope.scope_node->formatASTForErrorMessage());
14101403
}
14111404

14121405
return resolve_result;
@@ -2317,7 +2310,7 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
23172310
if (apply_transformer->getApplyTransformerType() == ApplyColumnTransformerType::LAMBDA)
23182311
{
23192312
auto lambda_expression_to_resolve = expression_node->clone();
2320-
IdentifierResolveScope lambda_scope(expression_node, &scope /*parent_scope*/);
2313+
auto & lambda_scope = createIdentifierResolveScope(lambda_expression_to_resolve, /*parent_scope=*/&scope);
23212314
node_projection_names = resolveLambda(expression_node, lambda_expression_to_resolve, {node}, lambda_scope);
23222315
auto & lambda_expression_to_resolve_typed = lambda_expression_to_resolve->as<LambdaNode &>();
23232316
node = lambda_expression_to_resolve_typed.getExpression();
@@ -3111,7 +3104,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
31113104
auto first_argument_type = in_first_argument->getNodeType();
31123105
if (first_argument_type == QueryTreeNodeType::QUERY || first_argument_type == QueryTreeNodeType::UNION)
31133106
{
3114-
IdentifierResolveScope subquery_scope(in_first_argument, &scope /*parent_scope*/);
3107+
IdentifierResolveScope & subquery_scope = createIdentifierResolveScope(in_first_argument, &scope /*parent_scope*/);
31153108
subquery_scope.subquery_depth = scope.subquery_depth + 1;
31163109

31173110
evaluateScalarSubqueryIfNeeded(in_first_argument, subquery_scope);
@@ -3240,7 +3233,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
32403233

32413234
auto lambda_expression_clone = lambda_expression_untyped->clone();
32423235

3243-
IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/);
3236+
IdentifierResolveScope & lambda_scope = createIdentifierResolveScope(lambda_expression_clone, &scope /*parent_scope*/);
32443237
ProjectionNames lambda_projection_names = resolveLambda(lambda_expression_untyped, lambda_expression_clone, function_arguments, lambda_scope);
32453238

32463239
auto & resolved_lambda = lambda_expression_clone->as<LambdaNode &>();
@@ -3488,7 +3481,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
34883481
QueryTreeNodes lambda_arguments;
34893482
lambda_arguments.reserve(lambda_arguments_size);
34903483

3491-
IdentifierResolveScope lambda_scope(lambda_to_resolve, &scope /*parent_scope*/);
3484+
IdentifierResolveScope & lambda_scope = createIdentifierResolveScope(lambda_to_resolve, &scope /*parent_scope*/);
34923485
for (size_t i = 0; i < lambda_arguments_size; ++i)
34933486
{
34943487
const auto & argument_type = function_data_type_argument_types[i];
@@ -3715,7 +3708,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
37153708
auto node_type = node->getNodeType();
37163709
if (!allow_table_expression && (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION))
37173710
{
3718-
IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
3711+
IdentifierResolveScope & subquery_scope = createIdentifierResolveScope(node, &scope /*parent_scope*/);
37193712
subquery_scope.subquery_depth = scope.subquery_depth + 1;
37203713

37213714
evaluateScalarSubqueryIfNeeded(node, subquery_scope);
@@ -3790,7 +3783,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
37903783
else
37913784
union_node->setIsCTE(false);
37923785

3793-
IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/);
3786+
IdentifierResolveScope & subquery_scope = createIdentifierResolveScope(resolved_identifier_node, &scope /*parent_scope*/);
37943787
subquery_scope.subquery_depth = scope.subquery_depth + 1;
37953788

37963789
/// CTE is being resolved, it's required to forbid to resolve to it again
@@ -3927,7 +3920,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
39273920
[[fallthrough]];
39283921
case QueryTreeNodeType::UNION:
39293922
{
3930-
IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
3923+
IdentifierResolveScope & subquery_scope = createIdentifierResolveScope(node, &scope /*parent_scope*/);
39313924
subquery_scope.subquery_depth = scope.subquery_depth + 1;
39323925

39333926
std::string projection_name = "_subquery_" + std::to_string(subquery_counter);
@@ -4356,7 +4349,7 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
43564349
bool is_column_constant = interpolate_node_typed.getExpression()->getNodeType() == QueryTreeNodeType::CONSTANT;
43574350

43584351
auto & interpolation_to_resolve = interpolate_node_typed.getInterpolateExpression();
4359-
IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/);
4352+
IdentifierResolveScope & interpolate_scope = createIdentifierResolveScope(interpolation_to_resolve, &scope /*parent_scope*/);
43604353

43614354
auto fake_column_node = std::make_shared<ColumnNode>(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node);
43624355
if (is_column_constant)
@@ -4661,7 +4654,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
46614654
*/
46624655
alias_column_to_resolve = column_name_to_column_node[alias_column_to_resolve_name];
46634656

4664-
IdentifierResolveScope alias_column_resolve_scope(alias_column_to_resolve, nullptr /*parent_scope*/);
4657+
IdentifierResolveScope & alias_column_resolve_scope = createIdentifierResolveScope(alias_column_to_resolve, &scope /*parent_scope*/);
46654658
alias_column_resolve_scope.column_name_to_column_node = std::move(column_name_to_column_node);
46664659
alias_column_resolve_scope.context = scope.context;
46674660

@@ -5282,7 +5275,7 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS
52825275
left_subquery->getProjection().getNodes().push_back(projection_node->clone());
52835276
left_subquery->getJoinTree() = left_table_expression;
52845277

5285-
IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/);
5278+
IdentifierResolveScope & left_subquery_scope = createIdentifierResolveScope(left_subquery, nullptr /*parent_scope*/);
52865279
resolveQuery(left_subquery, left_subquery_scope);
52875280

52885281
const auto & resolved_nodes = left_subquery->getProjection().getNodes();
@@ -5905,7 +5898,7 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier
59055898
auto & non_recursive_query_mutable_context = non_recursive_query_is_query_node ? non_recursive_query->as<QueryNode &>().getMutableContext()
59065899
: non_recursive_query->as<UnionNode &>().getMutableContext();
59075900

5908-
IdentifierResolveScope non_recursive_subquery_scope(non_recursive_query, &scope /*parent_scope*/);
5901+
IdentifierResolveScope & non_recursive_subquery_scope = createIdentifierResolveScope(non_recursive_query, &scope /*parent_scope*/);
59095902
non_recursive_subquery_scope.subquery_depth = scope.subquery_depth + 1;
59105903

59115904
if (non_recursive_query_is_query_node)
@@ -5936,7 +5929,7 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier
59365929
{
59375930
auto & query_node = queries_nodes[i];
59385931

5939-
IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/);
5932+
IdentifierResolveScope & subquery_scope = createIdentifierResolveScope(query_node, &scope /*parent_scope*/);
59405933

59415934
if (recursive_cte_table_node)
59425935
subquery_scope.expression_argument_name_to_node[union_node_typed.getCTEName()] = recursive_cte_table_node;

src/Analyzer/Resolve/QueryAnalyzer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <unordered_map>
34
#include <Interpreters/Context_fwd.h>
45
#include <Analyzer/HashUtils.h>
56
#include <Analyzer/IQueryTreeNode.h>
@@ -124,6 +125,8 @@ class QueryAnalyzer
124125
private:
125126
/// Utility functions
126127

128+
IdentifierResolveScope & createIdentifierResolveScope(const QueryTreeNodePtr & scope_node, IdentifierResolveScope * parent_scope);
129+
127130
static ProjectionName calculateFunctionProjectionName(const QueryTreeNodePtr & function_node,
128131
const ProjectionNames & parameters_projection_names,
129132
const ProjectionNames & arguments_projection_names);
@@ -285,6 +288,8 @@ class QueryAnalyzer
285288
std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value_local;
286289
std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value_global;
287290

291+
std::unordered_map<QueryTreeNodePtr, IdentifierResolveScope> node_to_scope_map;
292+
288293
const bool only_analyze;
289294
};
290295

src/Analyzer/Utils.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,11 @@ bool isCorrelatedQueryOrUnionNode(const QueryTreeNodePtr & node)
251251
}
252252

253253
bool checkCorrelatedColumn(
254-
IdentifierResolveScope * scope_to_check,
254+
const IdentifierResolveScope * scope_to_check,
255255
const QueryTreeNodePtr & column
256256
)
257257
{
258-
auto * current_scope = scope_to_check;
258+
const auto * current_scope = scope_to_check;
259259
chassert(column->getNodeType() == QueryTreeNodeType::COLUMN);
260260
auto * column_node = column->as<ColumnNode>();
261261
auto column_source = column_node->getColumnSource();
@@ -272,9 +272,15 @@ bool checkCorrelatedColumn(
272272

273273
while (scope_to_check != nullptr)
274274
{
275+
/// Check if column source is in the FROM section of the current scope (query).
275276
if (scope_to_check->registered_table_expression_nodes.contains(column_source))
276277
break;
277278

279+
/// Previous check wouldn't work in the case of resolution of alias columns.
280+
/// In that case table expression is not registered yet and table expression data is being computed.
281+
if (scope_to_check->table_expressions_in_resolve_process.contains(column_source.get()))
282+
break;
283+
278284
if (isQueryOrUnionNode(scope_to_check->scope_node))
279285
{
280286
is_correlated = true;

src/Analyzer/Utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ bool isCorrelatedQueryOrUnionNode(const QueryTreeNodePtr & node);
7171
* in corresponding QueryNode or UnionNode.
7272
*/
7373
bool checkCorrelatedColumn(
74-
IdentifierResolveScope * scope_to_check,
74+
const IdentifierResolveScope * scope_to_check,
7575
const QueryTreeNodePtr & column
7676
);
7777

tests/queries/0_stateless/03444_analyzer_resolve_alias_columns.reference

Whitespace-only changes.
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
CREATE TABLE users (
2+
uid Int16,
3+
name String,
4+
age Int16,
5+
v Array(Int16) ALIAS arrayMap(x -> age, array(name))
6+
) ENGINE=Memory;
7+
8+
INSERT INTO users VALUES (1231, 'John', 33);
9+
INSERT INTO users VALUES (6666, 'Ksenia', 48);
10+
INSERT INTO users VALUES (8888, 'Alice', 50);
11+
12+
SELECT * FROM users FORMAT Null;
13+
14+
CREATE TABLE out1
15+
(
16+
id UInt64,
17+
j JSON,
18+
name Array(UInt32) ALIAS arrayMap(x -> toUInt32(x), JSONAllPaths(j)),
19+
value Array(Array(UInt32)) ALIAS arrayMap(x -> JSONExtract(CAST(j, 'String'), indexOf(name, x), 'Array(UInt32)'), name)
20+
)
21+
ORDER BY id;
22+
23+
INSERT INTO out1 SELECT 42, '{"a" : 42}';
24+
25+
SELECT * FROM out1 FORMAT Null;
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
0
2+
1

0 commit comments

Comments
 (0)