Skip to content

Commit 73544a3

Browse files
authored
Merge pull request ClickHouse#14845 from ClickHouse/fix_alias_array
Fix recursive column defaults
2 parents a679867 + cbe8532 commit 73544a3

File tree

5 files changed

+96
-36
lines changed

5 files changed

+96
-36
lines changed

src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp

Lines changed: 64 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
22
#include <Storages/MergeTree/MergeTreeData.h>
3+
#include <Common/checkStackSize.h>
34
#include <Common/typeid_cast.h>
45
#include <Columns/ColumnConst.h>
56
#include <unordered_set>
@@ -10,61 +11,89 @@ namespace DB
1011
namespace ErrorCodes
1112
{
1213
extern const int LOGICAL_ERROR;
14+
extern const int NO_SUCH_COLUMN_IN_TABLE;
1315
}
1416

17+
namespace
18+
{
19+
20+
/// Columns absent in part may depend on other absent columns so we are
21+
/// searching all required physical columns recursively. Return true if found at
22+
/// least one existing (physical) column in part.
23+
bool injectRequiredColumnsRecursively(
24+
const String & column_name,
25+
const ColumnsDescription & storage_columns,
26+
const MergeTreeData::AlterConversions & alter_conversions,
27+
const MergeTreeData::DataPartPtr & part,
28+
Names & columns,
29+
NameSet & required_columns,
30+
NameSet & injected_columns)
31+
{
32+
/// This is needed to prevent stack overflow in case of cyclic defaults or
33+
/// huge AST which for some reason was not validated on parsing/interpreter
34+
/// stages.
35+
checkStackSize();
36+
String column_name_in_part = column_name;
37+
if (alter_conversions.isColumnRenamed(column_name_in_part))
38+
column_name_in_part = alter_conversions.getColumnOldName(column_name_in_part);
39+
40+
/// column has files and hence does not require evaluation
41+
if (storage_columns.hasPhysical(column_name) && part->hasColumnFiles(column_name_in_part, *storage_columns.getPhysical(column_name).type))
42+
{
43+
/// ensure each column is added only once
44+
if (required_columns.count(column_name) == 0)
45+
{
46+
columns.emplace_back(column_name);
47+
required_columns.emplace(column_name);
48+
injected_columns.emplace(column_name);
49+
}
50+
return true;
51+
}
52+
53+
/// Column doesn't have default value and don't exist in part
54+
/// don't need to add to required set.
55+
const auto column_default = storage_columns.getDefault(column_name);
56+
if (!column_default)
57+
return false;
58+
59+
/// collect identifiers required for evaluation
60+
IdentifierNameSet identifiers;
61+
column_default->expression->collectIdentifierNames(identifiers);
62+
63+
bool result = false;
64+
for (const auto & identifier : identifiers)
65+
result |= injectRequiredColumnsRecursively(identifier, storage_columns, alter_conversions, part, columns, required_columns, injected_columns);
66+
67+
return result;
68+
}
69+
70+
}
1571

1672
NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns)
1773
{
1874
NameSet required_columns{std::begin(columns), std::end(columns)};
1975
NameSet injected_columns;
2076

21-
auto all_column_files_missing = true;
77+
bool have_at_least_one_physical_column = false;
2278

2379
const auto & storage_columns = metadata_snapshot->getColumns();
2480
auto alter_conversions = storage.getAlterConversionsForPart(part);
2581
for (size_t i = 0; i < columns.size(); ++i)
2682
{
27-
/// possibly renamed
28-
auto column_name_in_part = columns[i];
29-
30-
if (alter_conversions.isColumnRenamed(column_name_in_part))
31-
column_name_in_part = alter_conversions.getColumnOldName(column_name_in_part);
32-
33-
/// column has files and hence does not require evaluation
34-
if (part->hasColumnFiles(column_name_in_part, *storage_columns.getPhysical(columns[i]).type))
35-
{
36-
all_column_files_missing = false;
37-
continue;
38-
}
39-
40-
const auto column_default = storage_columns.getDefault(columns[i]);
41-
if (!column_default)
42-
continue;
83+
/// We are going to fetch only physical columns
84+
if (!storage_columns.hasPhysical(columns[i]))
85+
throw Exception("There is no physical column " + columns[i] + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
4386

44-
/// collect identifiers required for evaluation
45-
IdentifierNameSet identifiers;
46-
column_default->expression->collectIdentifierNames(identifiers);
47-
48-
for (const auto & identifier : identifiers)
49-
{
50-
if (storage_columns.hasPhysical(identifier))
51-
{
52-
/// ensure each column is added only once
53-
if (required_columns.count(identifier) == 0)
54-
{
55-
columns.emplace_back(identifier);
56-
required_columns.emplace(identifier);
57-
injected_columns.emplace(identifier);
58-
}
59-
}
60-
}
87+
have_at_least_one_physical_column |= injectRequiredColumnsRecursively(
88+
columns[i], storage_columns, alter_conversions,
89+
part, columns, required_columns, injected_columns);
6190
}
6291

6392
/** Add a column of the minimum size.
6493
* Used in case when no column is needed or files are missing, but at least you need to know number of rows.
6594
* Adds to the columns.
6695
*/
67-
if (all_column_files_missing)
96+
if (!have_at_least_one_physical_column)
6897
{
6998
const auto minimum_size_column_name = part->getColumnNameWithMinumumCompressedSize(metadata_snapshot);
7099
columns.push_back(minimum_size_column_name);

tests/queries/0_stateless/01084_defaults_on_aliases.reference

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
1 1
22
1 1 1
3+
1
34
2 2 4
45
2 2 2 4
56
3 3 9

tests/queries/0_stateless/01084_defaults_on_aliases.sql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@ DROP TABLE IF EXISTS table_with_defaults_on_aliases;
22

33
CREATE TABLE table_with_defaults_on_aliases (col1 UInt32, col2 ALIAS col1, col3 DEFAULT col2) Engine = MergeTree() ORDER BY tuple();
44

5+
SYSTEM STOP MERGES table_with_defaults_on_aliases;
6+
57
INSERT INTO table_with_defaults_on_aliases (col1) VALUES (1);
68

79
SELECT * FROM table_with_defaults_on_aliases WHERE col1 = 1;
810

911
SELECT col1, col2, col3 FROM table_with_defaults_on_aliases WHERE col1 = 1;
1012

13+
SELECT col3 FROM table_with_defaults_on_aliases; -- important to check without WHERE
14+
1115
ALTER TABLE table_with_defaults_on_aliases ADD COLUMN col4 UInt64 DEFAULT col2 * col3;
1216

1317
INSERT INTO table_with_defaults_on_aliases (col1) VALUES (2);
@@ -24,7 +28,6 @@ SELECT * FROM table_with_defaults_on_aliases WHERE col1 = 3;
2428

2529
SELECT col1, col2, col3, col4, col5 FROM table_with_defaults_on_aliases WHERE col1 = 3;
2630

27-
2831
ALTER TABLE table_with_defaults_on_aliases ADD COLUMN col6 UInt64 MATERIALIZED col2 * col4;
2932

3033
DROP TABLE IF EXISTS table_with_defaults_on_aliases;
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
a1 b1
2+
a2 b2
3+
a3 b3
4+
c1
5+
c2
6+
c3
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
DROP TABLE IF EXISTS test_new_col;
2+
3+
CREATE TABLE test_new_col
4+
(
5+
`_csv` String,
6+
`csv_as_array` Array(String) ALIAS splitByChar(';',_csv),
7+
`csv_col1` String DEFAULT csv_as_array[1],
8+
`csv_col2` String DEFAULT csv_as_array[2]
9+
)
10+
ENGINE = MergeTree
11+
ORDER BY tuple();
12+
13+
INSERT INTO test_new_col (_csv) VALUES ('a1;b1;c1;d1'), ('a2;b2;c2;d2'), ('a3;b3;c3;d3');
14+
15+
SELECT csv_col1, csv_col2 FROM test_new_col ORDER BY csv_col1;
16+
17+
ALTER TABLE test_new_col ADD COLUMN `csv_col3` String DEFAULT csv_as_array[3];
18+
19+
SELECT csv_col3 FROM test_new_col ORDER BY csv_col3;
20+
21+
DROP TABLE IF EXISTS test_new_col;

0 commit comments

Comments
 (0)