Skip to content

Commit eb9ee72

Browse files
Merge pull request #11913 from ClickHouse/compact-parts-by-default
Enable compact parts by default for small parts
2 parents 6e0bdaf + d1e90e0 commit eb9ee72

File tree

49 files changed

+241
-206
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+241
-206
lines changed

src/Interpreters/SystemLog.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ std::shared_ptr<TSystemLog> createSystemLog(
6363
engine = "ENGINE = MergeTree";
6464
if (!partition_by.empty())
6565
engine += " PARTITION BY (" + partition_by + ")";
66-
engine += " ORDER BY (event_date, event_time)"
67-
"SETTINGS min_bytes_for_wide_part = '10M'"; /// Use polymorphic parts for log tables by default
66+
engine += " ORDER BY (event_date, event_time)";
6867
}
6968

7069
size_t flush_interval_milliseconds = config.getUInt64(config_prefix + ".flush_interval_milliseconds",

src/Storages/MergeTree/MergeTreeSettings.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ struct Settings;
2020
M(UInt64, index_granularity, 8192, "How many rows correspond to one primary key value.", 0) \
2121
\
2222
/** Data storing format settings. */ \
23-
M(UInt64, min_bytes_for_wide_part, 0, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \
23+
M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \
2424
M(UInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \
2525
M(UInt64, min_bytes_for_compact_part, 0, "Experimental. Minimal uncompressed size in bytes to create part in compact format instead of saving it in RAM", 0) \
2626
M(UInt64, min_rows_for_compact_part, 0, "Experimental. Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \
@@ -105,6 +105,9 @@ struct Settings;
105105
M(String, storage_policy, "default", "Name of storage disk policy", 0) \
106106
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \
107107
\
108+
/** Settings for testing purposes */ \
109+
M(Bool, randomize_part_type, false, "For testing purposes only. Randomizes part type between wide and compact", 0) \
110+
\
108111
/** Obsolete settings. Kept for backward compatibility only. */ \
109112
M(UInt64, min_relative_delay_to_yield_leadership, 120, "Obsolete setting, does nothing.", 0) \
110113
M(UInt64, check_delay_period, 60, "Obsolete setting, does nothing.", 0) \

src/Storages/MergeTree/registerStorageMergeTree.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <Common/Macros.h>
99
#include <Common/OptimizedRegularExpression.h>
1010
#include <Common/typeid_cast.h>
11+
#include <Common/thread_local_rng.h>
1112

1213
#include <Parsers/ASTCreateQuery.h>
1314
#include <Parsers/ASTExpressionList.h>
@@ -233,6 +234,25 @@ If you use the Replicated version of engines, see https://clickhouse.tech/docs/e
233234
}
234235

235236

237+
/// Overwrites the two wide-part thresholds in `storage_settings`
/// (`min_rows_for_wide_part` and `min_bytes_for_wide_part`) with randomly chosen values.
/// One time in three both thresholds are set to 0; otherwise each threshold is drawn
/// independently and uniformly from [0, MAX_THRESHOLD_FOR_ROWS] and [0, MAX_THRESHOLD_FOR_BYTES].
/// The settings object is modified in place through the pointer; nothing is returned.
static void randomizePartTypeSettings(const std::unique_ptr<MergeTreeSettings> & storage_settings)
238+
{
239+
/// Inclusive upper bounds for the randomized thresholds: 100000 rows and 10 MiB.
static constexpr auto MAX_THRESHOLD_FOR_ROWS = 100000;
240+
static constexpr auto MAX_THRESHOLD_FOR_BYTES = 1024 * 1024 * 10;
241+
242+
/// Create all parts in wide format with probability 1/3.
/// Zero thresholds mean no minimum size is required for a part to be wide.
243+
if (thread_local_rng() % 3 == 0)
244+
{
245+
storage_settings->min_rows_for_wide_part = 0;
246+
storage_settings->min_bytes_for_wide_part = 0;
247+
}
248+
else
249+
{
/// Otherwise pick each threshold independently; std::uniform_int_distribution
/// includes both bounds, so 0 and the maximum are possible outcomes.
250+
storage_settings->min_rows_for_wide_part = std::uniform_int_distribution{0, MAX_THRESHOLD_FOR_ROWS}(thread_local_rng);
251+
storage_settings->min_bytes_for_wide_part = std::uniform_int_distribution{0, MAX_THRESHOLD_FOR_BYTES}(thread_local_rng);
252+
}
253+
}
254+
255+
236256
static StoragePtr create(const StorageFactory::Arguments & args)
237257
{
238258
/** [Replicated][|Summing|Collapsing|Aggregating|Replacing|Graphite]MergeTree (2 * 7 combinations) engines
@@ -653,6 +673,20 @@ static StoragePtr create(const StorageFactory::Arguments & args)
653673
++arg_num;
654674
}
655675

676+
/// Allow to randomize part type for tests to cover more cases.
677+
/// But do not randomize if either of these settings was set explicitly.
678+
if (storage_settings->randomize_part_type
679+
&& !storage_settings->min_rows_for_wide_part.changed
680+
&& !storage_settings->min_bytes_for_wide_part.changed)
681+
{
682+
randomizePartTypeSettings(storage_settings);
683+
LOG_INFO(&Poco::Logger::get(args.table_id.getNameForLogs() + " (registerStorageMergeTree)"),
684+
"Applied setting 'randomize_part_type'. "
685+
"Setting 'min_rows_for_wide_part' changed to {}. "
686+
"Setting 'min_bytes_for_wide_part' changed to {}.",
687+
storage_settings->min_rows_for_wide_part, storage_settings->min_bytes_for_wide_part);
688+
}
689+
656690
if (arg_num != arg_cnt)
657691
throw Exception("Wrong number of engine arguments.", ErrorCodes::BAD_ARGUMENTS);
658692

tests/clickhouse-test

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -506,15 +506,6 @@ def collect_build_flags(client):
506506
else:
507507
raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
508508

509-
clickhouse_proc = Popen(shlex.split(client), stdin=PIPE, stdout=PIPE, stderr=PIPE)
510-
(stdout, stderr) = clickhouse_proc.communicate("SELECT value FROM system.merge_tree_settings WHERE name = 'min_bytes_for_wide_part'")
511-
512-
if clickhouse_proc.returncode == 0:
513-
if '10485760' in stdout:
514-
result.append(BuildFlags.POLYMORPHIC_PARTS)
515-
else:
516-
raise Exception("Cannot get inforamtion about build from server errorcode {}, stderr {}".format(clickhouse_proc.returncode, stderr))
517-
518509
return result
519510

520511

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<yandex>
2+
<merge_tree>
3+
<min_rows_for_wide_part>0</min_rows_for_wide_part>
4+
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
5+
</merge_tree>
6+
</yandex>

tests/integration/test_adaptive_granularity/test.py

Lines changed: 33 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -6,45 +6,23 @@
66
from helpers.test_tools import assert_eq_with_retry
77

88
cluster = ClickHouseCluster(__file__)
9-
node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
10-
with_zookeeper=True)
11-
node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
12-
with_zookeeper=True)
13-
14-
node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
15-
with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18',
16-
with_installed_binary=True)
17-
node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
18-
with_zookeeper=True)
19-
20-
node5 = cluster.add_instance('node5', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
21-
with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15',
22-
with_installed_binary=True)
23-
node6 = cluster.add_instance('node6', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
24-
with_zookeeper=True)
25-
26-
node7 = cluster.add_instance('node7', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
27-
with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True,
28-
with_installed_binary=True)
29-
node8 = cluster.add_instance('node8', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
30-
with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True,
31-
with_installed_binary=True)
32-
33-
node9 = cluster.add_instance('node9', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml',
34-
'configs/merge_tree_settings.xml'], with_zookeeper=True,
35-
image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True,
36-
with_installed_binary=True)
37-
node10 = cluster.add_instance('node10', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml',
38-
'configs/merge_tree_settings.xml'], with_zookeeper=True,
39-
image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True,
40-
with_installed_binary=True)
41-
42-
node11 = cluster.add_instance('node11', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
43-
with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True,
44-
with_installed_binary=True)
45-
node12 = cluster.add_instance('node12', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'],
46-
with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True,
47-
with_installed_binary=True)
9+
node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
10+
node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
11+
12+
node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', with_installed_binary=True)
13+
node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/wide_parts_only.xml'], with_zookeeper=True)
14+
15+
node5 = cluster.add_instance('node5', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', with_installed_binary=True)
16+
node6 = cluster.add_instance('node6', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/wide_parts_only.xml'], with_zookeeper=True)
17+
18+
node7 = cluster.add_instance('node7', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
19+
node8 = cluster.add_instance('node8', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
20+
21+
node9 = cluster.add_instance('node9', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
22+
node10 = cluster.add_instance('node10', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
23+
24+
node11 = cluster.add_instance('node11', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
25+
node12 = cluster.add_instance('node12', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
4826

4927

5028
def prepare_single_pair_with_setting(first_node, second_node, group):
@@ -296,10 +274,14 @@ def test_mixed_granularity_single_node(start_dynamic_cluster, node):
296274
"INSERT INTO table_with_default_granularity VALUES (toDate('2018-09-01'), 1, 333), (toDate('2018-09-02'), 2, 444)")
297275

298276
def callback(n):
299-
n.replace_config("/etc/clickhouse-server/merge_tree_settings.xml",
300-
"<yandex><merge_tree><enable_mixed_granularity_parts>1</enable_mixed_granularity_parts></merge_tree></yandex>")
301-
n.replace_config("/etc/clickhouse-server/config.d/merge_tree_settings.xml",
302-
"<yandex><merge_tree><enable_mixed_granularity_parts>1</enable_mixed_granularity_parts></merge_tree></yandex>")
277+
new_config = """
278+
<yandex><merge_tree>
279+
<enable_mixed_granularity_parts>1</enable_mixed_granularity_parts>
280+
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
281+
</merge_tree></yandex>"""
282+
283+
n.replace_config("/etc/clickhouse-server/merge_tree_settings.xml", new_config)
284+
n.replace_config("/etc/clickhouse-server/config.d/merge_tree_settings.xml", new_config)
303285

304286
node.restart_with_latest_version(callback_onstop=callback)
305287
node.query("SYSTEM RELOAD CONFIG")
@@ -342,10 +324,14 @@ def test_version_update_two_nodes(start_dynamic_cluster):
342324
assert node12.query("SELECT COUNT() FROM table_with_default_granularity") == '2\n'
343325

344326
def callback(n):
345-
n.replace_config("/etc/clickhouse-server/merge_tree_settings.xml",
346-
"<yandex><merge_tree><enable_mixed_granularity_parts>0</enable_mixed_granularity_parts></merge_tree></yandex>")
347-
n.replace_config("/etc/clickhouse-server/config.d/merge_tree_settings.xml",
348-
"<yandex><merge_tree><enable_mixed_granularity_parts>0</enable_mixed_granularity_parts></merge_tree></yandex>")
327+
new_config = """
328+
<yandex><merge_tree>
329+
<enable_mixed_granularity_parts>0</enable_mixed_granularity_parts>
330+
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
331+
</merge_tree></yandex>"""
332+
333+
n.replace_config("/etc/clickhouse-server/merge_tree_settings.xml", new_config)
334+
n.replace_config("/etc/clickhouse-server/config.d/merge_tree_settings.xml", new_config)
349335

350336
node12.restart_with_latest_version(callback_onstop=callback)
351337

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<yandex>
2+
<merge_tree>
3+
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
4+
</merge_tree>
5+
</yandex>

tests/integration/test_backward_compatibility/test.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@
33
from helpers.cluster import ClickHouseCluster
44

55
cluster = ClickHouseCluster(__file__)
6-
node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='19.17.8.54',
7-
stay_alive=True, with_installed_binary=True)
8-
node2 = cluster.add_instance('node2', with_zookeeper=True)
6+
node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server', tag='19.17.8.54', stay_alive=True, with_installed_binary=True)
7+
node2 = cluster.add_instance('node2', main_configs=['configs/wide_parts_only.xml'], with_zookeeper=True)
98

109

1110
@pytest.fixture(scope="module")
@@ -25,7 +24,7 @@ def start_cluster():
2524
cluster.shutdown()
2625

2726

28-
def test_backward_compatability(start_cluster):
27+
def test_backward_compatability1(start_cluster):
2928
node2.query("INSERT INTO t VALUES (today(), 1)")
3029
node1.query("SYSTEM SYNC REPLICA t", timeout=10)
3130

tests/integration/test_check_table/test.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def started_cluster():
2121

2222
node1.query('''
2323
CREATE TABLE non_replicated_mt(date Date, id UInt32, value Int32)
24-
ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id;
24+
ENGINE = MergeTree() PARTITION BY toYYYYMM(date) ORDER BY id
25+
SETTINGS min_bytes_for_wide_part=0;
2526
''')
2627

2728
yield cluster
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<yandex>
2+
<merge_tree>
3+
<min_rows_for_wide_part>0</min_rows_for_wide_part>
4+
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
5+
</merge_tree>
6+
</yandex>

0 commit comments

Comments
 (0)