Skip to content

Commit ac39b85

Browse files
Backport #94262 to 25.11: Fix data race in DataPartStorageOnDiskBase::remove vs system.parts
1 parent 600b27b commit ac39b85

File tree

3 files changed

+70
-1
lines changed

3 files changed

+70
-1
lines changed

src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,9 @@ void DataPartStorageOnDiskBase::remove(
747747
try
748748
{
749749
disk->moveDirectory(from, to);
750-
part_dir = part_dir_without_slash;
750+
/// NOTE: we intentionally don't update part_dir here because it would cause a data race
751+
/// with concurrent readers (e.g. system.parts table queries calling getFullPath()).
752+
/// The part is being removed anyway, so the path doesn't need to be updated.
751753
}
752754
catch (const Exception & e)
753755
{
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
OK
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env bash
2+
# Tags: race, no-parallel
3+
# Test for a race condition between reading system.parts and removing parts.
4+
# The race was in DataPartStorageOnDiskBase::remove() modifying part_dir
5+
# while getFullPath() was reading it concurrently.
6+
7+
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
8+
# shellcheck source=../shell_config.sh
9+
. "$CURDIR"/../shell_config.sh
10+
11+
set -e
12+
13+
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP TABLE IF EXISTS part_race"
14+
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "CREATE TABLE part_race (x UInt64) ENGINE = MergeTree ORDER BY x PARTITION BY x % 10
15+
SETTINGS old_parts_lifetime = 0, cleanup_delay_period = 0, cleanup_delay_period_random_add = 0,
16+
cleanup_thread_preferred_points_per_iteration = 0, max_cleanup_delay_period = 0"
17+
18+
TIMEOUT=30
19+
20+
function thread_insert()
21+
{
22+
local TIMELIMIT=$((SECONDS+TIMEOUT))
23+
local i=0
24+
while [ $SECONDS -lt "$TIMELIMIT" ]
25+
do
26+
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "INSERT INTO part_race SELECT $i" 2>/dev/null
27+
((i++)) || true
28+
done
29+
}
30+
31+
function thread_drop_partition()
32+
{
33+
local TIMELIMIT=$((SECONDS+TIMEOUT))
34+
while [ $SECONDS -lt "$TIMELIMIT" ]
35+
do
36+
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "ALTER TABLE part_race DROP PARTITION ID '$((RANDOM % 10))'" 2>/dev/null
37+
sleep 0.0$RANDOM
38+
done
39+
}
40+
41+
function thread_select_parts()
42+
{
43+
local TIMELIMIT=$((SECONDS+TIMEOUT))
44+
while [ $SECONDS -lt "$TIMELIMIT" ]
45+
do
46+
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT name, path FROM system.parts WHERE database = '${CLICKHOUSE_DATABASE}' AND table = 'part_race' FORMAT Null" 2>/dev/null
47+
done
48+
}
49+
50+
# Start multiple instances of each thread
51+
thread_insert &
52+
thread_insert &
53+
54+
thread_drop_partition &
55+
thread_drop_partition &
56+
57+
thread_select_parts &
58+
thread_select_parts &
59+
thread_select_parts &
60+
thread_select_parts &
61+
62+
wait
63+
64+
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "DROP TABLE part_race"
65+
66+
echo "OK"

0 commit comments

Comments
 (0)