Skip to content

Commit 423ed0f

Browse files
azatclaude
andcommitted
Add a test for parquet metadata cache w/o query context
Co-Authored-By: Claude <[email protected]>
1 parent bda4c37 commit 423ed0f

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
100 4950
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env bash
# Tags: no-fasttest
# Tag no-fasttest: Depends on S3

# Regression test for Parquet reads performed by S3Queue background threads,
# which run without a query context on CurrentThread. Prior to the fix, the
# format factory lambda invoked
# CurrentThread::getQueryContext()->getParquetMetadataCache(), and that call
# crashed because getQueryContext() returned null in background threads.

set -e

# Resolve the directory containing this script so that shell_config.sh is
# found regardless of the caller's working directory.
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=../shell_config.sh
source "${CUR_DIR}/../shell_config.sh"

# Path prefix inside the S3 URL under which this test keeps its Parquet file.
S3_PATH="test/${CLICKHOUSE_DATABASE}_04034"
17+
18+
# Upload a 100-row Parquet file to S3 (id = 0..99, name = id rendered as a
# string), overwriting any object left at the same URL.
insert_parquet_sql="
INSERT INTO FUNCTION s3(s3_conn, url = 'http://localhost:11111/${S3_PATH}/data.parquet', format = Parquet)
SELECT number AS id, toString(number) AS name FROM numbers(100)
SETTINGS s3_truncate_on_insert = 1
"
$CLICKHOUSE_CLIENT -q "$insert_parquet_sql"
24+
25+
# Destination MergeTree table that collects the rows read from the queue.
create_dest_sql="
CREATE TABLE ${CLICKHOUSE_DATABASE}.dest (id UInt64, name String)
ENGINE = MergeTree ORDER BY id
"
$CLICKHOUSE_CLIENT -q "$create_dest_sql"
30+
31+
# S3Queue table over the uploaded *.parquet objects, with the Parquet metadata
# cache and native reader v3 both switched on. Its background processing
# thread has no query context on CurrentThread, which is the scenario under
# test. Client-side server logs are limited to the error level for this
# statement.
create_queue_sql="
CREATE TABLE ${CLICKHOUSE_DATABASE}.queue (id UInt64, name String)
ENGINE = S3Queue('http://localhost:11111/${S3_PATH}/*.parquet', 'Parquet')
SETTINGS
keeper_path = '/clickhouse/${CLICKHOUSE_DATABASE}/04034_s3queue',
mode = 'unordered',
after_processing = 'keep',
s3queue_processing_threads_num = 1,
s3queue_polling_min_timeout_ms = 100,
s3queue_polling_max_timeout_ms = 500,
input_format_parquet_use_native_reader_v3 = 1,
use_parquet_metadata_cache = 1
"
$CLICKHOUSE_CLIENT --send_logs_level=error -q "$create_queue_sql"
46+
47+
# Attach a materialized view so that S3Queue starts its background reads and
# forwards the rows into the dest table.
create_mv_sql="
CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}.mv TO ${CLICKHOUSE_DATABASE}.dest
AS SELECT id, name FROM ${CLICKHOUSE_DATABASE}.queue
"
$CLICKHOUSE_CLIENT -q "$create_mv_sql"
52+
53+
# Wait for S3Queue to process the file: poll the destination row count up to
# 60 times with a 0.5 s pause between polls (about 30 seconds in total).
processed=0
for _ in {1..60}; do
    count=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM ${CLICKHOUSE_DATABASE}.dest")
    if [[ "$count" -ge 100 ]]; then
        processed=1
        break
    fi
    sleep 0.5
done

# Report a timeout explicitly instead of falling through silently, so a
# failure is attributable to the queue never delivering the rows rather than
# showing up only as a mismatch in the final result below.
if [[ "$processed" -ne 1 ]]; then
    echo "Timed out waiting for S3Queue to process data.parquet (rows in dest: ${count})" >&2
fi

# Final result compared against the reference: row count and sum of ids.
$CLICKHOUSE_CLIENT -q "SELECT count(), sum(id) FROM ${CLICKHOUSE_DATABASE}.dest"
63+
64+
# Cleanup: drop the view first, then the queue, then the destination table,
# each synchronously.
for drop_stmt in \
    "DROP VIEW IF EXISTS ${CLICKHOUSE_DATABASE}.mv SYNC" \
    "DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.queue SYNC" \
    "DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.dest SYNC"; do
    $CLICKHOUSE_CLIENT -q "$drop_stmt"
done

0 commit comments

Comments
 (0)