# Generate unique query_ids for each export so the exports can be tracked
# in system.part_log afterwards.
# $RANDOM alone spans only 0..32767, so two concurrent runs of this test on
# the same host could collide; prefixing the shell PID ($$) keeps the ids
# unique per process. The ids are opaque and only ever compared against
# themselves, so the format is free to change.
export_query_id_1="export_$$_${RANDOM}_1"
export_query_id_2="export_$$_${RANDOM}_2"
export_query_id_3="export_$$_${RANDOM}_3"
export_query_id_4="export_$$_${RANDOM}_4"
# This should generate ~4 files:
# export_merge_tree_part_max_bytes_per_file=3500000 splits the part by size.
# NOTE(review): the query_id is passed to the `query` helper as a separate
# second argument — the original text had it concatenated directly onto the
# SQL string (no space), which would both corrupt the SQL and never hand the
# id to the client. Confirm the helper's signature accepts a query_id arg.
query "ALTER TABLE $big_table EXPORT PART '$big_part_max_bytes' TO TABLE $big_destination_max_bytes SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_bytes_per_file=3500000, output_format_parquet_row_group_size_bytes=1000000" "$export_query_id_1"

# export_merge_tree_part_max_rows_per_file = 1048576 (which is 4194304/4) to generate 4 files
query "ALTER TABLE $big_table EXPORT PART '$big_part_max_rows' TO TABLE $big_destination_max_rows SETTINGS allow_experimental_export_merge_tree_part = 1, export_merge_tree_part_max_rows_per_file=1048576" "$export_query_id_2"

echo "---- Table function with schema inheritance (no schema specified)"
query "ALTER TABLE $mt_table_tf EXPORT PART '2022_1_1_0' TO TABLE FUNCTION s3(s3_conn, filename='$tf_schema_inherit', format='Parquet', partition_strategy='hive') PARTITION BY year SETTINGS allow_experimental_export_merge_tree_part = 1" "$export_query_id_3"

echo "---- Table function with explicit compatible schema"
query "ALTER TABLE $mt_table_tf EXPORT PART '2023_2_2_0' TO TABLE FUNCTION s3(s3_conn, filename='$tf_schema_explicit', format='Parquet', structure='id UInt64, value String, year UInt16', partition_strategy='hive') PARTITION BY year SETTINGS allow_experimental_export_merge_tree_part = 1" "$export_query_id_4"
# Wait for all exports to complete
59
72
wait_for_exports() {
@@ -65,10 +78,14 @@ wait_for_exports() {
65
78
echo"Waiting for exports to complete (timeout: ${timeout}s)..."
66
79
67
80
while [ $elapsed-lt$timeout ];do
68
-
# Check if any exports are still in progress for our tables/parts
69
-
local active_exports=$(query "SELECT count() FROM system.exports WHERE (source_table = '$big_table' AND part_name IN ('$big_part_max_bytes', '$big_part_max_rows')) OR (source_table = '$mt_table_tf' AND part_name IN ('2022_1_1_0', '2023_2_2_0'))"| tr -d '\n')
81
+
# Flush logs to ensure part_log entries are visible
82
+
query "SYSTEM FLUSH LOGS"> /dev/null 2>&1||true
83
+
84
+
# Wait for part_log entries - these are written synchronously when export completes
85
+
# Check if all expected exports have corresponding part_log entries by query_id
86
+
local completed_count=$(query "SELECT count() FROM system.part_log WHERE event_type = 'ExportPart' AND query_id IN ('$export_query_id_1', '$export_query_id_2', '$export_query_id_3', '$export_query_id_4')"| tr -d '\n')
70
87
71
-
if [ "$active_exports"="0" ];then
88
+
if [ "$completed_count"="4" ];then
72
89
echo"All exports completed."
73
90
return 0
74
91
fi
@@ -78,8 +95,11 @@ wait_for_exports() {
78
95
done
79
96
80
97
echo"Timeout waiting for exports to complete after ${timeout}s"
81
-
echo"Remaining exports:"
82
-
query "SELECT source_table, part_name, elapsed, rows_read, total_rows_to_read FROM system.exports WHERE (source_table = '$big_table' AND part_name IN ('$big_part_max_bytes', '$big_part_max_rows')) OR (source_table = '$mt_table_tf' AND part_name IN ('2022_1_1_0', '2023_2_2_0'))"
98
+
query "SYSTEM FLUSH LOGS"> /dev/null 2>&1||true
99
+
echo"Completed exports in part_log:"
100
+
query "SELECT query_id, table, part_name, event_time FROM system.part_log WHERE event_type = 'ExportPart' AND query_id IN ('$export_query_id_1', '$export_query_id_2', '$export_query_id_3', '$export_query_id_4')"
101
+
echo"Remaining exports in system.exports:"
102
+
query "SELECT source_table, part_name, elapsed, rows_read, total_rows_to_read FROM system.exports WHERE ((source_table = '$big_table' AND part_name IN ('$big_part_max_bytes', '$big_part_max_rows')) OR (source_table = '$mt_table_tf' AND part_name IN ('2022_1_1_0', '2023_2_2_0')))"