Skip to content

Commit fbf7fab

Browse files
Backport #80386 to 25.4: Fix Data Lake tests concurrent run
1 parent efc7098 commit fbf7fab

File tree

5 files changed

+85
-74
lines changed
tests/integration/compose/docker_compose_glue_catalog.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
services:
22
glue:
3-
image: motoserver/moto
4-
container_name: glue
3+
image: motoserver/moto:5.1.5
54
ports:
65
- 3000:3000
76
environment:
@@ -12,12 +11,12 @@ services:
1211
timeout: 5s
1312
retries: 10
1413
start_period: 30s
14+
# TODO: can we simply use with_minio=True instead?
1515
minio:
16-
image: minio/minio
17-
container_name: minio
16+
image: minio/minio:RELEASE.2024-07-31T05-46-26Z
1817
environment:
1918
- MINIO_ROOT_USER=minio
20-
- MINIO_ROOT_PASSWORD=minio123
19+
- MINIO_ROOT_PASSWORD=ClickHouse_Minio_P@ssw0rd
2120
- MINIO_DOMAIN=minio
2221
networks:
2322
default:
@@ -27,20 +26,21 @@ services:
2726
- 9001:9001
2827
- 9002:9000
2928
command: ["server", "/data", "--console-address", ":9001"]
29+
# TODO: move this code to cluster.py
3030
mc:
3131
depends_on:
3232
- minio
33-
image: minio/mc
34-
container_name: mc
33+
# Stick to version with "mc config"
34+
image: minio/mc:RELEASE.2025-04-16T18-13-26Z
3535
environment:
3636
- AWS_ACCESS_KEY_ID=minio
37-
- AWS_SECRET_ACCESS_KEY=minio123
37+
- AWS_SECRET_ACCESS_KEY=ClickHouse_Minio_P@ssw0rd
3838
- AWS_REGION=us-east-1
3939
entrypoint: >
4040
/bin/sh -c "
41-
until (/usr/bin/mc config host add minio http://minio:9000 minio minio123) do echo '...waiting...' && sleep 1; done;
42-
/usr/bin/mc rm -r --force minio/warehouse;
43-
/usr/bin/mc mb minio/warehouse --ignore-existing;
44-
/usr/bin/mc policy set public minio/warehouse;
41+
until (/usr/bin/mc config host add minio http://minio:9000 minio ClickHouse_Minio_P@ssw0rd) do echo '...waiting...' && sleep 1; done;
42+
/usr/bin/mc rm -r --force minio/warehouse-glue;
43+
/usr/bin/mc mb minio/warehouse-glue --ignore-existing;
44+
/usr/bin/mc policy set public minio/warehouse-glue;
4545
tail -f /dev/null
4646
"

tests/integration/compose/docker_compose_iceberg_rest_catalog.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
services:
22
spark-iceberg:
3-
image: tabulario/spark-iceberg
4-
container_name: spark-iceberg
3+
image: tabulario/spark-iceberg:3.5.5_1.8.1
54
build: spark/
65
depends_on:
76
rest:
@@ -17,7 +16,7 @@ services:
1716
- 10000:10000
1817
- 10001:10001
1918
rest:
20-
image: tabulario/iceberg-rest
19+
image: tabulario/iceberg-rest:1.6.0
2120
ports:
2221
- 8182:8181
2322
environment:
@@ -33,35 +32,36 @@ services:
3332
timeout: 5s
3433
retries: 10
3534
start_period: 30s
35+
# TODO: can we simply use with_minio=True instead?
3636
minio:
37-
image: minio/minio
38-
container_name: minio
37+
image: minio/minio:RELEASE.2024-07-31T05-46-26Z
3938
environment:
4039
- MINIO_ROOT_USER=minio
4140
- MINIO_ROOT_PASSWORD=ClickHouse_Minio_P@ssw0rd
4241
- MINIO_DOMAIN=minio
4342
networks:
4443
default:
4544
aliases:
46-
- warehouse.minio
45+
- warehouse-rest.minio
4746
ports:
4847
- 9001:9001
4948
- 9002:9000
5049
command: ["server", "/data", "--console-address", ":9001"]
50+
# TODO: move this code to cluster.py
5151
mc:
5252
depends_on:
5353
- minio
54-
image: minio/mc
55-
container_name: mc
54+
# Stick to version with "mc config"
55+
image: minio/mc:RELEASE.2025-04-16T18-13-26Z
5656
environment:
5757
- AWS_ACCESS_KEY_ID=minio
5858
- AWS_SECRET_ACCESS_KEY=ClickHouse_Minio_P@ssw0rd
5959
- AWS_REGION=us-east-1
6060
entrypoint: >
6161
/bin/sh -c "
6262
until (/usr/bin/mc config host add minio http://minio:9000 minio ClickHouse_Minio_P@ssw0rd) do echo '...waiting...' && sleep 1; done;
63-
/usr/bin/mc rm -r --force minio/warehouse;
64-
/usr/bin/mc mb minio/warehouse --ignore-existing;
65-
/usr/bin/mc policy set public minio/warehouse;
63+
/usr/bin/mc rm -r --force minio/warehouse-rest;
64+
/usr/bin/mc mb minio/warehouse-rest --ignore-existing;
65+
/usr/bin/mc policy set public minio/warehouse-rest;
6666
tail -f /dev/null
6767
"

tests/integration/helpers/cluster.py

Lines changed: 44 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,6 @@ def __init__(
549549
self.spark_session = None
550550
self.with_iceberg_catalog = False
551551
self.with_glue_catalog = False
552-
self.with_hms_catalog = False
553552

554553
self.with_azurite = False
555554
self.azurite_container = "azurite-container"
@@ -1421,45 +1420,25 @@ def setup_minio_cmd(self, instance, env_variables, docker_compose_yml_dir):
14211420
return self.base_minio_cmd
14221421

14231422
def setup_glue_catalog_cmd(self, instance, env_variables, docker_compose_yml_dir):
1423+
self.with_glue_catalog = True
14241424
self.base_cmd.extend(
14251425
[
14261426
"--file",
14271427
p.join(docker_compose_yml_dir, "docker_compose_glue_catalog.yml"),
14281428
]
14291429
)
1430-
self.base_iceberg_catalog_cmd = self.compose_cmd(
1430+
self.base_glue_catalog_cmd = self.compose_cmd(
14311431
"--env-file",
14321432
instance.env_file,
14331433
"--file",
14341434
p.join(docker_compose_yml_dir, "docker_compose_glue_catalog.yml"),
14351435
)
1436-
return self.base_iceberg_catalog_cmd
1437-
1438-
1439-
def setup_hms_catalog_cmd(
1440-
self, instance, env_variables, docker_compose_yml_dir
1441-
):
1442-
self.base_cmd.extend(
1443-
[
1444-
"--file",
1445-
p.join(
1446-
docker_compose_yml_dir, "docker_compose_iceberg_hms_catalog.yml"
1447-
),
1448-
]
1449-
)
1450-
1451-
self.base_iceberg_hms_cmd = self.compose_cmd(
1452-
"--env-file",
1453-
instance.env_file,
1454-
"--file",
1455-
p.join(docker_compose_yml_dir, "docker_compose_iceberg_hms_catalog.yml"),
1456-
)
1457-
return self.base_iceberg_hms_cmd
1458-
1436+
return self.base_glue_catalog_cmd
14591437

14601438
def setup_iceberg_catalog_cmd(
14611439
self, instance, env_variables, docker_compose_yml_dir
14621440
):
1441+
self.with_iceberg_catalog = True
14631442
self.base_cmd.extend(
14641443
[
14651444
"--file",
@@ -1645,7 +1624,6 @@ def add_instance(
16451624
with_prometheus=False,
16461625
with_iceberg_catalog=False,
16471626
with_glue_catalog=False,
1648-
with_hms_catalog=False,
16491627
handle_prometheus_remote_write=False,
16501628
handle_prometheus_remote_read=False,
16511629
use_old_analyzer=None,
@@ -1769,7 +1747,6 @@ def add_instance(
17691747
with_ldap=with_ldap,
17701748
with_iceberg_catalog=with_iceberg_catalog,
17711749
with_glue_catalog=with_glue_catalog,
1772-
with_hms_catalog=with_hms_catalog,
17731750
use_old_analyzer=use_old_analyzer,
17741751
use_distributed_plan=use_distributed_plan,
17751752
server_bin_path=self.server_bin_path,
@@ -1968,13 +1945,6 @@ def add_instance(
19681945
)
19691946
)
19701947

1971-
if with_hms_catalog and not self.with_hms_catalog:
1972-
cmds.append(
1973-
self.setup_hms_catalog_cmd(
1974-
instance, env_variables, docker_compose_yml_dir
1975-
)
1976-
)
1977-
19781948
if with_azurite and not self.with_azurite:
19791949
cmds.append(
19801950
self.setup_azurite_cmd(instance, env_variables, docker_compose_yml_dir)
@@ -2591,6 +2561,34 @@ def wait_mongo_to_start(self, timeout=30, secure=False):
25912561
logging.debug("Can't connect to Mongo " + str(ex))
25922562
time.sleep(1)
25932563

2564+
2565+
def wait_custom_minio_to_start(self, buckets, host, port, timeout=180):
2566+
ip = self.get_instance_ip(host)
2567+
minio_client = Minio(
2568+
f"{ip}:{port}",
2569+
access_key=minio_access_key,
2570+
secret_key=minio_secret_key,
2571+
secure=False,
2572+
http_client=urllib3.PoolManager(cert_reqs="CERT_NONE"),
2573+
)
2574+
start = time.time()
2575+
while time.time() - start < timeout:
2576+
try:
2577+
minio_client.list_buckets()
2578+
2579+
logging.debug("Connected to Minio.")
2580+
2581+
if all(minio_client.bucket_exists(bucket) for bucket in buckets):
2582+
return
2583+
2584+
time.sleep(1)
2585+
except Exception as ex:
2586+
logging.debug("Can't connect to Minio: %s", str(ex))
2587+
time.sleep(1)
2588+
2589+
2590+
raise Exception("Can't wait Minio to start")
2591+
25942592
def wait_minio_to_start(self, timeout=180, secure=False):
25952593
self.minio_ip = self.get_instance_ip(self.minio_host)
25962594
self.minio_redirect_ip = self.get_instance_ip(self.minio_redirect_host)
@@ -3126,6 +3124,18 @@ def get_feature_flag_value(feature_flag):
31263124
logging.info("Trying to connect to Minio...")
31273125
self.wait_minio_to_start(secure=self.minio_certs_dir is not None)
31283126

3127+
if self.with_glue_catalog and self.base_glue_catalog_cmd:
3128+
logging.info("Trying to connect to Minio for glue catalog...")
3129+
subprocess_check_call(self.base_glue_catalog_cmd + common_opts)
3130+
self.up_called = True
3131+
self.wait_custom_minio_to_start(['warehouse-glue'], 'minio', 9000)
3132+
3133+
if self.with_iceberg_catalog and self.base_iceberg_catalog_cmd:
3134+
logging.info("Trying to connect to Minio for Iceberg catalog...")
3135+
subprocess_check_call(self.base_iceberg_catalog_cmd + common_opts)
3136+
self.up_called = True
3137+
self.wait_custom_minio_to_start(['warehouse-rest'], 'minio', 9000)
3138+
31293139
if self.with_azurite and self.base_azurite_cmd:
31303140
azurite_start_cmd = self.base_azurite_cmd + common_opts
31313141
logging.info(
@@ -3531,7 +3541,6 @@ def __init__(
35313541
with_ldap,
35323542
with_iceberg_catalog,
35333543
with_glue_catalog,
3534-
with_hms_catalog,
35353544
use_old_analyzer,
35363545
use_distributed_plan,
35373546
server_bin_path,

tests/integration/test_database_glue/test.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pyiceberg.schema import Schema
1919
from pyiceberg.table.sorting import SortField, SortOrder
2020
from pyiceberg.transforms import DayTransform, IdentityTransform
21+
from helpers.config_cluster import minio_access_key, minio_secret_key
2122
from pyiceberg.types import (
2223
DoubleType,
2324
FloatType,
@@ -60,7 +61,7 @@
6061
),
6162
)
6263

63-
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
64+
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse-glue/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
6465

6566
DEFAULT_PARTITION_SPEC = PartitionSpec(
6667
PartitionField(
@@ -84,10 +85,12 @@ def load_catalog_impl(started_cluster):
8485
"type": "glue",
8586
"glue.endpoint": BASE_URL_LOCAL_HOST,
8687
"glue.region": "us-east-1",
87-
"s3.endpoint": "http://localhost:9002",
88-
"s3.access-key-id": "minio",
89-
"s3.secret-access-key": "minio123",
90-
},)
88+
"s3.endpoint": f"http://{started_cluster.get_instance_ip('minio')}:9000",
89+
"s3.access-key-id": minio_access_key,
90+
"s3.secret-access-key": minio_secret_key,
91+
},
92+
)
93+
9194

9295
def create_table(
9396
catalog,
@@ -100,7 +103,7 @@ def create_table(
100103
return catalog.create_table(
101104
identifier=f"{namespace}.{table}",
102105
schema=schema,
103-
location=f"s3://warehouse/data",
106+
location="s3://warehouse-glue/data",
104107
partition_spec=partition_spec,
105108
sort_order=sort_order,
106109
)
@@ -122,7 +125,7 @@ def create_clickhouse_glue_database(
122125
settings = {
123126
"catalog_type": "glue",
124127
"warehouse": "test",
125-
"storage_endpoint": "http://minio:9000/warehouse",
128+
"storage_endpoint": "http://minio:9000/warehouse-glue",
126129
"region": "us-east-1",
127130
}
128131

@@ -132,7 +135,7 @@ def create_clickhouse_glue_database(
132135
f"""
133136
DROP DATABASE IF EXISTS {name};
134137
SET allow_experimental_database_glue_catalog=true;
135-
CREATE DATABASE {name} ENGINE = DataLakeCatalog('{BASE_URL}', 'minio', 'minio123')
138+
CREATE DATABASE {name} ENGINE = DataLakeCatalog('{BASE_URL}', '{minio_access_key}', '{minio_secret_key}')
136139
SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}
137140
"""
138141
)

tests/integration/test_database_iceberg/test.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,9 @@
2727
)
2828

2929
from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm
30-
from helpers.config_cluster import minio_secret_key
30+
from helpers.config_cluster import minio_secret_key, minio_access_key
3131
from helpers.s3_tools import get_file_contents, list_s3_objects, prepare_s3_bucket
3232
from helpers.test_tools import TSV, csv_compare
33-
from helpers.config_cluster import minio_secret_key
3433

3534
BASE_URL = "http://rest:8181/v1"
3635
BASE_URL_LOCAL = "http://localhost:8182/v1"
@@ -60,7 +59,7 @@
6059
),
6160
)
6261

63-
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
62+
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse-rest/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
6463

6564
DEFAULT_PARTITION_SPEC = PartitionSpec(
6665
PartitionField(
@@ -85,9 +84,9 @@ def load_catalog_impl(started_cluster):
8584
**{
8685
"uri": BASE_URL_LOCAL_RAW,
8786
"type": "rest",
88-
"s3.endpoint": f"http://localhost:9002",
89-
"s3.access-key-id": "minio",
90-
"s3.secret-access-key": "ClickHouse_Minio_P@ssw0rd",
87+
"s3.endpoint": f"http://{started_cluster.get_instance_ip('minio')}:9000",
88+
"s3.access-key-id": minio_access_key,
89+
"s3.secret-access-key": minio_secret_key,
9190
},
9291
)
9392

@@ -103,7 +102,7 @@ def create_table(
103102
return catalog.create_table(
104103
identifier=f"{namespace}.{table}",
105104
schema=schema,
106-
location=f"s3://warehouse/data",
105+
location=f"s3://warehouse-rest/data",
107106
partition_spec=partition_spec,
108107
sort_order=sort_order,
109108
)
@@ -125,7 +124,7 @@ def create_clickhouse_iceberg_database(
125124
settings = {
126125
"catalog_type": "rest",
127126
"warehouse": "demo",
128-
"storage_endpoint": "http://minio:9000/warehouse",
127+
"storage_endpoint": "http://minio:9000/warehouse-rest",
129128
}
130129

131130
settings.update(additional_settings)

0 commit comments

Comments
 (0)