Skip to content

Commit 415d916

Browse files
Backport #80386 to 25.3: Fix Data Lake tests concurrent run
1 parent f3906a1 commit 415d916

File tree

5 files changed: 83 additions and 40 deletions
tests/integration/compose/docker_compose_glue_catalog.yml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
services:
22
glue:
3-
image: motoserver/moto
4-
container_name: glue
3+
image: motoserver/moto:5.1.5
54
ports:
65
- 3000:3000
76
environment:
@@ -12,12 +11,12 @@ services:
1211
timeout: 5s
1312
retries: 10
1413
start_period: 30s
14+
# TODO: can we simply use with_minio=True instead?
1515
minio:
16-
image: minio/minio
17-
container_name: minio
16+
image: minio/minio:RELEASE.2024-07-31T05-46-26Z
1817
environment:
1918
- MINIO_ROOT_USER=minio
20-
- MINIO_ROOT_PASSWORD=minio123
19+
- MINIO_ROOT_PASSWORD=ClickHouse_Minio_P@ssw0rd
2120
- MINIO_DOMAIN=minio
2221
networks:
2322
default:
@@ -27,20 +26,21 @@ services:
2726
- 9001:9001
2827
- 9002:9000
2928
command: ["server", "/data", "--console-address", ":9001"]
29+
# TODO: move this code to cluster.py
3030
mc:
3131
depends_on:
3232
- minio
33-
image: minio/mc
34-
container_name: mc
33+
# Stick to version with "mc config"
34+
image: minio/mc:RELEASE.2025-04-16T18-13-26Z
3535
environment:
3636
- AWS_ACCESS_KEY_ID=minio
37-
- AWS_SECRET_ACCESS_KEY=minio123
37+
- AWS_SECRET_ACCESS_KEY=ClickHouse_Minio_P@ssw0rd
3838
- AWS_REGION=us-east-1
3939
entrypoint: >
4040
/bin/sh -c "
41-
until (/usr/bin/mc config host add minio http://minio:9000 minio minio123) do echo '...waiting...' && sleep 1; done;
42-
/usr/bin/mc rm -r --force minio/warehouse;
43-
/usr/bin/mc mb minio/warehouse --ignore-existing;
44-
/usr/bin/mc policy set public minio/warehouse;
41+
until (/usr/bin/mc config host add minio http://minio:9000 minio ClickHouse_Minio_P@ssw0rd) do echo '...waiting...' && sleep 1; done;
42+
/usr/bin/mc rm -r --force minio/warehouse-glue;
43+
/usr/bin/mc mb minio/warehouse-glue --ignore-existing;
44+
/usr/bin/mc policy set public minio/warehouse-glue;
4545
tail -f /dev/null
4646
"

tests/integration/compose/docker_compose_iceberg_rest_catalog.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
services:
22
spark-iceberg:
3-
image: tabulario/spark-iceberg
4-
container_name: spark-iceberg
3+
image: tabulario/spark-iceberg:3.5.5_1.8.1
54
build: spark/
65
depends_on:
76
rest:
@@ -17,7 +16,7 @@ services:
1716
- 10000:10000
1817
- 10001:10001
1918
rest:
20-
image: tabulario/iceberg-rest
19+
image: tabulario/iceberg-rest:1.6.0
2120
ports:
2221
- 8182:8181
2322
environment:
@@ -33,35 +32,36 @@ services:
3332
timeout: 5s
3433
retries: 10
3534
start_period: 30s
35+
# TODO: can we simply use with_minio=True instead?
3636
minio:
37-
image: minio/minio
38-
container_name: minio
37+
image: minio/minio:RELEASE.2024-07-31T05-46-26Z
3938
environment:
4039
- MINIO_ROOT_USER=minio
4140
- MINIO_ROOT_PASSWORD=ClickHouse_Minio_P@ssw0rd
4241
- MINIO_DOMAIN=minio
4342
networks:
4443
default:
4544
aliases:
46-
- warehouse.minio
45+
- warehouse-rest.minio
4746
ports:
4847
- 9001:9001
4948
- 9002:9000
5049
command: ["server", "/data", "--console-address", ":9001"]
50+
# TODO: move this code to cluster.py
5151
mc:
5252
depends_on:
5353
- minio
54-
image: minio/mc
55-
container_name: mc
54+
# Stick to version with "mc config"
55+
image: minio/mc:RELEASE.2025-04-16T18-13-26Z
5656
environment:
5757
- AWS_ACCESS_KEY_ID=minio
5858
- AWS_SECRET_ACCESS_KEY=ClickHouse_Minio_P@ssw0rd
5959
- AWS_REGION=us-east-1
6060
entrypoint: >
6161
/bin/sh -c "
6262
until (/usr/bin/mc config host add minio http://minio:9000 minio ClickHouse_Minio_P@ssw0rd) do echo '...waiting...' && sleep 1; done;
63-
/usr/bin/mc rm -r --force minio/warehouse;
64-
/usr/bin/mc mb minio/warehouse --ignore-existing;
65-
/usr/bin/mc policy set public minio/warehouse;
63+
/usr/bin/mc rm -r --force minio/warehouse-rest;
64+
/usr/bin/mc mb minio/warehouse-rest --ignore-existing;
65+
/usr/bin/mc policy set public minio/warehouse-rest;
6666
tail -f /dev/null
6767
"

tests/integration/helpers/cluster.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1389,17 +1389,18 @@ def setup_glue_catalog_cmd(
13891389
),
13901390
]
13911391
)
1392-
self.base_iceberg_catalog_cmd = self.compose_cmd(
1392+
self.base_glue_catalog_cmd = self.compose_cmd(
13931393
"--env-file",
13941394
instance.env_file,
13951395
"--file",
13961396
p.join(docker_compose_yml_dir, "docker_compose_glue_catalog.yml"),
13971397
)
1398-
return self.base_iceberg_catalog_cmd
1398+
return self.base_glue_catalog_cmd
13991399

14001400
def setup_iceberg_catalog_cmd(
14011401
self, instance, env_variables, docker_compose_yml_dir
14021402
):
1403+
self.with_iceberg_catalog = True
14031404
self.base_cmd.extend(
14041405
[
14051406
"--file",
@@ -2473,6 +2474,34 @@ def wait_mongo_to_start(self, timeout=30, secure=False):
24732474
logging.debug("Can't connect to Mongo " + str(ex))
24742475
time.sleep(1)
24752476

2477+
2478+
def wait_custom_minio_to_start(self, buckets, host, port, timeout=180):
2479+
ip = self.get_instance_ip(host)
2480+
minio_client = Minio(
2481+
f"{ip}:{port}",
2482+
access_key=minio_access_key,
2483+
secret_key=minio_secret_key,
2484+
secure=False,
2485+
http_client=urllib3.PoolManager(cert_reqs="CERT_NONE"),
2486+
)
2487+
start = time.time()
2488+
while time.time() - start < timeout:
2489+
try:
2490+
minio_client.list_buckets()
2491+
2492+
logging.debug("Connected to Minio.")
2493+
2494+
if all(minio_client.bucket_exists(bucket) for bucket in buckets):
2495+
return
2496+
2497+
time.sleep(1)
2498+
except Exception as ex:
2499+
logging.debug("Can't connect to Minio: %s", str(ex))
2500+
time.sleep(1)
2501+
2502+
2503+
raise Exception("Can't wait Minio to start")
2504+
24762505
def wait_minio_to_start(self, timeout=180, secure=False):
24772506
self.minio_ip = self.get_instance_ip(self.minio_host)
24782507
self.minio_redirect_ip = self.get_instance_ip(self.minio_redirect_host)
@@ -2998,6 +3027,18 @@ def get_feature_flag_value(feature_flag):
29983027
logging.info("Trying to connect to Minio...")
29993028
self.wait_minio_to_start(secure=self.minio_certs_dir is not None)
30003029

3030+
if self.with_glue_catalog and self.base_glue_catalog_cmd:
3031+
logging.info("Trying to connect to Minio for glue catalog...")
3032+
subprocess_check_call(self.base_glue_catalog_cmd + common_opts)
3033+
self.up_called = True
3034+
self.wait_custom_minio_to_start(['warehouse-glue'], 'minio', 9000)
3035+
3036+
if self.with_iceberg_catalog and self.base_iceberg_catalog_cmd:
3037+
logging.info("Trying to connect to Minio for Iceberg catalog...")
3038+
subprocess_check_call(self.base_iceberg_catalog_cmd + common_opts)
3039+
self.up_called = True
3040+
self.wait_custom_minio_to_start(['warehouse-rest'], 'minio', 9000)
3041+
30013042
if self.with_azurite and self.base_azurite_cmd:
30023043
azurite_start_cmd = self.base_azurite_cmd + common_opts
30033044
logging.info(

tests/integration/test_database_glue/test.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pyiceberg.schema import Schema
1919
from pyiceberg.table.sorting import SortField, SortOrder
2020
from pyiceberg.transforms import DayTransform, IdentityTransform
21+
from helpers.config_cluster import minio_access_key, minio_secret_key
2122
from pyiceberg.types import (
2223
DoubleType,
2324
FloatType,
@@ -60,7 +61,7 @@
6061
),
6162
)
6263

63-
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
64+
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse-glue/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
6465

6566
DEFAULT_PARTITION_SPEC = PartitionSpec(
6667
PartitionField(
@@ -84,10 +85,11 @@ def load_catalog_impl(started_cluster):
8485
"type": "glue",
8586
"glue.endpoint": BASE_URL_LOCAL_HOST,
8687
"glue.region": "us-east-1",
87-
"s3.endpoint": "http://localhost:9002",
88-
"s3.access-key-id": "minio",
89-
"s3.secret-access-key": "minio123",
90-
},)
88+
"s3.endpoint": f"http://{started_cluster.get_instance_ip('minio')}:9000",
89+
"s3.access-key-id": minio_access_key,
90+
"s3.secret-access-key": minio_secret_key,
91+
},
92+
)
9193

9294
def create_table(
9395
catalog,
@@ -100,7 +102,7 @@ def create_table(
100102
return catalog.create_table(
101103
identifier=f"{namespace}.{table}",
102104
schema=schema,
103-
location=f"s3://warehouse/data",
105+
location="s3://warehouse-glue/data",
104106
partition_spec=partition_spec,
105107
sort_order=sort_order,
106108
)
@@ -122,7 +124,7 @@ def create_clickhouse_glue_database(
122124
settings = {
123125
"catalog_type": "glue",
124126
"warehouse": "test",
125-
"storage_endpoint": "http://minio:9000/warehouse",
127+
"storage_endpoint": "http://minio:9000/warehouse-glue",
126128
"region": "us-east-1",
127129
}
128130

@@ -132,7 +134,7 @@ def create_clickhouse_glue_database(
132134
f"""
133135
DROP DATABASE IF EXISTS {name};
134136
SET allow_experimental_database_glue_catalog=true;
135-
CREATE DATABASE {name} ENGINE = DataLakeCatalog('{BASE_URL}', 'minio', 'minio123')
137+
CREATE DATABASE {name} ENGINE = DataLakeCatalog('{BASE_URL}', '{minio_access_key}', '{minio_secret_key}')
136138
SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}
137139
"""
138140
)

tests/integration/test_database_iceberg/test.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
)
2828

2929
from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm
30-
from helpers.config_cluster import minio_secret_key
30+
from helpers.config_cluster import minio_secret_key, minio_access_key
3131
from helpers.s3_tools import get_file_contents, list_s3_objects, prepare_s3_bucket
3232
from helpers.test_tools import TSV, csv_compare
3333
from helpers.config_cluster import minio_secret_key
@@ -60,7 +60,7 @@
6060
),
6161
)
6262

63-
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
63+
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse-rest/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
6464

6565
DEFAULT_PARTITION_SPEC = PartitionSpec(
6666
PartitionField(
@@ -85,9 +85,9 @@ def load_catalog_impl(started_cluster):
8585
**{
8686
"uri": BASE_URL_LOCAL_RAW,
8787
"type": "rest",
88-
"s3.endpoint": f"http://localhost:9002",
89-
"s3.access-key-id": "minio",
90-
"s3.secret-access-key": "ClickHouse_Minio_P@ssw0rd",
88+
"s3.endpoint": f"http://{started_cluster.get_instance_ip('minio')}:9000",
89+
"s3.access-key-id": minio_access_key,
90+
"s3.secret-access-key": minio_secret_key,
9191
},
9292
)
9393

@@ -103,7 +103,7 @@ def create_table(
103103
return catalog.create_table(
104104
identifier=f"{namespace}.{table}",
105105
schema=schema,
106-
location=f"s3://warehouse/data",
106+
location=f"s3://warehouse-rest/data",
107107
partition_spec=partition_spec,
108108
sort_order=sort_order,
109109
)
@@ -125,7 +125,7 @@ def create_clickhouse_iceberg_database(
125125
settings = {
126126
"catalog_type": "rest",
127127
"warehouse": "demo",
128-
"storage_endpoint": "http://minio:9000/warehouse",
128+
"storage_endpoint": "http://minio:9000/warehouse-rest",
129129
}
130130

131131
settings.update(additional_settings)

0 commit comments

Comments
 (0)