Skip to content

Commit 295ac92

Browse files
Backport #80386 to 25.5: Fix Data Lake tests concurrent run
1 parent a8b9eaa commit 295ac92

File tree

8 files changed: +112 -53 lines changed
tests/integration/compose/docker_compose_glue_catalog.yml

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
services:
22
glue:
33
image: motoserver/moto
4-
container_name: glue
54
ports:
65
- 3000:3000
76
environment:
@@ -12,11 +11,12 @@ services:
1211
timeout: 5s
1312
retries: 10
1413
start_period: 30s
14+
# TODO: can we simply use with_minio=True instead?
1515
minio:
16-
image: minio/minio
16+
image: minio/minio:RELEASE.2024-07-31T05-46-26Z
1717
environment:
1818
- MINIO_ROOT_USER=minio
19-
- MINIO_ROOT_PASSWORD=minio123
19+
- MINIO_ROOT_PASSWORD=ClickHouse_Minio_P@ssw0rd
2020
- MINIO_DOMAIN=minio
2121
networks:
2222
default:
@@ -26,19 +26,21 @@ services:
2626
- 9001:9001
2727
- 9002:9000
2828
command: ["server", "/data", "--console-address", ":9001"]
29+
# TODO: move this code to cluster.py
2930
mc:
3031
depends_on:
3132
- minio
32-
image: minio/mc
33+
# Stick to version with "mc config"
34+
image: minio/mc:RELEASE.2025-04-16T18-13-26Z
3335
environment:
3436
- AWS_ACCESS_KEY_ID=minio
35-
- AWS_SECRET_ACCESS_KEY=minio123
37+
- AWS_SECRET_ACCESS_KEY=ClickHouse_Minio_P@ssw0rd
3638
- AWS_REGION=us-east-1
3739
entrypoint: >
3840
/bin/sh -c "
39-
until (/usr/bin/mc config host add minio http://minio:9000 minio minio123) do echo '...waiting...' && sleep 1; done;
40-
/usr/bin/mc rm -r --force minio/warehouse;
41-
/usr/bin/mc mb minio/warehouse --ignore-existing;
42-
/usr/bin/mc policy set public minio/warehouse;
41+
until (/usr/bin/mc config host add minio http://minio:9000 minio ClickHouse_Minio_P@ssw0rd) do echo '...waiting...' && sleep 1; done;
42+
/usr/bin/mc rm -r --force minio/warehouse-glue;
43+
/usr/bin/mc mb minio/warehouse-glue --ignore-existing;
44+
/usr/bin/mc policy set public minio/warehouse-glue;
4345
tail -f /dev/null
4446
"

tests/integration/compose/docker_compose_iceberg_hms_catalog.yml

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
services:
22
spark-iceberg:
3-
image: tabulario/spark-iceberg
3+
image: tabulario/spark-iceberg:3.5.5_1.8.1
44
build: spark/
55
depends_on:
66
hive:
@@ -17,7 +17,7 @@ services:
1717
- 10001:10001
1818

1919
hive:
20-
build: ./../../hms_extensions/
20+
build: ./../../integration/test_database_hms/hms_extensions/
2121
restart: unless-stopped
2222
depends_on:
2323
minio:
@@ -26,19 +26,20 @@ services:
2626
- "9083:9083"
2727
environment:
2828
SERVICE_NAME: "metastore"
29-
SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/data/"
29+
SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse-hms/data/"
3030
healthcheck:
3131
test: ["CMD", "bash", "-c", "echo > /dev/tcp/localhost/9083"]
3232
interval: 2s
3333
timeout: 5s
3434
retries: 15
3535
start_period: 45s
3636

37+
# TODO: can we simply use with_minio=True instead?
3738
minio:
38-
image: minio/minio
39+
image: minio/minio:RELEASE.2024-07-31T05-46-26Z
3940
environment:
4041
- MINIO_ROOT_USER=minio
41-
- MINIO_ROOT_PASSWORD=minio123
42+
- MINIO_ROOT_PASSWORD=ClickHouse_Minio_P@ssw0rd
4243
- MINIO_DOMAIN=minio
4344
networks:
4445
default:
@@ -49,19 +50,21 @@ services:
4950
- 9002:9000
5051
command: ["server", "/data", "--console-address", ":9001"]
5152

53+
# TODO: move this code to cluster.py
5254
mc:
5355
depends_on:
5456
- minio
55-
image: minio/mc
57+
# Stick to version with "mc config"
58+
image: minio/mc:RELEASE.2025-04-16T18-13-26Z
5659
environment:
5760
- AWS_ACCESS_KEY_ID=minio
58-
- AWS_SECRET_ACCESS_KEY=minio123
61+
- AWS_SECRET_ACCESS_KEY=ClickHouse_Minio_P@ssw0rd
5962
- AWS_REGION=us-east-1
6063
entrypoint: >
6164
/bin/sh -c "
62-
until (/usr/bin/mc config host add minio http://minio:9000 minio minio123) do echo '...waiting...' && sleep 1; done;
63-
/usr/bin/mc rm -r --force minio/warehouse;
64-
/usr/bin/mc mb minio/warehouse --ignore-existing;
65-
/usr/bin/mc policy set public minio/warehouse;
65+
until (/usr/bin/mc config host add minio http://minio:9000 minio ClickHouse_Minio_P@ssw0rd) do echo '...waiting...' && sleep 1; done;
66+
/usr/bin/mc rm -r --force minio/warehouse-hms;
67+
/usr/bin/mc mb minio/warehouse-hms --ignore-existing;
68+
/usr/bin/mc policy set public minio/warehouse-hms;
6669
tail -f /dev/null
6770
"

tests/integration/compose/docker_compose_iceberg_rest_catalog.yml

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
services:
22
spark-iceberg:
3-
image: tabulario/spark-iceberg
3+
image: tabulario/spark-iceberg:3.5.5_1.8.1
44
build: spark/
55
depends_on:
66
rest:
@@ -16,7 +16,7 @@ services:
1616
- 10000:10000
1717
- 10001:10001
1818
rest:
19-
image: tabulario/iceberg-rest
19+
image: tabulario/iceberg-rest:1.6.0
2020
ports:
2121
- 8182:8181
2222
environment:
@@ -32,33 +32,36 @@ services:
3232
timeout: 5s
3333
retries: 10
3434
start_period: 30s
35+
# TODO: can we simply use with_minio=True instead?
3536
minio:
36-
image: minio/minio
37+
image: minio/minio:RELEASE.2024-07-31T05-46-26Z
3738
environment:
3839
- MINIO_ROOT_USER=minio
3940
- MINIO_ROOT_PASSWORD=ClickHouse_Minio_P@ssw0rd
4041
- MINIO_DOMAIN=minio
4142
networks:
4243
default:
4344
aliases:
44-
- warehouse.minio
45+
- warehouse-rest.minio
4546
ports:
4647
- 9001:9001
4748
- 9002:9000
4849
command: ["server", "/data", "--console-address", ":9001"]
50+
# TODO: move this code to cluster.py
4951
mc:
5052
depends_on:
5153
- minio
52-
image: minio/mc
54+
# Stick to version with "mc config"
55+
image: minio/mc:RELEASE.2025-04-16T18-13-26Z
5356
environment:
5457
- AWS_ACCESS_KEY_ID=minio
5558
- AWS_SECRET_ACCESS_KEY=ClickHouse_Minio_P@ssw0rd
5659
- AWS_REGION=us-east-1
5760
entrypoint: >
5861
/bin/sh -c "
5962
until (/usr/bin/mc config host add minio http://minio:9000 minio ClickHouse_Minio_P@ssw0rd) do echo '...waiting...' && sleep 1; done;
60-
/usr/bin/mc rm -r --force minio/warehouse;
61-
/usr/bin/mc mb minio/warehouse --ignore-existing;
62-
/usr/bin/mc policy set public minio/warehouse;
63+
/usr/bin/mc rm -r --force minio/warehouse-rest;
64+
/usr/bin/mc mb minio/warehouse-rest --ignore-existing;
65+
/usr/bin/mc policy set public minio/warehouse-rest;
6366
tail -f /dev/null
6467
"

tests/integration/helpers/cluster.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,24 +1421,26 @@ def setup_minio_cmd(self, instance, env_variables, docker_compose_yml_dir):
14211421
return self.base_minio_cmd
14221422

14231423
def setup_glue_catalog_cmd(self, instance, env_variables, docker_compose_yml_dir):
1424+
self.with_glue_catalog = True
14241425
self.base_cmd.extend(
14251426
[
14261427
"--file",
14271428
p.join(docker_compose_yml_dir, "docker_compose_glue_catalog.yml"),
14281429
]
14291430
)
1430-
self.base_iceberg_catalog_cmd = self.compose_cmd(
1431+
self.base_glue_catalog_cmd = self.compose_cmd(
14311432
"--env-file",
14321433
instance.env_file,
14331434
"--file",
14341435
p.join(docker_compose_yml_dir, "docker_compose_glue_catalog.yml"),
14351436
)
1436-
return self.base_iceberg_catalog_cmd
1437+
return self.base_glue_catalog_cmd
14371438

14381439

14391440
def setup_hms_catalog_cmd(
14401441
self, instance, env_variables, docker_compose_yml_dir
14411442
):
1443+
self.with_hms_catalog = True
14421444
self.base_cmd.extend(
14431445
[
14441446
"--file",
@@ -1460,6 +1462,7 @@ def setup_hms_catalog_cmd(
14601462
def setup_iceberg_catalog_cmd(
14611463
self, instance, env_variables, docker_compose_yml_dir
14621464
):
1465+
self.with_iceberg_catalog = True
14631466
self.base_cmd.extend(
14641467
[
14651468
"--file",
@@ -2591,6 +2594,34 @@ def wait_mongo_to_start(self, timeout=30, secure=False):
25912594
logging.debug("Can't connect to Mongo " + str(ex))
25922595
time.sleep(1)
25932596

2597+
2598+
def wait_custom_minio_to_start(self, buckets, host, port, timeout=180):
2599+
ip = self.get_instance_ip(host)
2600+
minio_client = Minio(
2601+
f"{ip}:{port}",
2602+
access_key=minio_access_key,
2603+
secret_key=minio_secret_key,
2604+
secure=False,
2605+
http_client=urllib3.PoolManager(cert_reqs="CERT_NONE"),
2606+
)
2607+
start = time.time()
2608+
while time.time() - start < timeout:
2609+
try:
2610+
minio_client.list_buckets()
2611+
2612+
logging.debug("Connected to Minio.")
2613+
2614+
if all(minio_client.bucket_exists(bucket) for bucket in buckets):
2615+
return
2616+
2617+
time.sleep(1)
2618+
except Exception as ex:
2619+
logging.debug("Can't connect to Minio: %s", str(ex))
2620+
time.sleep(1)
2621+
2622+
2623+
raise Exception("Can't wait Minio to start")
2624+
25942625
def wait_minio_to_start(self, timeout=180, secure=False):
25952626
self.minio_ip = self.get_instance_ip(self.minio_host)
25962627
self.minio_redirect_ip = self.get_instance_ip(self.minio_redirect_host)
@@ -3126,6 +3157,24 @@ def get_feature_flag_value(feature_flag):
31263157
logging.info("Trying to connect to Minio...")
31273158
self.wait_minio_to_start(secure=self.minio_certs_dir is not None)
31283159

3160+
if self.with_glue_catalog and self.base_glue_catalog_cmd:
3161+
logging.info("Trying to connect to Minio for glue catalog...")
3162+
subprocess_check_call(self.base_glue_catalog_cmd + common_opts)
3163+
self.up_called = True
3164+
self.wait_custom_minio_to_start(['warehouse-glue'], 'minio', 9000)
3165+
3166+
if self.with_hms_catalog and self.base_iceberg_hms_cmd:
3167+
logging.info("Trying to connect to Minio for hms catalog...")
3168+
subprocess_check_call(self.base_iceberg_hms_cmd + common_opts)
3169+
self.up_called = True
3170+
self.wait_custom_minio_to_start(['warehouse-hms'], 'minio', 9000)
3171+
3172+
if self.with_iceberg_catalog and self.base_iceberg_catalog_cmd:
3173+
logging.info("Trying to connect to Minio for Iceberg catalog...")
3174+
subprocess_check_call(self.base_iceberg_catalog_cmd + common_opts)
3175+
self.up_called = True
3176+
self.wait_custom_minio_to_start(['warehouse-rest'], 'minio', 9000)
3177+
31293178
if self.with_azurite and self.base_azurite_cmd:
31303179
azurite_start_cmd = self.base_azurite_cmd + common_opts
31313180
logging.info(

tests/integration/test_database_glue/test.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from pyiceberg.schema import Schema
1414
from pyiceberg.table.sorting import SortField, SortOrder
1515
from pyiceberg.transforms import DayTransform, IdentityTransform
16+
from helpers.config_cluster import minio_access_key, minio_secret_key
1617
from pyiceberg.types import (
1718
DoubleType,
1819
FloatType,
@@ -53,7 +54,7 @@
5354
),
5455
)
5556

56-
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
57+
DEFAULT_CREATE_TABLE = "CREATE TABLE {}.`{}.{}`\\n(\\n `datetime` Nullable(DateTime64(6)),\\n `symbol` Nullable(String),\\n `bid` Nullable(Float64),\\n `ask` Nullable(Float64),\\n `details` Tuple(created_by Nullable(String))\\n)\\nENGINE = Iceberg(\\'http://minio:9000/warehouse-glue/data/\\', \\'minio\\', \\'[HIDDEN]\\')\n"
5758

5859
DEFAULT_PARTITION_SPEC = PartitionSpec(
5960
PartitionField(
@@ -79,9 +80,9 @@ def load_catalog_impl(started_cluster):
7980
"type": "glue",
8081
"glue.endpoint": BASE_URL_LOCAL_HOST,
8182
"glue.region": "us-east-1",
82-
"s3.endpoint": "http://localhost:9002",
83-
"s3.access-key-id": "minio",
84-
"s3.secret-access-key": "minio123",
83+
"s3.endpoint": f"http://{started_cluster.get_instance_ip('minio')}:9000",
84+
"s3.access-key-id": minio_access_key,
85+
"s3.secret-access-key": minio_secret_key,
8586
},
8687
)
8788

@@ -97,7 +98,7 @@ def create_table(
9798
return catalog.create_table(
9899
identifier=f"{namespace}.{table}",
99100
schema=schema,
100-
location=f"s3://warehouse/data",
101+
location="s3://warehouse-glue/data",
101102
partition_spec=partition_spec,
102103
sort_order=sort_order,
103104
)
@@ -119,7 +120,7 @@ def create_clickhouse_glue_database(
119120
settings = {
120121
"catalog_type": "glue",
121122
"warehouse": "test",
122-
"storage_endpoint": "http://minio:9000/warehouse",
123+
"storage_endpoint": "http://minio:9000/warehouse-glue",
123124
"region": "us-east-1",
124125
}
125126

@@ -129,7 +130,7 @@ def create_clickhouse_glue_database(
129130
f"""
130131
DROP DATABASE IF EXISTS {name};
131132
SET allow_experimental_database_glue_catalog=true;
132-
CREATE DATABASE {name} ENGINE = DataLakeCatalog('{BASE_URL}', 'minio', 'minio123')
133+
CREATE DATABASE {name} ENGINE = DataLakeCatalog('{BASE_URL}', '{minio_access_key}', '{minio_secret_key}')
133134
SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}
134135
"""
135136
)

tests/integration/test_database_hms/hms_extensions/core-site.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<configuration>
55
<property>
66
<name>fs.defaultFS</name>
7-
<value>s3a://warehouse/data</value>
7+
<value>s3a://warehouse-hms/data</value>
88
</property>
99
<property>
1010
<name>fs.s3a.impl</name>
@@ -20,7 +20,7 @@
2020
</property>
2121
<property>
2222
<name>fs.s3a.secret.key</name>
23-
<value>minio123</value>
23+
<value>ClickHouse_Minio_P@ssw0rd</value>
2424
</property>
2525
<property>
2626
<name>fs.s3a.endpoint</name>

0 commit comments

Comments
 (0)