|
1 | 1 | import argparse |
2 | 2 | import os |
| 3 | +import re |
3 | 4 | import subprocess |
4 | 5 | import time |
5 | 6 | from pathlib import Path |
6 | | -from typing import List, Tuple |
| 7 | +from typing import List, Optional, Tuple |
7 | 8 |
|
8 | 9 | from more_itertools import tail |
9 | 10 |
|
@@ -103,6 +104,150 @@ def _start_docker_in_docker(): |
103 | 104 | print(f"Started docker-in-docker asynchronously with PID {dockerd_proc.pid}") |
104 | 105 |
|
105 | 106 |
|
# Directory holding the integration-test docker-compose files, relative to the
# repo root (the script is expected to run with the repo root as CWD).
_COMPOSE_DIR = Path("./tests/integration/compose")

# Explicit mapping for with_* flags whose compose file name cannot be derived
# by simply prepending "docker_compose_" and appending ".yml".
# Keys are the suffix of the `with_X=True` flag; values are file names that
# must exist inside _COMPOSE_DIR. An empty list means the flag needs no
# compose file at all.
_WITH_FLAG_TO_COMPOSE: dict[str, List[str]] = {
    "mysql57": ["docker_compose_mysql.yml"],
    "mysql8": ["docker_compose_mysql_8_0.yml"],
    "dremio26": ["docker_compose_dremio_26_0.yml"],
    "kerberos_kdc": ["docker_compose_kerberos_kdc.yml"],
    # with_iceberg_catalog can use any of the iceberg catalogs; include them all
    "iceberg_catalog": [
        "docker_compose_iceberg_rest_catalog.yml",
        "docker_compose_iceberg_hms_catalog.yml",
        "docker_compose_iceberg_lakekeeper_catalog.yml",
        "docker_compose_iceberg_nessie_catalog.yml",
    ],
    "hms_catalog": ["docker_compose_iceberg_hms_catalog.yml"],
    "glue_catalog": ["docker_compose_glue_catalog.yml"],
    # All three prometheus roles share a single compose file.
    "prometheus_writer": ["docker_compose_prometheus.yml"],
    "prometheus_reader": ["docker_compose_prometheus.yml"],
    "prometheus_receiver": ["docker_compose_prometheus.yml"],
    # with_odbc_drivers implicitly sets up mysql8 + postgres
    "odbc_drivers": ["docker_compose_mysql_8_0.yml", "docker_compose_postgres.yml"],
    # Flags with no separate compose file of their own
    "jdbc_bridge": [],
    # NOTE(review): "net_trics" matches the (oddly spelled) with_net_trics flag
    # used by the integration tests — confirm it is not a typo before renaming.
    "net_trics": [],
}
| 134 | + |
| 135 | + |
def get_compose_files_for_test_modules(test_modules: List[str]) -> List[Path]:
    """Return compose files needed by the given test modules.

    Scans every Python source file in each test suite directory for:
    - `with_X=True` patterns (mapped via `_WITH_FLAG_TO_COMPOSE` or the obvious
      `docker_compose_{X}.yml` naming convention), and
    - explicit `docker_compose_*.yml` file name strings (used e.g. via
      `extra_parameters={"docker_compose_file_name": "..."}` calls).
    """
    with_flag_re = re.compile(r"\bwith_(\w+)\s*=\s*True")
    compose_name_re = re.compile(r"(docker_compose_\w+\.yml)")

    def candidate_names(text: str):
        # 1. with_X=True → compose file via mapping or naming convention
        for match in with_flag_re.finditer(text):
            flag = match.group(1)
            if flag in _WITH_FLAG_TO_COMPOSE:
                yield from _WITH_FLAG_TO_COMPOSE[flag]
            else:
                yield f"docker_compose_{flag}.yml"
        # 2. Directly named compose files (e.g. in extra_parameters dicts)
        for match in compose_name_re.finditer(text):
            yield match.group(1)

    found: set[Path] = set()
    # Only the top-level suite directory of each module matters here.
    for suite in {module.split("/")[0] for module in test_modules}:
        suite_root = Path("./tests/integration/") / suite
        if not suite_root.is_dir():
            continue
        for source_file in suite_root.glob("**/*.py"):
            try:
                text = source_file.read_text(errors="replace")
            except OSError:
                # Unreadable file — skip rather than fail the whole scan.
                continue
            for file_name in candidate_names(text):
                candidate = _COMPOSE_DIR / file_name
                # Only keep files that actually exist in the compose dir.
                if candidate.exists():
                    found.add(candidate)

    return sorted(found)
| 178 | + |
| 179 | + |
def get_images_from_compose_files(compose_files: List[Path]) -> List[str]:
    """Parse compose files and return a deduplicated list of image references.

    Environment variable placeholders like `${DOCKER_NGINX_DAV_TAG:-latest}` are
    resolved from `os.environ`. For clickhouse images that appear without a tag
    (e.g. `clickhouse/integration-test`) the tag is looked up from `IMAGES_ENV`.
    Images with still-unresolvable variables are silently skipped.
    """
    # Tags for known clickhouse images, taken from the IMAGES_ENV env vars
    # (missing or empty env vars are ignored).
    tag_by_image: dict[str, str] = {
        image_name: os.environ[env_var]
        for image_name, env_var in IMAGES_ENV.items()
        if os.environ.get(env_var)
    }

    var_re = re.compile(r"\$\{(\w+)(?::-([^}]*))?\}")
    image_line_re = re.compile(r"^\s+image:\s+(.+)$", re.MULTILINE)
    inline_comment_re = re.compile(r"\s+#.*$")

    def resolve(raw: str) -> Optional[str]:
        # Substitute ${VAR} / ${VAR:-default}; bare ${VAR} defaults to "latest".
        def substitute(match: re.Match) -> str:
            fallback = match.group(2)
            return os.environ.get(match.group(1), fallback if fallback is not None else "latest")

        expanded = var_re.sub(substitute, raw)
        if "${" in expanded:
            return None  # Still-unresolvable variable — skip
        # Append the correct tag for tagless known clickhouse images
        if ":" not in expanded and expanded in tag_by_image:
            return f"{expanded}:{tag_by_image[expanded]}"
        return expanded

    collected: set[str] = set()
    for compose_file in compose_files:
        try:
            text = compose_file.read_text()
        except OSError:
            continue
        for match in image_line_re.finditer(text):
            # Strip inline YAML comments from unquoted values before resolving
            # (e.g. `coredns/coredns:1.9.3 # :latest broke this test`).
            raw_value = inline_comment_re.sub("", match.group(1).strip())
            image = resolve(raw_value)
            if image:
                collected.add(image)

    return sorted(collected)
| 223 | + |
| 224 | + |
def prefetch_images(
    images: List[str], retries: int = 3, pull_timeout: int = 300
) -> bool:
    """Pull every image in parallel using `ci/prefetch-integration-test-images`.

    Images with no manifest for the current architecture (e.g. amd64-only images
    on arm64 runners) are silently skipped. Returns True on success, False if any
    image fails to pull for a real reason.
    """
    if not images:
        print("No images to pre-fetch.")
        return True

    # Pass retry/timeout settings to the script through its environment.
    pull_env = dict(os.environ)
    pull_env["PULL_RETRIES"] = str(retries)
    pull_env["PULL_TIMEOUT"] = str(pull_timeout)

    command = " ".join(
        [f"{repo_dir}/ci/jobs/scripts/prefetch-integration-test-images", *images]
    )
    return Shell.check(command, verbose=True, env=pull_env)
| 249 | + |
| 250 | + |
106 | 251 | def parse_args(): |
107 | 252 | parser = argparse.ArgumentParser(description="ClickHouse Build Job") |
108 | 253 | parser.add_argument("--options", help="Job parameters: ...") |
@@ -617,6 +762,22 @@ def main(): |
617 | 762 | else: |
618 | 763 | assert False, f"No tag found for image [{image_name}]" |
619 | 764 |
|
| 765 | + # Pre-fetch all Docker images needed by the selected test suites. |
| 766 | + # This is done after IMAGES_ENV vars are set so tag resolution works correctly. |
| 767 | + # Fail fast here rather than discovering missing images mid-test-run. |
| 768 | + all_test_modules = parallel_test_modules + sequential_test_modules |
| 769 | + compose_files = get_compose_files_for_test_modules(all_test_modules) |
| 770 | + print( |
| 771 | + f"Compose files detected for this batch ({len(compose_files)}): " |
| 772 | + + ", ".join(str(f.name) for f in compose_files) |
| 773 | + ) |
| 774 | + images_to_prefetch = get_images_from_compose_files(compose_files) |
| 775 | + if not prefetch_images(images_to_prefetch): |
| 776 | + Result.create_from( |
| 777 | + status=Result.Status.ERROR, |
| 778 | + info="Failed to pre-pull Docker images needed by the test batch", |
| 779 | + ).complete_job() |
| 780 | + |
620 | 781 | test_env = { |
621 | 782 | "CLICKHOUSE_TESTS_BASE_CONFIG_DIR": clickhouse_server_config_dir, |
622 | 783 | "CLICKHOUSE_TESTS_SERVER_BIN_PATH": clickhouse_path, |
|
0 commit comments