|
24 | 24 | import sys |
25 | 25 | import traceback |
26 | 26 | from itertools import repeat |
| 27 | +from tempfile import NamedTemporaryFile |
27 | 28 | from typing import Iterator |
28 | 29 |
|
29 | 30 | import requests |
30 | 31 | import urllib3.exceptions |
31 | 32 | from requests.adapters import DEFAULT_POOLSIZE |
| 33 | +from sphinx.util.inventory import InventoryFileReader |
32 | 34 |
|
33 | 35 | from airflow.utils.helpers import partition |
34 | 36 | from docs.exts.docs_build.docs_builder import get_available_providers_packages |
|
47 | 49 |
|
48 | 50 | def _fetch_file(session: requests.Session, package_name: str, url: str, path: str) -> tuple[str, bool]: |
49 | 51 | """ |
50 | | - Download a file and returns status information as a tuple with package |
| 52 | + Download a file, validate the Sphinx Inventory header, and return status information as a tuple with package |
51 | 53 | name and success status(bool value). |
52 | 54 | """ |
53 | 55 | try: |
54 | 56 | response = session.get(url, allow_redirects=True, stream=True) |
55 | 57 | except (requests.RequestException, urllib3.exceptions.HTTPError): |
56 | | - print(f"Failed to fetch inventory: {url}") |
| 58 | + print(f"{package_name}: Failed to fetch inventory: {url}") |
57 | 59 | traceback.print_exc(file=sys.stderr) |
58 | 60 | return package_name, False |
59 | 61 | if not response.ok: |
60 | | - print(f"Failed to fetch inventory: {url}") |
61 | | - print(f"Failed with status: {response.status_code}", file=sys.stderr) |
| 62 | + print(f"{package_name}: Failed to fetch inventory: {url}") |
| 63 | + print(f"{package_name}: Failed with status: {response.status_code}", file=sys.stderr) |
62 | 64 | return package_name, False |
63 | 65 |
|
64 | | - os.makedirs(os.path.dirname(path), exist_ok=True) |
65 | | - with open(path, "wb") as f: |
66 | | - response.raw.decode_content = True |
67 | | - shutil.copyfileobj(response.raw, f) |
68 | | - print(f"Fetched inventory: {url}") |
| 66 | + if response.url != url: |
| 67 | + print(f"{package_name}: {url} redirected to {response.url}") |
| 68 | + |
| 69 | + with NamedTemporaryFile(suffix=package_name, mode="wb+") as tf: |
| 70 | + for chunk in response.iter_content(chunk_size=4096): |
| 71 | + tf.write(chunk) |
| 72 | + |
| 73 | + tf.flush() |
| 74 | + tf.seek(0, 0) |
| 75 | + |
| 76 | + line = InventoryFileReader(tf).readline() |
| 77 | + if not line.startswith("# Sphinx inventory version"): |
| 78 | + print(f"{package_name}: Response contain unexpected Sphinx Inventory header: {line!r}.") |
| 79 | + return package_name, False |
| 80 | + |
| 81 | + tf.seek(0, 0) |
| 82 | + os.makedirs(os.path.dirname(path), exist_ok=True) |
| 83 | + with open(path, "wb") as f: |
| 84 | + shutil.copyfileobj(tf, f) |
| 85 | + |
| 86 | + print(f"{package_name}: Fetched inventory: {response.url}") |
69 | 87 | return package_name, True |
70 | 88 |
|
71 | 89 |
|
@@ -136,5 +154,7 @@ def fetch_inventories(): |
136 | 154 | print("Failed packages:") |
137 | 155 | for pkg_no, (pkg_name, _) in enumerate(failed, start=1): |
138 | 156 | print(f"{pkg_no}. {pkg_name}") |
| 157 | + print("Terminate execution.") |
| 158 | + raise SystemExit(1) |
139 | 159 |
|
140 | 160 | return [pkg_name for pkg_name, status in failed] |
0 commit comments