Skip to content

Commit 04306f1

Browse files
authored
Validate Sphinx Inventory file header (#28838)
1 parent e1e3a8e commit 04306f1

File tree

1 file changed

+29
-9
lines changed

1 file changed

+29
-9
lines changed

docs/exts/docs_build/fetch_inventories.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,13 @@
2424
import sys
2525
import traceback
2626
from itertools import repeat
27+
from tempfile import NamedTemporaryFile
2728
from typing import Iterator
2829

2930
import requests
3031
import urllib3.exceptions
3132
from requests.adapters import DEFAULT_POOLSIZE
33+
from sphinx.util.inventory import InventoryFileReader
3234

3335
from airflow.utils.helpers import partition
3436
from docs.exts.docs_build.docs_builder import get_available_providers_packages
@@ -47,25 +49,41 @@
4749

4850
def _fetch_file(session: requests.Session, package_name: str, url: str, path: str) -> tuple[str, bool]:
    """
    Download a file, validate the Sphinx Inventory header and return status information.

    The response body is first streamed into a temporary file and is copied to ``path`` only
    when its first line is a Sphinx inventory header, so an invalid download is never written
    to the destination.

    :param session: Session used to perform the HTTP request.
    :param package_name: Name of the package; used as a prefix of every printed message.
    :param url: URL of the inventory file.
    :param path: Destination path of the inventory file.
    :return: Tuple with the package name and success status (bool value).
    """
    try:
        response = session.get(url, allow_redirects=True, stream=True)
    except (requests.RequestException, urllib3.exceptions.HTTPError):
        print(f"{package_name}: Failed to fetch inventory: {url}")
        traceback.print_exc(file=sys.stderr)
        return package_name, False

    # The response is streamed, so close it explicitly to release the connection
    # on every exit path (including the early returns below).
    with response:
        if not response.ok:
            print(f"{package_name}: Failed to fetch inventory: {url}")
            print(f"{package_name}: Failed with status: {response.status_code}", file=sys.stderr)
            return package_name, False

        if response.url != url:
            print(f"{package_name}: {url} redirected to {response.url}")

        with NamedTemporaryFile(suffix=package_name, mode="wb+") as tf:
            try:
                for chunk in response.iter_content(chunk_size=4096):
                    tf.write(chunk)
            except (requests.RequestException, urllib3.exceptions.HTTPError):
                # With stream=True the body is read lazily, so network errors can
                # surface here rather than in session.get().
                print(f"{package_name}: Failed to fetch inventory: {url}")
                traceback.print_exc(file=sys.stderr)
                return package_name, False

            tf.flush()
            tf.seek(0, 0)

            try:
                line = InventoryFileReader(tf).readline()
            except UnicodeDecodeError:
                # The reader decodes the first line as text; undecodable content
                # cannot be a valid inventory header.
                print(f"{package_name}: Response does not start with a text Sphinx Inventory header.")
                return package_name, False

            if not line.startswith("# Sphinx inventory version"):
                print(f"{package_name}: Response contain unexpected Sphinx Inventory header: {line!r}.")
                return package_name, False

            # Header is valid: rewind and copy the whole downloaded content to its destination.
            tf.seek(0, 0)
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "wb") as f:
                shutil.copyfileobj(tf, f)

        print(f"{package_name}: Fetched inventory: {response.url}")
        return package_name, True
7088

7189

@@ -136,5 +154,7 @@ def fetch_inventories():
136154
print("Failed packages:")
137155
for pkg_no, (pkg_name, _) in enumerate(failed, start=1):
138156
print(f"{pkg_no}. {pkg_name}")
157+
print("Terminate execution.")
158+
raise SystemExit(1)
139159

140160
return [pkg_name for pkg_name, status in failed]

0 commit comments

Comments
 (0)