Skip to content

Commit 63df690

Browse files
committed
[Utils] Fix HfUri round-trip for revisions with '/' and hyphenated convert refs
- to_uri() now URL-encodes '/' in non-special revisions so 'feature/foo' round-trips correctly.
- _SPECIAL_REFS_REVISION_REGEX accepts '-' and '.' in convert ref names (e.g. 'refs/convert/parquet-v2').
1 parent 37c0a91 commit 63df690

2 files changed

Lines changed: 24 additions & 1 deletion

File tree

src/huggingface_hub/utils/_hf_uris.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@
4949
# Special revisions that contain a '/'. They take precedence when splitting
5050
# the part after '@' into '<revision>/<path-in-repo>'. Matches 'refs/pr/N'
5151
# (Pull Request refs) and 'refs/convert/<name>' (e.g. parquet conversions).
52-
_SPECIAL_REFS_REVISION_REGEX = re.compile(r"^refs/(?:convert/\w+|pr/\d+)")
52+
# The conversion name accepts word characters ('\w') plus '.' and '-',
53+
# so names like 'parquet-v2' or 'duckdb.v1' round-trip correctly.
54+
_SPECIAL_REFS_REVISION_REGEX = re.compile(r"^refs/(?:convert/[\w.-]+|pr/\d+)")
5355

5456

5557
@dataclass(frozen=True)

tests/test_utils_hf_uris.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,27 @@
139139
),
140140
"hf://datasets/foo/bar@refs/convert/parquet/data.parquet",
141141
),
142+
# Convert ref names containing a hyphen or a dot — the name must not be cut short at the '-' or '.'.
143+
(
144+
"hf://datasets/foo/bar@refs/convert/parquet-v2/data.parquet",
145+
HfUri(
146+
type="dataset",
147+
id="foo/bar",
148+
revision="refs/convert/parquet-v2",
149+
path_in_repo="data.parquet",
150+
),
151+
"hf://datasets/foo/bar@refs/convert/parquet-v2/data.parquet",
152+
),
153+
(
154+
"hf://datasets/foo/bar@refs/convert/duckdb.v1/data.db",
155+
HfUri(
156+
type="dataset",
157+
id="foo/bar",
158+
revision="refs/convert/duckdb.v1",
159+
path_in_repo="data.db",
160+
),
161+
"hf://datasets/foo/bar@refs/convert/duckdb.v1/data.db",
162+
),
142163
# URL-encoded special revision
143164
(
144165
"hf://datasets/foo/bar@refs%2Fpr%2F10/file.csv",

0 commit comments

Comments
 (0)