Skip to content

Commit cc4ddf3

Browse files
committed
fix(config): restore $ref resolution for schemas
This commit fixes a regression introduced in Snakemake 9.6.0 where relative `$ref` references in local config schemas were not correctly resolved during validation. This regression was introduced when refactoring the config validation from the (deprecated) `RefResolver` to the (current) `resolving` API in #3420. The two required changes were: 1. Injection of an absolute file:// $id in schemas to enforce proper local reference resolution. 2. A follow-up update to retrieve_uri to correctly load subschemas (as it now receives a file:// URI). In addition, to prevent future regressions, this commit adds tests for (nested) `$ref`s, `$ref` with remote `$id` (see below), fragment references, `allOf`/`anyOf` defaults, and `$def`s. This commit restores pre-9.6.0 behavior and prevents ValidationError when nested schemas are referenced. Caveat: This commit intentionally does *not* restore one specific aspect of the old (`RefResolver` based) implementation: If a schema file contains an explicit `$id` pointing to a remote copy of the schema, the old implementation would fetch any references (in- or external) from that remote URI. Instead, the implementation introduced by this commit always resolves references based on the local schema file. This behaviour is demonstrated (and validated) with the remote `$id` test mentioned above. The following checks have been performed to ensure the correctness of this patch: 1. All tests (but the remote `$id` one, see above) were successfully run at commit `960f6a89eaa31da6014e810dfcf08f635ac03a6e` (last ancestor of current `main` that still uses the old `RefResolver` implementation), proving that they cover pre-9.6.0 behaviour. 2. Re-running the same tests at commit `cf724272eefcd9b30fb625d2e16380727bef9c3e` (direct child of the above, introducing the changes from PR #3420), the three (nested) `$ref`s test fail, verifying this commit/PR caused the regression and the tests added in this commit catch that regression. 3. The same tests still fail at commit `a9c9d275f1f6a2069b78f79db01f718e2ea16734` (current `main`), demonstrating that the regression has not been fixed yet. 4. All tests pass after applying the changes from this commit, validating it fixes the regression. --- fixes #3648
1 parent a9c9d27 commit cc4ddf3

2 files changed

Lines changed: 253 additions & 2 deletions

File tree

src/snakemake/utils.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ def validate(data, schema, set_default=True):
5050
import jsonschema
5151
from jsonschema import Draft202012Validator, validators
5252
from referencing import Registry, Resource
53+
from pathlib import Path
54+
from urllib.parse import urlparse
55+
from urllib.request import url2pathname
5356

5457
schemafile = infer_source_file(schema)
5558

@@ -65,8 +68,44 @@ def validate(data, schema, set_default=True):
6568

6669
schema = _load_configfile(source, filetype="Schema")
6770

71+
# Inject $id to enforce local reference resolution.
72+
# Ensures all relative $ref's are resolved relative to this local file,
73+
# even if it defines an explicit $id pointing to a remote location.
74+
# This is required because the retrieve_uri function (see below) does not
75+
# handle remote files.
76+
# This allows to mostly restores pre-9.6.0 behavior fixing the regression
77+
# caused by https://github.com/snakemake/snakemake/pull/3420 and reported
78+
# in https://github.com/snakemake/snakemake/issues/3648.
79+
# Note that the old (RefResolver based) implementation did handle remote
80+
# URI fetching and could therefore respect remote URIs defined as ID
81+
# withing the config file schema.
82+
# To fully restore that behaviour, retrieve_uri would have to be expanded
83+
# to fetch remote resources and this injection would have to be condition
84+
# on the schema not defining an ID.
85+
# However, in the context of config file validation, resolving a reference
86+
# defined in a local schema file through a remote request seems to solve no
87+
# purpose: Either the schemas are identical, in which relying on the local,
88+
# known-to-exist version is more efficient, or they differ for some reason
89+
# in which case not using the ref text from the local schema file might
90+
# cause very surprising and hard to track down behaviour.
91+
# Therefore, the new (resolving based) implementation purposefully breaks
92+
# backwards-compatibility with the former (RefResolver based) one.
93+
# This is also made explicit through the
94+
# test_config_ref_relative_with_remote_id test added to test_schema.py.
95+
schema["$id"] = Path(schemafile.get_path_or_uri()).resolve().as_uri()
96+
6897
def retrieve_uri(uri):
69-
return Resource.from_contents(contents=_load_configfile(uri, filetype="Schema"))
98+
# Note:
99+
# Relative $ref's are resolved against the (referencing) schema's $id
100+
# by the referencing library before calling retrieve.
101+
# Above, this was set to the local file's (absolute) file:// URI.
102+
# Since _load_configfile expects a file handle/path, and not a URI,
103+
# it must be parsed to strip off the (URI) schema.
104+
return Resource.from_contents(
105+
contents=_load_configfile(
106+
url2pathname(urlparse(uri).path), filetype="Schema"
107+
)
108+
)
70109

71110
resource = Resource.from_contents(contents=schema)
72111
registry = Registry(retrieve=retrieve_uri).with_resource(

tests/test_schema.py

Lines changed: 213 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import pytest
33
from snakemake.utils import validate
4+
from snakemake.exceptions import WorkflowError
45
import pandas as pd
56

67
CONFIG_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
@@ -57,6 +58,106 @@
5758
- condition
5859
"""
5960

61+
RELATIVE_CONFIG_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
62+
type: object
63+
properties:
64+
bar:
65+
$ref: "bar.schema.yaml"
66+
"""
67+
68+
NESTED_BAR_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
69+
type: object
70+
properties:
71+
baz:
72+
$ref: "baz.schema.yaml"
73+
required: ["baz"]
74+
"""
75+
76+
NESTED_BAZ_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
77+
type: object
78+
properties:
79+
qux:
80+
type: integer
81+
default: 42
82+
required: ["qux"]
83+
"""
84+
85+
REMOTE_ID_CONFIG_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
86+
$id: "https://example.com/config.schema.yaml"
87+
type: object
88+
properties:
89+
bar:
90+
$ref: "bar.schema.yaml"
91+
"""
92+
93+
EXTERNAL_BAR_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
94+
type: object
95+
properties:
96+
foo:
97+
type: string
98+
required: ["foo"]
99+
"""
100+
101+
FRAGMENT_BAR_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
102+
definitions:
103+
frag_obj:
104+
type: object
105+
properties:
106+
baz:
107+
type: string
108+
required: ["baz"]
109+
"""
110+
111+
FRAGMENT_CONFIG_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
112+
type: object
113+
properties:
114+
bar:
115+
$ref: "bar.schema.yaml#/definitions/frag_obj"
116+
required: ["bar"]
117+
"""
118+
119+
ALLOF_CONFIG_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
120+
type: object
121+
properties:
122+
foo:
123+
allOf:
124+
- type: object
125+
properties:
126+
bar:
127+
type: integer
128+
default: 42
129+
required: ['bar']
130+
- type: object
131+
properties:
132+
foo:
133+
type: string
134+
default: "foo"
135+
required: ["foo"]
136+
"""
137+
138+
DEFS_CONFIG_SCHEMA = """$schema: "https://json-schema.org/draft/2020-12/schema#"
139+
$defs:
140+
bar:
141+
type: object
142+
properties:
143+
qux:
144+
type: string
145+
default: "qux"
146+
baz:
147+
type: object
148+
properties:
149+
qux:
150+
type: integer
151+
default: 42
152+
required: ["qux"]
153+
type: object
154+
properties:
155+
foo:
156+
anyOf:
157+
- $ref: "#/$defs/bar"
158+
- $ref: "#/$defs/baz"
159+
"""
160+
60161

61162
@pytest.fixture
62163
def schemadir(tmpdir):
@@ -116,6 +217,69 @@ def config_schema_ref(schemadir, bar_schema, json_bar_schema):
116217
return p
117218

118219

220+
@pytest.fixture
221+
def config_schema_relative(schemadir):
222+
p = schemadir.join("config.relative.schema.yaml")
223+
p.write(RELATIVE_CONFIG_SCHEMA)
224+
bar = schemadir.join("bar.schema.yaml")
225+
bar.write(EXTERNAL_BAR_SCHEMA)
226+
return p
227+
228+
229+
@pytest.fixture
230+
def config_schema_relative_nested(schemadir):
231+
p = schemadir.join("config.relative.nested.schema.yaml")
232+
p.write(RELATIVE_CONFIG_SCHEMA)
233+
bar = schemadir.join("bar.schema.yaml")
234+
bar.write(NESTED_BAR_SCHEMA)
235+
baz = schemadir.join("baz.schema.yaml")
236+
baz.write(NESTED_BAZ_SCHEMA)
237+
return p
238+
239+
240+
@pytest.fixture
241+
def config_schema_relative_ref_with_remote_id(schemadir):
242+
p = schemadir.join("config.relative_ref.remote_id.schema.yaml")
243+
p.write(REMOTE_ID_CONFIG_SCHEMA)
244+
bar = schemadir.join("bar.schema.yaml")
245+
bar.write(EXTERNAL_BAR_SCHEMA)
246+
return p
247+
248+
249+
@pytest.fixture
250+
def config_schema_relative_ref_with_fragment(schemadir):
251+
p = schemadir.join("config.relative_ref.fragment.schema.yaml")
252+
p.write(FRAGMENT_CONFIG_SCHEMA)
253+
bar = schemadir.join("bar.schema.yaml")
254+
bar.write(FRAGMENT_BAR_SCHEMA)
255+
return p
256+
257+
258+
@pytest.fixture
259+
def config_schema_relative_nested_with_default(schemadir):
260+
p = schemadir.join("config.relative.nested.default.schema.yaml")
261+
p.write(RELATIVE_CONFIG_SCHEMA)
262+
bar = schemadir.join("bar.schema.yaml")
263+
bar.write(NESTED_BAR_SCHEMA)
264+
baz = schemadir.join("baz.schema.yaml")
265+
baz.write(NESTED_BAZ_SCHEMA)
266+
return p
267+
268+
269+
@pytest.fixture
270+
def config_schema_allof_default(schemadir):
271+
p = schemadir.join("config.allof.default.schema.yaml")
272+
p.write(ALLOF_CONFIG_SCHEMA)
273+
return p
274+
275+
276+
@pytest.fixture
277+
def config_schema_defs(schemadir):
278+
p = schemadir.join("config.defs.schema.yaml")
279+
p.write(DEFS_CONFIG_SCHEMA)
280+
return p
281+
282+
119283
def test_config(config_schema):
120284
config = {}
121285
validate(config, str(config_schema), False)
@@ -133,7 +297,6 @@ def test_config_ref(config_schema_ref):
133297
# Make sure regular validator works
134298
config["param"]["bar"] = 1
135299
config["param"]["jsonbar"] = 2
136-
from snakemake.exceptions import WorkflowError
137300

138301
with pytest.raises(WorkflowError):
139302
validate(config, str(config_schema_ref), False)
@@ -146,3 +309,52 @@ def test_dataframe(df_schema):
146309
validate(df, str(df_schema))
147310
assert sorted(df.columns) == sorted(["sample", "condition", "case", "date"])
148311
assert df.case.loc[0]
312+
313+
314+
def test_config_ref_relative(config_schema_relative):
315+
config = {"bar": {}}
316+
with pytest.raises(WorkflowError):
317+
validate(config, str(config_schema_relative), set_default=False)
318+
config = {"bar": {"foo": "baz"}}
319+
validate(config, str(config_schema_relative), set_default=False)
320+
321+
322+
def test_config_ref_relative_nested(config_schema_relative_nested):
323+
config = {"bar": {"baz": {}}}
324+
with pytest.raises(WorkflowError):
325+
validate(config, str(config_schema_relative_nested), set_default=False)
326+
config = {"bar": {"baz": {"qux": 19}}}
327+
validate(config, str(config_schema_relative_nested), set_default=False)
328+
329+
330+
def test_config_ref_relative_with_remote_id(config_schema_relative_ref_with_remote_id):
331+
config = {"bar": {}}
332+
with pytest.raises(WorkflowError):
333+
validate(
334+
config, str(config_schema_relative_ref_with_remote_id), set_default=False
335+
)
336+
config = {"bar": {"foo": "baz"}}
337+
validate(config, str(config_schema_relative_ref_with_remote_id), set_default=False)
338+
339+
340+
def test_config_ref_relative_with_fragment(config_schema_relative_ref_with_fragment):
341+
config = {"bar": None}
342+
with pytest.raises(WorkflowError):
343+
validate(
344+
config, str(config_schema_relative_ref_with_fragment), set_default=False
345+
)
346+
config = {"bar": {"baz": "value"}}
347+
validate(config, str(config_schema_relative_ref_with_fragment), set_default=False)
348+
349+
350+
def test_config_allof_default(config_schema_allof_default):
351+
config = {"foo": {}}
352+
validate(config, str(config_schema_allof_default), set_default=True)
353+
assert config["foo"]["bar"] == 42
354+
assert config["foo"]["foo"] == "foo"
355+
356+
357+
def test_config_anyof_via_defs_default(config_schema_defs):
358+
config = {"foo": {}}
359+
validate(config, str(config_schema_defs), set_default=True)
360+
assert config["foo"]["qux"] == "qux"

0 commit comments

Comments
 (0)