Skip to content

Commit 2f663be

Browse files
authored
fix: Restore python rules changes triggering reruns. (GH: #3213) (#3485)
Trying to fix #3213 by caching the source code of python rules (using `dis` + `linecache` magic). I would appreciate any help in how to approach writing a test for this. cc @corneliusroemer ### QC <!-- Make sure that you can tick the boxes below. --> * [x] The PR contains a test case for the changes or the changes are already covered by an existing test case. * [x] The documentation (`docs/`) is updated to reflect the changes or this is not necessary (e.g. if the change does neither modify the language nor the behavior or functionalities of Snakemake). <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Enhanced rule processing to capture both shell command and Python function definitions when available. - Expanded the internal rule configuration to include additional source code information, supporting improved debugging and documentation. - Introduced a method to retrieve and store source details for rules automatically. - New workflow defined with rules for generating files based on time-stamped content. - **Bug Fixes** - Added tests to validate the behavior of the new rule processing and ensure expected outputs. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent d06159a commit 2f663be

File tree

6 files changed

+123
-4
lines changed

6 files changed

+123
-4
lines changed

src/snakemake/persistence.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -564,11 +564,13 @@ def _b64id(self, s):
564564

565565
@lru_cache()
566566
def _code(self, rule):
567-
# We only consider shell commands for now.
568-
# Plain python code rules are hard to capture because the pickling of the code
569-
# can change with different python versions.
570567
# Scripts and notebooks are triggered by changes in the script mtime.
571-
return rule.shellcmd if rule.shellcmd is not None else None
568+
# Changes to python and shell rules are triggered by changes in the plain text.
569+
if rule.shellcmd is not None:
570+
return rule.shellcmd
571+
if rule.run_func_src is not None:
572+
return rule.run_func_src
573+
return None
572574

573575
@lru_cache()
574576
def _conda_env(self, job):

src/snakemake/rules.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ def __init__(self, name, workflow, lineno=None, snakefile=None):
108108
self._lineno = lineno
109109
self._snakefile = snakefile
110110
self.run_func = None
111+
self.run_func_src = None
111112
self.shellcmd = None
112113
self.script = None
113114
self.notebook = None

src/snakemake/workflow.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import subprocess
1111
import sys
1212
import platform
13+
import dis
14+
import linecache
1315
from collections import OrderedDict, namedtuple
1416
from collections.abc import Iterator, Mapping
1517
from itertools import filterfalse, chain
@@ -1762,6 +1764,26 @@ def ruleorder(self, *rulenames):
17621764
def localrules(self, *rulenames):
17631765
self._localrules.update(rulenames)
17641766

1767+
def get_rule_source(self, func):
1768+
# This can't use `inspect` because the functions are compiled into intermediate python code
1769+
# in parser.py and that intermediate source is not available anymore (or desirable).
1770+
# Instead, we're using dis to retrieve the line numbers of the function in the intermediate
1771+
# code and then map it to the original file using `self.linemaps`.
1772+
sourcefile = func.__code__.co_filename
1773+
line_numbers = []
1774+
linemap = self.linemaps[sourcefile]
1775+
for func_offset, line in dis.findlinestarts(func.__code__):
1776+
# The first instruction in the compiled function is RESUME, which
1777+
# with snakemake is mapped to the 'rule: ' line and is not considered
1778+
# part of the rule source.
1779+
if func_offset == 0:
1780+
continue
1781+
if line in linemap:
1782+
line_numbers.append(linemap[line])
1783+
return "".join(
1784+
[linecache.getline(sourcefile, lineno) for lineno in sorted(line_numbers)]
1785+
)
1786+
17651787
def rule(self, name=None, lineno=None, snakefile=None, checkpoint=False):
17661788
# choose a name for an unnamed rule
17671789
if name is None:
@@ -1980,6 +2002,8 @@ def check_may_use_software_deployment(method):
19802002
rule.name = ruleinfo.name
19812003
rule.docstring = ruleinfo.docstring
19822004
rule.run_func = ruleinfo.func
2005+
if rule.run_func is not None and not rule.norun:
2006+
rule.run_func_src = self.get_rule_source(rule.run_func)
19832007
rule.shellcmd = ruleinfo.shellcmd
19842008
rule.script = ruleinfo.script
19852009
rule.notebook = ruleinfo.notebook
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
rule all:
2+
input:
3+
"file_b.txt"
4+
5+
rule file_a:
6+
output:
7+
"file_a.txt"
8+
run:
9+
import time
10+
11+
with open(output[0], "w") as f:
12+
f.write("A" + str(time.time_ns()))
13+
14+
rule file_b:
15+
input:
16+
"file_a.txt"
17+
output:
18+
"file_b.txt"
19+
run:
20+
import time
21+
22+
with open(output[0], "w") as f:
23+
f.write("B" + str(time.time_ns()))
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
rule all:
2+
input:
3+
"file_b.txt"
4+
5+
rule file_a:
6+
output:
7+
"file_a.txt"
8+
run:
9+
import time
10+
11+
with open(output[0], "w") as f:
12+
f.write("A" + str(time.time_ns()))
13+
14+
rule file_b:
15+
input:
16+
"file_a.txt"
17+
output:
18+
"file_b.txt"
19+
run:
20+
import time
21+
22+
with open(output[0], "w") as f:
23+
f.write("D" + str(time.time_ns()))

tests/tests.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1977,6 +1977,52 @@ def test_github_issue1882():
19771977
shutil.rmtree(tmpdir)
19781978

19791979

1980+
def test_github_issue3213():
1981+
try:
1982+
import linecache
1983+
1984+
output_files = ["file_a.txt", "file_b.txt"]
1985+
1986+
tmpdir = run(dpath("test_github_issue3213"), cleanup=False, check_results=False)
1987+
ts_a1, ts_b1 = [open(os.path.join(tmpdir, f)).read() for f in output_files]
1988+
1989+
# Need to clear the cache so that we notice the Snakefile file has changed.
1990+
# In practice it's not a problem.
1991+
linecache.clearcache()
1992+
1993+
shutil.copy(
1994+
os.path.join(dpath("test_github_issue3213"), "Snakefile.newver"),
1995+
os.path.join(tmpdir, "Snakefile"),
1996+
)
1997+
run(
1998+
dpath("test_github_issue3213"),
1999+
tmpdir=tmpdir,
2000+
cleanup=False,
2001+
check_results=False,
2002+
)
2003+
ts_a2, ts_b2 = [open(os.path.join(tmpdir, f)).read() for f in output_files]
2004+
2005+
run(
2006+
dpath("test_github_issue3213"),
2007+
tmpdir=tmpdir,
2008+
cleanup=False,
2009+
check_results=False,
2010+
forceall=True,
2011+
)
2012+
ts_a3, ts_b3 = [open(os.path.join(tmpdir, f)).read() for f in output_files]
2013+
2014+
assert ts_a1[0] == ts_a2[0] == ts_a3[0] == "A"
2015+
assert ts_b1[0] == "B"
2016+
assert ts_b2[0] == ts_b3[0] == "D"
2017+
2018+
assert ts_a1 == ts_a2
2019+
assert ts_b1 != ts_b2
2020+
assert ts_a2 != ts_a3
2021+
assert ts_b2 != ts_b3
2022+
finally:
2023+
shutil.rmtree(tmpdir)
2024+
2025+
19802026
@skip_on_windows # not platform dependent
19812027
def test_inferred_resources():
19822028
run(dpath("test_inferred_resources"))

0 commit comments

Comments
 (0)