Skip to content

Commit faf5c1b

Browse files
perf: compare checksums of input files <= 1 MB (previously < 100 KB); move source cache into the .snakemake directory in the workdir instead of the user-specific app cache (improves multi-user capability of workflows)
1 parent d766a48 commit faf5c1b

File tree

3 files changed

+16
-5
lines changed

3 files changed

+16
-5
lines changed

snakemake/io.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -600,15 +600,15 @@ async def is_checksum_eligible(self):
600600
return (
601601
await self.exists_local()
602602
and not os.path.isdir(self.file)
603-
and await self.size() < 100000
603+
and await self.size() <= 1000000
604604
and not self.is_fifo()
605605
)
606606

607607
async def checksum(self, force=False):
608608
"""Return checksum if file is small enough, else None.
609609
Returns None if file does not exist. If force is True,
610610
omit eligibility check."""
611-
if force or await self.is_checksum_eligible(): # less than 100000 bytes
611+
if force or await self.is_checksum_eligible(): # at most 1 MB (1,000,000 bytes)
612612
checksum = sha256()
613613
if await self.size() > 0:
614614
# only read if file is bigger than zero

snakemake/sourcecache.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import os
1010
import shutil
1111
import stat
12+
from typing import Optional
1213
from snakemake import utils
1314
import tempfile
1415
import io
@@ -231,6 +232,12 @@ def __init__(
231232
def __post_init__(self):
232233
pass
233234

235+
def mtime(self) -> Optional[float]:
236+
# Intentionally None, hence causing any caching to generate an updated mtime.
237+
# Switching commits/branches/refs in the same repo should cause rerun triggers
238+
# if those files are used as input files for jobs and have changed checksums.
239+
return None
240+
234241
def is_persistently_cacheable(self):
235242
return bool(self.tag or self.commit)
236243

snakemake/workflow.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,15 @@ def register_resource(self, name: str, value: Union[int, str]):
252252
def source_cache_path(self) -> Path:
253253
assert self.storage_settings is not None
254254
if SharedFSUsage.SOURCE_CACHE not in self.storage_settings.shared_fs_usage:
255+
# TODO can this cause issues with the source cache?
256+
# When regenerating it for each remote job, might that accidentally trigger
257+
# job reruns where an input file or a source file suddenly becomes newer?
255258
return self.snakemake_tmp_dir / "source-cache"
256259
else:
257-
return Path(
258-
os.path.join(get_appdirs().user_cache_dir, "snakemake/source-cache")
259-
)
260+
# The source cache should be shared across different users of the same
261+
# workflow. Hence we have to make sure that it is in a directory that is
262+
# the same for all people that run the same workflow in the same workdir.
263+
return self.persistence.path / "source-cache"
260264

261265
@property
262266
def storage_registry(self):

0 commit comments

Comments
 (0)