Skip to content
Merged
56 changes: 54 additions & 2 deletions R/test.data.table.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=FALSE, showProgress=interactive()&&!silent,
test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=FALSE, showProgress=interactive()&&!silent, testPattern=NULL,
memtest=Sys.getenv("TEST_DATA_TABLE_MEMTEST", 0), memtest.id=NULL) {
stopifnot(isTRUEorFALSE(verbose), isTRUEorFALSE(silent), isTRUEorFALSE(showProgress))
memtest = as.integer(memtest)
Expand Down Expand Up @@ -38,7 +38,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
scripts = scripts[!grepl("bench|other", scripts)]
scripts = gsub("[.]bz2$","",scripts)
return(sapply(scripts, function(fn) {
err = try(test.data.table(script=fn, verbose=verbose, pkg=pkg, silent=silent, showProgress=showProgress))
err = try(test.data.table(script=fn, verbose=verbose, pkg=pkg, silent=silent, showProgress=showProgress, testPattern=testPattern))
cat("\n");
isTRUE(err)
}))
Expand Down Expand Up @@ -140,6 +140,58 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
if (is.na(rss())) stopf("memtest intended for Linux. Step through data.table:::rss() to see what went wrong.")
}

# nocov start: only used interactively -- "production" suites should always run in full
# Purpose: when 'testPattern' is supplied, statically build a reduced copy of the test script 'fn' that contains only the
# header, the test() calls whose test-number expression matches the pattern, and the non-test lines directly preceding them;
# then repoint 'fn' at that temporary copy so that the subsequent sourcing of 'fn' runs only the subset.
if (!is.null(testPattern)) {
# due to how non-hermetic our tests are, the simple approach (pass this to test(), return early if 'numStr' matches testPattern)
# does not work, or at least getting it to work is not much more efficient (see initial commit of #6040). so instead,
# here we parse the file, extract the tests that match the pattern to a new file, and include other setup lines likely required
# to run the tests successfully. two major drawbacks (1) we can only take a guess which lines are required, so this approach
# can't work (or at least, may need a lot of adjustment) for _every_ test, though not working is also a good sign that test
# should be refactored to be more hermetic (2) not all tests have literal test numbers, meaning we can't always match the
# runtime test number (i.e. 'numStr') since we're just doing a static check here, though we _are_ careful to match the
# full test expression string, i.e., not just limited to numeric literal test numbers.
# Dummy NULL bindings for the column names used unquoted in the data.table expressions below (silences R CMD check notes).
arg_line = call_id = col1 = col2 = i.line1 = id = line1 = parent = preceding_line = test_start_line = text = token = x.line1 = x.parent = NULL # R CMD check
# pd: the parse data of the script (one row per token/expression, with id/parent links); file_lines: its raw text lines.
pd = setDT(utils::getParseData(parse(fn)))
file_lines = readLines(fn)
# NB: a call looks like (with id/parent tracking)
# <expr>
# <expr "lhs"><SYMBOL_FUNCTION_CALL>name</SYMBOL_FUNCTION_CALL></expr>
# <LEFT_PAREN>(</LEFT_PAREN>
# <expr "arg1"> ... </expr>
# ...
# <RIGHT_PAREN>)</RIGHT_PAREN>
# </expr>
## navigate up two steps from 'test' SYMBOL_FUNCTION_CALL to the overall 'expr' for the call
## (inner join: symbol -> its parent "lhs" expr, whose own parent is the call; outer join: fetch the call's start line and id)
test_calls = pd[pd[pd[token == 'SYMBOL_FUNCTION_CALL' & text == 'test'], list(call_lhs_id = id, call_id = x.parent), on=c(id='parent')], .(line1, id), on=c(id='call_id')]
## all the direct child 'expr' nodes of each call to test(), with the line and column span of each;
## per the sketch above this includes the "lhs" (function name) expr as well as one expr per argument
test_call_args = test_calls[pd[token == 'expr'], .(call_id = parent, arg_line = i.line1, col1, col2), on=c(id='parent'), nomatch=NULL]
## the 2nd child 'expr' of each call is taken as the test number expression: the 1st child 'expr' is the "lhs" node,
## so this is presumably test()'s leading num= argument -- TODO confirm against the signature of test()
test_num_expr = test_call_args[ , .SD[2L], by="call_id"]
# NB: subtle assumption that this test number expression is all on one line (its text is cut out of the single line
# 'arg_line' using col1/col2), true as of 2024-Apr and likely to remain so
keep_test_ids = test_num_expr[grepl(testPattern, substring(file_lines[arg_line], col1, col2)), call_id]
# Now find all tests just previous to the keep tests; we want to keep non-test setup lines between them, e.g.
# test(drop, ...)
# setup_line1 # retain
# setup_line2 # retain
# test(keep, ...) # retain
# The rolling join pairs each kept test (i) with a non-kept test (x) by start line; x.line1 is the start line of that
# non-kept test and i.line1 the start line of the kept test.
# NOTE(review): a kept test with no non-kept test before it would presumably get preceding_line=NA here -- confirm how
# that case behaves in the l:u ranges built below.
intertest_ranges = test_calls[!id %in% keep_test_ids][test_calls[id %in% keep_test_ids], .(preceding_line = x.line1, test_start_line = i.line1), on='line1', roll=TRUE]
# TODO(michaelchirico): this doesn't do well with tests inside control statements.
# those could be included by looking for tests with parent!=0, i.e., not-top-level tests,
# and including the full parent for such tests. omitting for now until needed.
# Lines retained for each kept test: from the line after the paired non-kept test's start line up to and including the
# kept test's own start line (only start lines are used here, not end lines); de-duplicated and sorted across all tests.
keep_lines = intertest_ranges[, sort(unique(unlist(Map(function(l, u) l:u, preceding_line+1L, test_start_line))))]
# Everything before the first test() call in the file is treated as header and always retained.
header_lines = seq_len(test_calls$line1[1L]-1L)

# Any error raised inside the braces is converted to a warning by the handler. Since parse() runs before 'fn' is
# reassigned, a reduced script that fails to parse leaves 'fn' pointing at the original script.
tryCatch(error = function(c) warningf("Attempt to subset to %d tests matching '%s' failed, running full suite.", length(keep_test_ids), testPattern), {
new_script = file_lines[c(header_lines, keep_lines)]
parse(text = new_script) # as noted above the static approach is not fool-proof (yet?), so force the script to at least parse before continuing.
# From here on 'fn' refers to a temporary file, which is removed again when the enclosing function exits.
fn = tempfile()
on.exit(unlink(fn), add=TRUE)
catf("Running %d of %d tests matching '%s'\n", length(keep_test_ids), nrow(test_calls), testPattern)
writeLines(new_script, fn)
})
}
# nocov end

err = try(sys.source(fn, envir=env), silent=silent)

options(oldOptions)
Expand Down
2 changes: 2 additions & 0 deletions man/test.data.table.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".",
silent = FALSE,
showProgress = interactive() && !silent,
testPattern = NULL,
memtest = Sys.getenv("TEST_DATA_TABLE_MEMTEST", 0),
memtest.id = NULL)
}
Expand All @@ -17,6 +18,7 @@ test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".",
\item{pkg}{ Root directory name under which all package content (ex: DESCRIPTION, src/, R/, inst/ etc..) resides. Used only in \emph{dev-mode}. }
\item{silent}{ Controls what happens if a test fails. Like \code{silent} in \code{\link{try}}, \code{TRUE} causes the error message to be suppressed and \code{FALSE} to be returned, otherwise the error is returned. }
\item{showProgress}{ Output 'Running test <n> ...\\r' at the start of each test? }
\item{testPattern}{ When present, a regular expression tested against the number of each test for inclusion. Useful for running only a small portion of a large test script. }
\item{memtest}{ Measure and report memory usage of tests (1:gc before ps, 2:gc after ps) rather than time taken (0) by default. Intended for and tested on Linux. See PR #5515 for more details. }
\item{memtest.id}{ An id for which to print memory usage for every sub id. May be a range of ids. }
}
Expand Down