Rdatatable · MichaelChirico · Apr 10, 2024 · Apr 1, 2024 · Apr 1, 2024 · Apr 1, 2024
@@ -1,4 +1,4 @@
-test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=FALSE, showProgress=interactive()&&!silent,
+test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=FALSE, showProgress=interactive()&&!silent, testPattern=NULL,
                            memtest=Sys.getenv("TEST_DATA_TABLE_MEMTEST", 0), memtest.id=NULL) {
   stopifnot(isTRUEorFALSE(verbose), isTRUEorFALSE(silent), isTRUEorFALSE(showProgress))
   memtest = as.integer(memtest)
@@ -38,7 +38,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     scripts = scripts[!grepl("bench|other", scripts)]
     scripts = gsub("[.]bz2$","",scripts)
     return(sapply(scripts, function(fn) {
-      err = try(test.data.table(script=fn, verbose=verbose, pkg=pkg, silent=silent, showProgress=showProgress))
+      err = try(test.data.table(script=fn, verbose=verbose, pkg=pkg, silent=silent, showProgress=showProgress, testPattern=testPattern))
       cat("\n");
       isTRUE(err)
     }))
@@ -140,6 +140,58 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
     if (is.na(rss())) stopf("memtest intended for Linux. Step through data.table:::rss() to see what went wrong.")
   }
 
+  # nocov start: only used interactively -- "production" suites should always run in full
+  if (!is.null(testPattern)) {
+    # due to how non-hermetic our tests are, the simple approach (pass this to test(), return early if 'numStr' matches testPattern)
+    #   does not work, or at least getting it to work is not much more efficient (see initial commit of #6040). so instead,
+    #   here we parse the file, extract the tests that match the pattern to a new file, and include other setup lines likely required
+    #   to run the tests successfully. two major drawbacks (1) we can only take a guess which lines are required, so this approach
+    #   can't work (or at least, may need a lot of adjustment) for _every_ test, though not working is also a good sign that test
+    #   should be refactored to be more hermetic (2) not all tests have literal test numbers, meaning we can't always match the
+    #   runtime test number (i.e. 'numStr') since we're just doing a static check here, though we _are_ careful to match the
+    #   full test expression string, i.e., not just limited to numeric literal test numbers.
+    arg_line = call_id = col1 = col2 = i.line1 = id = line1 = parent = preceding_line = test_start_line = text = token = x.line1 = x.parent = NULL # R CMD check
+    pd = setDT(utils::getParseData(parse(fn)))
+    file_lines = readLines(fn)
+    # NB: a call looks like (with id/parent tracking)
+    # <expr>
+    #   <expr "lhs"><SYMBOL_FUNCTION_CALL>name</SYMBOL_FUNCTION_CALL></expr>
+    #   <LEFT_PAREN>(</LEFT_PAREN>
+    #   <expr "arg1"> ... </expr>
+    #   ...
+    #   <RIGHT_PAREN>)</RIGHT_PAREN>
+    # </expr>
+    ## navigate up two steps from 'test' SYMBOL_FUNCTION_CALL to the overall 'expr' for the call
+    test_calls = pd[pd[pd[token == 'SYMBOL_FUNCTION_CALL' & text == 'test'], list(call_lhs_id = id, call_id = x.parent), on=c(id='parent')], .(line1, id), on=c(id='call_id')]
+    ## all the arguments for each call to test()
+    test_call_args = test_calls[pd[token == 'expr'], .(call_id = parent, arg_line = i.line1, col1, col2), on=c(id='parent'), nomatch=NULL]
+    ## 2nd argument is the num= argument
+    test_num_expr = test_call_args[ , .SD[2L], by="call_id"]
+    # NB: subtle assumption that 2nd arg to test() is all on one line, true as of 2024-Apr and likely to remain so
+    keep_test_ids = test_num_expr[grepl(testPattern, substring(file_lines[arg_line], col1, col2)), call_id]
+    # Now find all tests just previous to the keep tests; we want to keep non-test setup lines between them, e.g.
+    # test(drop, ...)
+    # setup_line1     # retain
+    # setup_line2     # retain
+    # test(keep, ...) # retain
+    intertest_ranges = test_calls[!id %in% keep_test_ids][test_calls[id %in% keep_test_ids], .(preceding_line = x.line1, test_start_line = i.line1), on='line1', roll=TRUE]
+    # TODO(michaelchirico): this doesn't do well with tests inside control statements.
+    #   those could be included by looking for tests with parent!=0, i.e., not-top-level tests,
+    #   and including the full parent for such tests. omitting for now until needed.
+    keep_lines = intertest_ranges[, sort(unique(unlist(Map(function(l, u) l:u, preceding_line+1L, test_start_line))))]
+    header_lines = seq_len(test_calls$line1[1L]-1L)
+
+    tryCatch(error = function(c) warningf("Attempt to subset to %d tests matching '%s' failed, running full suite.", length(keep_test_ids), testPattern), {
+      new_script = file_lines[c(header_lines, keep_lines)]
+      parse(text = new_script) # as noted above the static approach is not fool-proof (yet?), so force the script to at least parse before continuing.
+      fn = tempfile()
+      on.exit(unlink(fn), add=TRUE)
+      catf("Running %d of %d tests matching '%s'\n", length(keep_test_ids), nrow(test_calls), testPattern)
+      writeLines(new_script, fn)
+    })
+  }
+  # nocov end
+
   err = try(sys.source(fn, envir=env), silent=silent)
 
   options(oldOptions)

@@ -8,6 +8,7 @@
 test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".",
                 silent = FALSE,
                 showProgress = interactive() && !silent,
+                testPattern = NULL,
                 memtest = Sys.getenv("TEST_DATA_TABLE_MEMTEST", 0),
                 memtest.id = NULL)
 }
@@ -17,6 +18,7 @@ test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".",
 \item{pkg}{ Root directory name under which all package content (ex: DESCRIPTION, src/, R/, inst/ etc..) resides. Used only in \emph{dev-mode}. }
 \item{silent}{ Controls what happens if a test fails. Like \code{silent} in \code{\link{try}}, \code{TRUE} causes the error message to be suppressed and \code{FALSE} to be returned, otherwise the error is returned. }
 \item{showProgress}{ Output 'Running test <n> ...\\r' at the start of each test? }
+\item{testPattern}{ When present, a regular expression tested againt the number of each test for inclusion. Useful for running only a small portion of a large test script. }
 \item{memtest}{ Measure and report memory usage of tests (1:gc before ps, 2:gc after ps) rather than time taken (0) by default. Intended for and tested on Linux. See PR #5515 for more details. }
 \item{memtest.id}{ An id for which to print memory usage for every sub id. May be a range of ids. }
 }