Skip to content

Commit 4655e96

Browse files
authored
Add support for mounting .tgz files with filesystem metadata appended (#477)
* Bump dev version
* Correct VFS image file extension in lzfs.R script
* Remove `.data` assumption in `webr::mount()`
* Add support for v2.0 VFS filesystem image format
* Avoid Emscripten WORKERFS `mount()` under Node

  Instead, use our own `mountImageData()` function to create VFS nodes for each file in the VFS metadata package.

  TODO: This currently handles only metadata given in the form of the `packages` property. Emscripten supports additional `files` and `blobs` properties, and in the future we should also support those here.

  Fixes #328.
* Update webr::mount() to default to v2.0 VFS images
* Update documentation for VFS v2.0
* Update NEWS.md
* Reorganise VFS mounting into TS module `mount.ts`
* Mount as URL under Node if source begins http[s]
* Add unit tests for mounting WORKERFS and NODEFS
* Export types from webr-chan.ts
* Fallback to mounting `.data` before using archive

  Also improves warning messaging during fallback(s).
* Interpret metadata values as signed integers
* Read metadata from tar contents if hint is missing
* Add unit test for .tgz with no metadata hint
1 parent d455321 commit 4655e96

21 files changed

+455
-163
lines changed

NEWS.md

+12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
# webR (development version)
22

3+
## New features
4+
5+
* Added support for directly mounting (optionally compressed) `.tar` archives as filesystem images. Archives must be pre-processed using the `rwasm` R package to append filesystem image metadata to the `.tar` archive data.
6+
7+
## Breaking changes
8+
9+
* When installing binary R packages, webR will now default to mounting the R package binary `.tgz` file as a filesystem image. If this fails (e.g. the `.tgz` has not been processed to add filesystem image metadata), webR will fall back to a traditional install by extracting the contents of the `.tgz` file.
10+
11+
## Bug fixes
12+
13+
* Mounting filesystem images using the `WORKERFS` filesystem type now works correctly under Node.js (#328).
14+
315
# webR 0.4.1
416

517
## New features

flake.nix

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# cd src; prefetch-npm-deps package-lock.json
2727
srcNpmDeps = pkgs.fetchNpmDeps {
2828
src = "${self}/src";
29-
hash = "sha256-bENxHgVxA2G31l7NR66braWIEwybDe2qAf12x3V5JUY=";
29+
hash = "sha256-KjG55UsbDIMxc5lRzSpqmmfc/tGKOwxXD6Gb+3lVLYU=";
3030
};
3131

3232
inherit system;

packages/webr/DESCRIPTION

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: webr
33
Title: WebR Support Package
4-
Version: 0.4.1
4+
Version: 0.4.1.9000
55
Authors@R: c(
66
person("George", "Stagg", , "[email protected]", role = c("aut", "cre")),
77
person("Lionel", "Henry", , "[email protected]", role = "aut"),
@@ -17,4 +17,4 @@ Imports:
1717
Encoding: UTF-8
1818
LazyData: true
1919
Roxygen: list(markdown = TRUE)
20-
RoxygenNote: 7.3.1
20+
RoxygenNote: 7.3.2

packages/webr/R/install.R

+18-16
Original file line numberDiff line numberDiff line change
@@ -67,21 +67,23 @@ install <- function(packages,
6767
if (!quiet) message(paste("Downloading webR package:", pkg))
6868

6969
if (mount) {
70-
# Try package.data URL, fallback to .tgz download if unavailable
71-
tryCatch(
72-
{
73-
install_vfs_image(repo, lib, pkg, pkg_ver)
74-
},
75-
error = function(cnd) {
76-
if (!grepl("Unable to download", conditionMessage(cnd))) {
77-
stop(cnd)
78-
}
79-
install_tgz(repo, lib, pkg, pkg_ver)
80-
}
81-
)
82-
} else {
83-
install_tgz(repo, lib, pkg, pkg_ver)
70+
# Try mounting `.tgz` as v2.0 image, fall back to `.data` v1.0 image
71+
tryCatch({
72+
install_vfs_image(repo, lib, pkg, pkg_ver, ".tgz")
73+
next
74+
}, error = function(cnd) {
75+
warning(paste(cnd$message, "Falling back to `.data` filesystem image."))
76+
})
77+
78+
tryCatch({
79+
install_vfs_image(repo, lib, pkg, pkg_ver, ".data")
80+
next
81+
}, error = function(cnd) {
82+
warning(paste(cnd$message, "Falling back to copying archive contents."))
83+
})
8484
}
85+
86+
install_tgz(repo, lib, pkg, pkg_ver)
8587
}
8688
invisible(NULL)
8789
}
@@ -100,8 +102,8 @@ install_tgz <- function(repo, lib, pkg, pkg_ver) {
100102
)
101103
}
102104

103-
install_vfs_image <- function(repo, lib, pkg, pkg_ver) {
104-
data_url <- file.path(repo, paste0(pkg, "_", pkg_ver, ".data"))
105+
install_vfs_image <- function(repo, lib, pkg, pkg_ver, ext) {
106+
data_url <- file.path(repo, paste0(pkg, "_", pkg_ver, ext))
105107
mountpoint <- file.path(lib, pkg)
106108
mount(mountpoint, data_url)
107109
}

packages/webr/R/mount.R

+8-6
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@
55
#' directory in the virtual filesystem. The mountpoint will be created if it
66
#' does not already exist.
77
#'
8-
#' When mounting an Emscripten "workerfs" type filesystem the `source` should
9-
#' be the URL for a filesystem image with filename ending `.data`, as produced
10-
#' by Emscripten's `file_packager` tool. The filesystem image and metadata will
11-
#' be downloaded and mounted onto the directory `mountpoint`.
8+
#' When mounting an Emscripten "workerfs" type filesystem the `source` should be
9+
#' the URL or path to a filesystem image, as produced by Emscripten's
10+
#' `file_packager` tool or as the result of appending filesystem metadata to a
11+
#' `.tar` archive using [rwasm::add_tar_index()]. The filesystem image may be
12+
#' gzip compressed, indicated by the property `gzip: true` in the associated
13+
#' filesystem metadata. The filesystem metadata and contents will be loaded and
14+
#' mounted onto the directory `mountpoint`.
1215
#'
1316
#' When mounting an Emscripten "nodefs" type filesystem, the `source` should be
1417
#' the path to a physical directory on the host filesystem. The host directory
@@ -37,8 +40,7 @@ mount <- function(mountpoint, source, type = "workerfs") {
3740

3841
# Mount specified Emscripten filesystem type onto the given mountpoint
3942
if (tolower(type) == "workerfs") {
40-
base_url <- gsub(".data$", "", source)
41-
invisible(.Call(ffi_mount_workerfs, base_url, mountpoint))
43+
invisible(.Call(ffi_mount_workerfs, source, mountpoint))
4244
} else if (tolower(type) == "nodefs") {
4345
invisible(.Call(ffi_mount_nodefs, source, mountpoint))
4446
} else if (tolower(type) == "idbfs") {

packages/webr/man/mount.Rd

+7-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/webr/src/mount.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,14 @@ SEXP ffi_mount_workerfs(SEXP source, SEXP mountpoint) {
2929
CHECK_STRING(mountpoint);
3030

3131
EM_ASM({
32-
const baseUrl = UTF8ToString($0);
32+
const source = UTF8ToString($0);
3333
const mountpoint = UTF8ToString($1);
3434
try {
35-
Module.mountImageUrl(`${baseUrl}.data`, mountpoint);
35+
if (ENVIRONMENT_IS_NODE && !/^https?:/.test(source)) {
36+
Module.mountImagePath(source, mountpoint);
37+
} else {
38+
Module.mountImageUrl(source, mountpoint);
39+
}
3640
} catch (e) {
3741
let msg = e.message;
3842
if (e.name === "ErrnoError" && e.errno === 10) {

src/docs/mounting.qmd

+25-13
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,19 @@ Emscripten's API allows for several types of virtual filesystem, depending on th
1414

1515
| Filesystem | Description | Web Browser | Node.js |
1616
|------|-----|------|------|
17-
| `WORKERFS` | Mount Emscripten filesystem images. | &#x2705; | &#x2705;[^workerfs] |
17+
| `WORKERFS` | Mount Emscripten filesystem images. | &#x2705; | &#x2705; |
1818
| `NODEFS` | Mount existing host directories. | &#x274C; | &#x2705; |
1919
| `IDBFS` | Browser-based persistent storage using the [IndexedDB API](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API). | &#x2705;[^idbfs] | &#x274C; |
2020

21-
[^workerfs]: Be aware of the current GitHub issue [#328](https://github.com/r-wasm/webr/issues/328).
2221
[^idbfs]: Using the `PostMessage` [communication channel](communication.qmd) only.
2322

24-
## Emscripten filesystem images
23+
## Filesystem images
2524

26-
Emscripten filesystem images can be mounted using the `WORKERFS` filesystem type.
25+
Filesystem images are pre-prepared files containing a collection of files and associated metadata. The `WORKERFS` filesystem type can be used to efficiently make the contents of a filesystem image available to the WebAssembly R process.
2726

28-
The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool, provided by Emscripten, takes in a directory structure as input and produces webR compatible filesystem images as output. The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool may be invoked from R using the [rwasm](https://r-wasm.github.io/rwasm/) R package:
27+
### Emscripten's `file_packager` tool
28+
29+
The [`file_packager`](https://emscripten.org/docs/porting/files/packaging_files.html#packaging-using-the-file-packager-tool) tool, provided by Emscripten, takes in a directory structure as input and produces a webR-compatible filesystem image as output. The tool may be invoked from R using the [rwasm](https://r-wasm.github.io/rwasm/) R package:
2930

3031
```{r eval=FALSE}
3132
> rwasm::file_packager("./input", out_dir = ".", out_name = "output")
@@ -40,12 +41,25 @@ $ file_packager output.data --preload ./input@/ \
4041

4142
In the above examples, the files in the directory `./input` are packaged and an output filesystem image is created^[When using the `file_packager` CLI, a third file named `output.js` will also be created. If you only plan to mount the image using webR, this file may be discarded.] consisting of a data file, `output.data`, and a metadata file, `output.js.metadata`.
4243

43-
To prepare for mounting the filesystem image with webR, ensure that both files have the same basename (in this example, `output`) and are deployed to static file hosting^[e.g. GitHub Pages, Netlify, AWS S3, etc.]. The resulting URLs for the two files should differ only by the file extension.
44+
To prepare for mounting the filesystem image with webR, ensure that both files have the same basename (in this example, `output`). The resulting URLs or relative paths for the two files should differ only by the file extension.
45+
46+
#### Compression
4447

48+
Filesystem image `.data` files may optionally be `gzip` compressed prior to deployment. The file extension for compressed filesystem images should be `.data.gz`, and compression should be indicated by setting the property `gzip: true` on the metadata JSON stored in the `.js.metadata` file.
4549

46-
## Mount a filesystem image from URL
50+
### Process archives with the `rwasm` package
4751

48-
By default, the [`webr::mount()`](api/r.qmd#mount) function downloads and mounts a filesystem image from a URL source, using the `WORKERFS` filesystem type.
52+
Archives in `.tar` format, optionally gzip compressed as `.tar.gz` or `.tgz` files, can also be used as filesystem images by pre-processing the `.tar` archive using the [rwasm](https://r-wasm.github.io/rwasm/) R package. The `rwasm::add_tar_index()` function reads the archive contents and appends the required filesystem metadata to the end of the `.tar` archive data in a way that is understood by webR.
53+
54+
```{r eval=FALSE}
55+
> rwasm::add_tar_index("./path/to/archive.tar.gz")
56+
```
57+
58+
Once processed by the `rwasm` R package, the archive can be deployed and used directly as a filesystem image.
59+
60+
## Mounting a filesystem image
61+
62+
When running in a web browser, the [`webr::mount()`](api/r.qmd#mount) function downloads and mounts a filesystem image from a URL source, using the `WORKERFS` filesystem type.
4963

5064
```{r eval=FALSE}
5165
webr::mount(
@@ -54,17 +68,15 @@ webr::mount(
5468
)
5569
```
5670

57-
A URL for the filesystem image `.data` file should be provided as the source argument, and the image will be mounted in the virtual filesystem under the path given by the `mountpoint` argument. If the `mountpoint` directory does not exist, it will be created prior to mounting.
58-
59-
### Compression
71+
Filesystem images should be deployed to static file hosting^[e.g. GitHub Pages, Netlify, AWS S3, etc.] and the resulting URL provided as the source argument. The image will be mounted in the virtual filesystem under the path given by the `mountpoint` argument. If the `mountpoint` directory does not exist, it will be created prior to mounting.
6072

61-
Filesystem image `.data` files may optionally be `gzip` compressed prior to deployment. The file extension for compressed filesystem images should be `.data.gz`, and compression should be indicated by setting the property `gzip: true` on the metadata JSON stored in the `.js.metadata` file.
73+
When running under Node.js, the source may also be provided as a relative path to a filesystem image on disk.
6274

6375
### JavaScript API
6476

6577
WebR's JavaScript API includes the [`WebR.FS.mount()`](api/js/classes/WebR.WebR.md#fs) function, a thin wrapper around Emscripten's own [`FS.mount()`](https://emscripten.org/docs/api_reference/Filesystem-API.html#FS.mount). The JavaScript API provides more flexibility but requires a little more set up, including creating the `mountpoint` directory if it does not already exist.
6678

67-
The filesystem type should be provided as a `string`, with the `options` argument a JavaScript object of type [`FSMountOptions`](api/js/modules/WebR.md#fsmountoptions). The filesystem image data should be provided as a JavaScript `Blob` and the metadata as a JavaScript object deserialised from the underlying JSON content.
79+
The filesystem type should be provided as a `string`, and the `options` argument should be an object of type [`FSMountOptions`](api/js/modules/WebR.md#fsmountoptions). The filesystem image data should be provided as either a JavaScript `Blob` object or an `ArrayBuffer`-like object, and the metadata should be provided as a JavaScript object that has been deserialised from the underlying JSON content.
6880

6981
::: {.panel-tabset}
7082
## JavaScript

src/package-lock.json

+6-6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "webr",
3-
"version": "0.4.1",
3+
"version": "0.4.2-dev",
44
"description": "The statistical programming language R compiled into WASM for use in a web browser and node.",
55
"keywords": [
66
"webR",

src/tests/webR/data/test_image.data

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
a, b, c
2+
9, 8, 7
3+
4, 5, 6
4+
x, y, z
5+
1, 2, 3
6+
7, 8, 9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"files":[{"filename":"/abc/bar.csv","start":0,"end":24},{"filename":"/abc/foo.csv","start":24,"end":48}],"remote_package_size":48}

src/tests/webR/data/test_image.tar.gz

783 Bytes
Binary file not shown.
863 Bytes
Binary file not shown.

src/tests/webR/data/testing/foo.csv

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
x, y, z
2+
1, 2, 3
3+
7, 8, 9

src/tests/webR/mount.test.ts

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import { FSMetaData, WebR } from '../../webR/webr-main';
2+
import fs from 'fs';
3+
4+
const webR = new WebR({
5+
baseUrl: '../dist/',
6+
RArgs: ['--quiet'],
7+
});
8+
9+
beforeAll(async () => {
10+
await webR.init();
11+
await webR.evalRVoid('dir.create("/mnt")');
12+
});
13+
14+
async function cleanupMnt() {
15+
try {
16+
await webR.FS.unmount("/mnt");
17+
} catch (e) {
18+
const err = e as Error;
19+
if (err.message !== "FS error") throw err;
20+
}
21+
}
22+
23+
describe('Mount filesystem using R API', () => {
24+
test('Mount v1.0 filesystem image', async () => {
25+
await expect(webR.evalRVoid(
26+
'webr::mount("/mnt", "tests/webR/data/test_image.data", "workerfs")'
27+
)).resolves.not.toThrow();
28+
expect(await webR.evalRString("list.files('/mnt/abc')[2]")).toEqual("foo.csv");
29+
expect(await webR.evalRString("readLines('/mnt/abc/bar.csv')[1]")).toEqual("a, b, c");
30+
await cleanupMnt();
31+
});
32+
33+
test('Mount v2.0 filesystem image', async () => {
34+
await expect(webR.evalRVoid(
35+
'webr::mount("/mnt", "tests/webR/data/test_image.tar.gz", "workerfs")'
36+
)).resolves.not.toThrow();
37+
expect(await webR.evalRString("list.files('/mnt/abc')[2]")).toEqual("foo.csv");
38+
expect(await webR.evalRString("readLines('/mnt/abc/bar.csv')[1]")).toEqual("a, b, c");
39+
await cleanupMnt();
40+
});
41+
42+
test('Mount v2.0 filesystem image - no metadata hint', async () => {
43+
await expect(webR.evalRVoid(
44+
'webr::mount("/mnt", "tests/webR/data/test_image_no_hint.tgz", "workerfs")'
45+
)).resolves.not.toThrow();
46+
expect(await webR.evalRString("list.files('/mnt/abc')[2]")).toEqual("foo.csv");
47+
expect(await webR.evalRString("readLines('/mnt/abc/bar.csv')[1]")).toEqual("a, b, c");
48+
await cleanupMnt();
49+
});
50+
51+
test('Mount filesystem image from URL', async () => {
52+
const url = "https://repo.r-wasm.org/bin/emscripten/contrib/4.4/cli_3.6.3.js.metadata";
53+
await expect(webR.evalRVoid(`
54+
webr::mount("/mnt", "${url}", "workerfs")
55+
`)).resolves.not.toThrow();
56+
expect(await webR.evalRString("readLines('/mnt/DESCRIPTION')[1]")).toEqual("Package: cli");
57+
await cleanupMnt();
58+
});
59+
60+
test('Mount NODEFS filesystem type', async () => {
61+
await expect(webR.evalRVoid(`
62+
webr::mount("/mnt", "tests/webR/data/testing", "nodefs")
63+
`)).resolves.not.toThrow();
64+
expect(await webR.evalRString("readLines('/mnt/foo.csv')[2]")).toEqual("1, 2, 3");
65+
await cleanupMnt();
66+
});
67+
});
68+
69+
describe('Mount filesystem using JS API', () => {
70+
test('Mount filesystem image using Buffer', async () => {
71+
const data = fs.readFileSync("tests/webR/data/test_image.data");
72+
const buf = fs.readFileSync("tests/webR/data/test_image.js.metadata");
73+
const metadata = JSON.parse(new TextDecoder().decode(buf)) as FSMetaData;
74+
await expect(
75+
webR.FS.mount("WORKERFS", { packages: [{ blob: data, metadata: metadata }] }, '/mnt')
76+
).resolves.not.toThrow();
77+
expect(await webR.evalRString("list.files('/mnt/abc')[2]")).toEqual("foo.csv");
78+
expect(await webR.evalRString("readLines('/mnt/abc/bar.csv')[1]")).toEqual("a, b, c");
79+
await cleanupMnt();
80+
});
81+
82+
test('Mount filesystem image using Blob', async () => {
83+
const data = new Blob([fs.readFileSync("tests/webR/data/test_image.data")]);
84+
const buf = fs.readFileSync("tests/webR/data/test_image.js.metadata");
85+
const metadata = JSON.parse(new TextDecoder().decode(buf)) as FSMetaData;
86+
await expect(
87+
webR.FS.mount("WORKERFS", { packages: [{ blob: data, metadata: metadata }] }, '/mnt')
88+
).resolves.not.toThrow();
89+
expect(await webR.evalRString("list.files('/mnt/abc')[2]")).toEqual("foo.csv");
90+
expect(await webR.evalRString("readLines('/mnt/abc/bar.csv')[1]")).toEqual("a, b, c");
91+
await cleanupMnt();
92+
});
93+
94+
test('Mount NODEFS filesystem type', async () => {
95+
await expect(
96+
webR.FS.mount("NODEFS", { root: 'tests/webR/data/testing' }, '/mnt')
97+
).resolves.not.toThrow();
98+
expect(await webR.evalRString("readLines('/mnt/foo.csv')[2]")).toEqual("1, 2, 3");
99+
await cleanupMnt();
100+
});
101+
});

0 commit comments

Comments
 (0)