Skip to content

Commit 314e5fe

Browse files
committed
Rewrite most of the C runtime in Rust πŸŽ‰ πŸ¦€
Unsurprising thing to come out of Rustfest! Woo! The crate now exports a binary and a shared library. The library is linked to the final executable and provides most of the primitives that was written in C. This is a significant improvement since so much code can be reused now and there is no need to write the same functions in multiple languages. Some of the types can now be improved, like aliasing runtime tags into something like `Object` instead of naked i64. cbindgen avoids a lot of boilerplate and helps in keeping function signatures and constants in sync. This patch is a net reduction in size even with over 40 lines of generated headers.
1 parent dcf18ef commit 314e5fe

File tree

9 files changed

+219
-257
lines changed

9 files changed

+219
-257
lines changed

β€Žrs/Cargo.tomlβ€Ž

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ repository = "https://github.com/jaseemabid/inc"
99
categories = []
1010
keywords = ["compiler", "x86", "scheme"]
1111

12+
[lib]
13+
crate_type = ["dylib", "rlib"]
14+
path = "src/lib.rs"
15+
1216
[[bin]]
1317
name = "ss"
1418
path = "src/main.rs"

β€Žrs/Makefileβ€Ž

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,11 @@ CCFLAGS = -g -ggdb3 -m64 -Wall -Wno-override-module -fno-asynchronous-unwind-tab
2727

2828
.DEFAULT_GOAL := inc
2929
inc: runtime.c inc.s
30-
gcc $(CCFLAGS) $^ -o inc
30+
clang -L./target/debug -linc -ldl -lpthread $(CCFLAGS) $^ -o inc
31+
32+
# cargo install --force cbindgen
33+
inc.h: src
34+
cbindgen -l C --crate inc --output inc.h
3135

3236
.PHONY: test
3337
test:

β€Žrs/inc.hβ€Ž

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#include <stdarg.h>
2+
#include <stdbool.h>
3+
#include <stdint.h>
4+
#include <stdlib.h>
5+
6+
#define BOOL 1
7+
8+
#define CHAR 2
9+
10+
#define FALSE 1
11+
12+
#define MASK 7
13+
14+
#define NIL 4
15+
16+
#define NUM 0
17+
18+
#define PAIR 3
19+
20+
#define SHIFT 3
21+
22+
#define STR 5
23+
24+
#define SYM 6
25+
26+
#define TRUE 9
27+
28+
#define VEC 7
29+
30+
#define WORDSIZE 8
31+
32+
int64_t car(int64_t val);
33+
34+
int64_t cdr(int64_t val);
35+
36+
void print(int64_t val, bool nested);
37+
38+
int64_t rt_open_write(int64_t fname);
39+
40+
uintptr_t string_length(int64_t val);
41+
42+
int64_t symbol_eq(int64_t a, int64_t b);
43+
44+
int64_t writeln(int64_t data, int64_t port);

β€Žrs/runtime.cβ€Ž

Lines changed: 3 additions & 253 deletions
Original file line numberDiff line numberDiff line change
@@ -1,145 +1,15 @@
11
#include <assert.h>
2-
#include <fcntl.h>
32
#include <inttypes.h>
43
#include <signal.h>
54
#include <stdbool.h>
65
#include <stddef.h>
7-
#include <stdint.h>
86
#include <stdio.h>
9-
#include <stdlib.h>
107
#include <unistd.h>
8+
#include "inc.h"
119

12-
// The default calling convention used by GCC on x86-64 seems to be System V
13-
// AMD64 ABI, in which arguments are passed in the registers RDI, RSI, RDX, RCX,
14-
// R8, R9 and the return value is passed back in RAX. See x86 module docs for
15-
// more details.
10+
// Explicitly link to the assembly entry point
1611
extern int64_t init(int64_t*) __attribute__((noinline));
1712

18-
const int64_t numtag = 0;
19-
const int64_t booltag = 1;
20-
const int64_t chartag = 2;
21-
const int64_t pairtag = 3;
22-
const int64_t niltag = 4;
23-
const int64_t strtag = 5;
24-
const int64_t symtag = 6;
25-
const int64_t vectag = 7;
26-
27-
const int64_t shift = 3;
28-
const int64_t mask = 7;
29-
30-
const int64_t bool_f = (0 << shift) | booltag;
31-
const int64_t bool_t = (1 << shift) | booltag;
32-
33-
/*
34-
Stdlib
35-
36-
Arguments and return values are immediate encoded
37-
*/
38-
39-
int64_t string_length(int64_t val);
40-
int64_t symbol_eq(int64_t a, int64_t b);
41-
42-
/*
43-
Internal definitions
44-
45-
A value of type `int64_t` is usually an immediate encoded value, and a pointer
46-
type usually refers to raw data.
47-
*/
48-
49-
char *get_str(int64_t val);
50-
char *get_sym_name(int64_t val);
51-
char get_char(int64_t val);
52-
int64_t get_num(int64_t val);
53-
int64_t get_pair_car(int64_t val);
54-
int64_t get_pair_cdr(int64_t val);
55-
int64_t get_strlen(int64_t val);
56-
int64_t get_sym_id(int64_t val);
57-
int64_t get_sym_len(int64_t val);
58-
int64_t get_vec_len(int64_t val);
59-
int64_t get_vec_nth(int64_t val, int n);
60-
61-
/* IO helpers */
62-
63-
int64_t rt_open_write(int64_t fname);
64-
int64_t writeln(int64_t str, int64_t port);
65-
66-
/* --- */
67-
68-
void print(int64_t val, bool nested) {
69-
70-
if ((val & mask) == numtag) {
71-
printf("%" PRId64, get_num(val));
72-
73-
} else if ((val & mask) == chartag) {
74-
char c = get_char(val);
75-
76-
if (c == '\t')
77-
printf("#\\tab");
78-
else if (c == '\n')
79-
printf("#\\newline");
80-
else if (c == '\r')
81-
printf("#\\return");
82-
else if (c == ' ')
83-
printf("#\\space");
84-
else
85-
printf("#\\%c", c);
86-
87-
} else if (val == bool_t) {
88-
printf("#t");
89-
90-
} else if (val == bool_f) {
91-
printf("#f");
92-
93-
} else if (val == niltag) {
94-
printf("()");
95-
96-
} else if ((val & mask) == pairtag) {
97-
int64_t car = get_pair_car(val);
98-
int64_t cdr = get_pair_cdr(val);
99-
100-
if (!nested) printf("(");
101-
102-
print(car, false);
103-
104-
if (cdr != niltag) {
105-
if ((cdr & mask) != pairtag) {
106-
printf(" . ");
107-
print(cdr, false);
108-
} else {
109-
printf(" ");
110-
print(cdr, true);
111-
}
112-
}
113-
if (!nested) printf(")");
114-
115-
} else if ((val & mask) == strtag) {
116-
// A string in memory is a pair of length and a pointer to a blob of
117-
// bytes - ideally guaranteed by the compiler to be valid UTF-8. See
118-
// compiler module for documentation on the layout.
119-
printf("\"");
120-
fwrite(get_str(val), 1, get_strlen(val), stdout);
121-
printf("\"");
122-
123-
} else if ((val & mask) == symtag) {
124-
printf("'");
125-
fwrite(get_sym_name(val), 1, get_sym_len(val), stdout);
126-
127-
} else if ((val & mask) == vectag) {
128-
printf("[");
129-
for (int i = 0; i < get_vec_len(val); i++) {
130-
print(get_vec_nth(val, i), false);
131-
if (i != get_vec_len(val) - 1) {
132-
printf(" ");
133-
}
134-
}
135-
printf("]");
136-
}
137-
138-
else {
139-
printf("Runtime Error: unknown value returned: `%" PRId64 " `\n", val);
140-
}
141-
}
142-
14313
void set_handler(void (*handler)(int, siginfo_t *, void *)) {
14414
struct sigaction action;
14515
action.sa_flags = SA_SIGINFO;
@@ -159,7 +29,6 @@ void handler(int signo, siginfo_t *info, __attribute__((unused)) void *extra) {
15929
}
16030

16131
int main() {
162-
16332
FILE *debug = getenv("DEBUG") ? stderr : fopen("/dev/null", "w");
16433
fprintf(debug, "%s\n\n", "The glorious incremental compiler");
16534

@@ -189,126 +58,7 @@ int main() {
18958

19059
print(val, false);
19160
printf("\n");
61+
fflush(stdout);
19262

19363
free(heap);
19464
}
195-
196-
/*
197-
Stdlib
198-
199-
Arguments and return values are immediate encoded
200-
*/
201-
202-
int64_t string_length(int64_t val) {
203-
return get_strlen(val) << shift;
204-
}
205-
206-
int64_t symbol_eq(int64_t a, int64_t b) {
207-
return ((a == b) && (a & mask) == symtag) ? bool_t : bool_f;
208-
}
209-
210-
int64_t type(int64_t a) {
211-
return (a & mask) << shift;
212-
}
213-
214-
/*
215-
Internal definitions
216-
217-
A value of type `int64_t` is usually an immediate encoded value, and a pointer
218-
type usually refers to raw data.
219-
*/
220-
221-
/* Get raw values from immediate encoded values */
222-
int64_t get_num(int64_t val) {
223-
assert((val & mask) == numtag);
224-
225-
return val >> shift;
226-
}
227-
228-
char get_char(int64_t val) {
229-
assert((val & mask) == chartag);
230-
231-
return val >> shift;
232-
}
233-
234-
int64_t get_pair_car(int64_t val) {
235-
assert((val & mask) == pairtag);
236-
237-
int64_t *p = (int64_t *)(val - pairtag);
238-
return *p;
239-
}
240-
241-
int64_t get_pair_cdr(int64_t val) {
242-
assert((val & mask) == pairtag);
243-
244-
int64_t *p = (int64_t *)(val - pairtag);
245-
return *(p + 1);
246-
}
247-
248-
int64_t get_sym_id(int64_t val) {
249-
assert((val & mask) == symtag);
250-
251-
int64_t *p = (int64_t *)(val - symtag);
252-
return *p;
253-
}
254-
255-
int64_t get_sym_len(int64_t val) {
256-
assert((val & mask) == symtag);
257-
258-
int64_t *p = (int64_t *)(val - symtag);
259-
return *(p + 1);
260-
}
261-
262-
char *get_sym_name(int64_t val) {
263-
assert((val & mask) == symtag);
264-
265-
int64_t *p = (int64_t *)(val - symtag);
266-
return (char *)(p + 2);
267-
}
268-
269-
char *get_str(int64_t val) {
270-
assert((val & mask) == strtag);
271-
272-
int64_t *p = (int64_t *)(val - strtag);
273-
int64_t *str = p + 1;
274-
275-
return (char *)str;
276-
}
277-
278-
int64_t get_strlen(int64_t val) {
279-
assert((val & mask) == strtag);
280-
281-
int64_t *p = (int64_t *)(val - strtag);
282-
return *p;
283-
}
284-
285-
int64_t get_vec_len(int64_t val) {
286-
assert((val & mask) == vectag);
287-
288-
int64_t *p = (int64_t *)(val - vectag);
289-
return *p;
290-
}
291-
292-
int64_t get_vec_nth(int64_t val, int n) {
293-
int64_t *p = (int64_t *)(val - vectag);
294-
return *(p + n + 1);
295-
}
296-
297-
/* IO helpers */
298-
299-
int64_t rt_open_write(int64_t fname) {
300-
char *name = get_str(fname);
301-
302-
int fd = open(name, O_WRONLY | O_CREAT | O_TRUNC, 0640);
303-
return fd * 8;
304-
}
305-
306-
int64_t writeln(int64_t str, int64_t port) {
307-
int64_t fd = get_num(get_vec_nth(port, 2));
308-
char *data = get_str(str);
309-
int len = get_strlen(str);
310-
311-
write(fd, data, len);
312-
write(fd, "\n", 1);
313-
return niltag;
314-
}

β€Žrs/src/cli.rsβ€Ž

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ pub fn build(config: &Config) -> Result<(), Error> {
6262
.arg("-ggdb3")
6363
.arg("-fomit-frame-pointer")
6464
.arg("-fno-asynchronous-unwind-tables")
65+
.arg("-L./target/debug")
66+
.arg("-linc")
67+
.arg("-ldl")
68+
.arg("-lpthread")
6569
.arg("-O0")
6670
.arg("runtime.c")
6771
.arg(&config.asm())
@@ -84,6 +88,9 @@ pub fn build(config: &Config) -> Result<(), Error> {
8488
}
8589

8690
/// Run the generated binary and return output
91+
// Cargo automatically sets the LD_LIBRARY_PATH, which is really convenient here
92+
// because the generated binary is dynamically linked to an artifact in the
93+
// target folder.
8794
pub fn exec(config: &Config) -> Result<Option<String>, Error> {
8895
let path = PathBuf::from(&config.output).canonicalize()?;
8996
let exe = Command::new(&path).output()?;

β€Žrs/src/immediate.rsβ€Ž

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ pub const VEC: i64 = 7;
2525
pub const SHIFT: i64 = 3;
2626
pub const MASK: i64 = 0b0000_0111;
2727

28-
pub const FALSE: i64 = (0 << SHIFT) | BOOL;
29-
pub const TRUE: i64 = (1 << SHIFT) | BOOL;
28+
// cbindgen only understands simple constants
29+
pub const FALSE: i64 = 1; // (0 << SHIFT) | BOOL;
30+
pub const TRUE: i64 = 9; // (1 << SHIFT) | BOOL;
3031

3132
/// Immediate representation of an expression.
3233
pub fn to(prog: &Expr) -> Option<i64> {

β€Žrs/src/lib.rsβ€Ž

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ pub mod lambda;
150150
pub mod lang;
151151
pub mod parser;
152152
pub mod primitives;
153+
pub mod rt;
153154
pub mod runtime;
154155
pub mod strings;
155156
pub mod symbols;

0 commit comments

Comments
Β (0)