A domain-specific language (DSL) is a tiny embedded language specialized for solving a specific category of problems. Once you start looking for them, they're everywhere; printf formats and regular expressions are two obvious examples.
We're going to build a template engine that allows splicing runtime-evaluated expressions into strings. Templates are compiled into operations for the virtual machine we built in the previous chapter.
Example:
struct hc_dsl dsl;
hc_dsl_init(&dsl, &hc_malloc_default);
hc_defer(hc_dsl_deinit(&dsl));
// Capture the DSL's output in memory so it can be inspected below.
struct hc_memory_stream out;
hc_memory_stream_init(&out, &hc_malloc_default);
// out is a struct, not a pointer: use '.' to reach its stream member.
hc_defer(hc_stream_deinit(&out.stream));
dsl.out = &out.stream;
// Bind foo to the string "ghi", then evaluate a template that uses it.
hc_dsl_set_string(&dsl, "foo", "ghi");
hc_dsl_eval(&dsl, "abc $(print (upcase foo)) def");
assert(strcmp("abc GHI def", hc_memory_stream_string(&out)) == 0);
Our DSL consists of an environment, a standard output and a vm.
// State for one DSL instance: an environment, an output stream and a VM.
struct hc_dsl {
// Environment set; hc_dsl_init() sets it up with env_cmp and env_key.
struct hc_set env;
// Stream that lib_print writes to; hc_dsl_init() points it at hc_stdout().
struct hc_stream *out;
// Embedded VM; lib_print recovers the owning hc_dsl from it via hc_baseof().
struct hc_vm vm;
};
// Initializes dsl: sets up the environment set, points the output at
// hc_stdout(), initializes the embedded VM and binds the names "print" and
// "upcase" to lib_print and lib_upcase.
//
// NOTE(review): the usage example calls this with an allocator as a second
// argument; the signature is left unchanged here -- confirm which is intended.
void hc_dsl_init(struct hc_dsl *dsl) {
  // Use the default allocator, as for the VM below. A bare `malloc` here
  // would name the libc function rather than an allocator object.
  hc_set_init(&dsl->env,
              &hc_malloc_default,
              sizeof(struct env_item),
              env_cmp);
  dsl->env.key = env_key;
  dsl->out = hc_stdout();

  // The VM is embedded in dsl; there is no separate `vm` variable in scope.
  hc_vm_init(&dsl->vm, &hc_malloc_default);
  hc_dsl_set_fun(dsl, "print", lib_print);
  hc_dsl_set_fun(dsl, "upcase", lib_upcase);
}
// Comparison callback for the environment set. Both arguments are treated as
// pointers to C-string pointers; the strings they refer to are compared with
// hc_strcmp().
enum hc_order env_cmp(const void *x, const void *y) {
  const char *const *lhs = x;
  const char *const *rhs = y;
  return hc_strcmp(*lhs, *rhs);
}
// Key callback for the environment set: returns the address of the key field
// of the env_item that x points to.
const void *env_key(const void *x) {
  const struct env_item *item = x;
  return &item->key;
}
print pops a value from the stack and prints it to dsl.out.
// Built-in print: pops a value off the VM stack, prints it to the owning
// DSL's output stream and then deinitializes it. sloc is not used.
void lib_print(struct hc_vm *vm, const struct hc_sloc sloc) {
  // vm is the vm member of a struct hc_dsl; step back to the enclosing struct.
  struct hc_dsl *owner = hc_baseof(vm, struct hc_dsl, vm);
  struct hc_value *popped = hc_vm_pop(vm);

  hc_value_print(popped, owner->out);
  hc_value_deinit(popped);
}
While upcase transforms the top value on the stack to uppercase.
// Built-in upcase: uppercases, in place, the string held by the value that
// hc_vm_peek() returns. The value is peeked rather than popped.
// Throws if that value's type is not HC_STRING.
void lib_upcase(struct hc_vm *vm, const struct hc_sloc sloc) {
  struct hc_value *top = hc_vm_peek(vm);

  if (top->type != &HC_STRING) {
    hc_throw("Error in %s: Expected string (%s)",
             hc_sloc_string(&sloc), top->type->name);
  }

  hc_upcase(top->as_string);
}
// Uppercases the NUL-terminated string s in place.
// Returns a pointer to the terminating NUL (the end of the string), not to
// its start.
char *hc_upcase(char *s) {
  for (; *s; s++) {
    // <ctype.h> functions require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behavior.
    *s = (char)toupper((unsigned char)*s);
  }

  return s;
}
The only missing piece of the puzzle at this point is transforming template code into VM operations, a.k.a. syntax.
void hc_dsl_eval(struct hc_dsl *dsl, const char *in) {
struct hc_list forms;
hc_list_init(&forms);
hc_defer(hc_forms_free(&forms));
struct hc_sloc sloc = hc_sloc("eval", 0, 0);
while (hc_read_next(&in, &forms, &sloc));
const size_t pc = dsl->vm.code.length;
hc_forms_emit(&forms, dsl);
hc_vm_eval(&dsl->vm, pc, -1);
}The top layer of our parser simply checks for $ and uses that decide what do next.
bool hc_read_next(const char **in,
struct hc_list *out,
struct hc_sloc *sloc) {
if (**in == '$') {
(*in)++;
hc_read_call(in, out, sloc);
return true;
}
return hc_read_text(in, out, sloc);
}A call consists of a target and optional arguments.
// Reads a call of the form "(target arg...)" from *in and initializes a new
// hc_call with out as its list. *in must point at the opening '('.
// Throws on a missing '(', a missing target, an argument that
// hc_read_expr() rejects, or input that ends before the closing ')'.
void hc_read_call(const char **in,
                  struct hc_list *out,
                  struct hc_sloc *sloc) {
  // Location of the call itself, captured before anything is consumed.
  struct hc_sloc floc = *sloc;

  if (**in != '(') {
    hc_throw("Error in %s: Invalid call syntax",
             hc_sloc_string(sloc));
  }

  (*in)++;
  sloc->col++;
  hc_skip_ws(in, sloc);

  if (!hc_read_expr(in, out, sloc)) {
    hc_throw("Error in %s: Missing call target",
             hc_sloc_string(sloc));
  }

  // Take the target just read back off the end of out so it can be handed
  // to the call form instead.
  struct hc_form *t = hc_baseof(hc_list_pop_back(out),
                                struct hc_form,
                                owner);
  hc_list_init(&t->owner);

  struct hc_call *f = malloc(sizeof *f);
  hc_call_init(f, floc, out, t);

  for (bool done = false; !done;) {
    hc_skip_ws(in, sloc);

    switch (**in) {
    case 0:
      // Input ended before the closing ')'. Pass the embedded form rather
      // than the hc_call pointer itself.
      // NOTE(review): relies on hc_throw() not returning; otherwise control
      // would fall through into the ')' case.
      hc_form_free(&f->form);
      hc_throw("Error in %s: Open call form",
               hc_sloc_string(sloc));
    case ')':
      (*in)++;
      sloc->col++;
      done = true;
      continue;
    default:
      break;
    }

    // Anything else must be an argument expression.
    if (!hc_read_expr(in, &f->args, sloc)) {
      hc_form_free(&f->form);
      hc_throw("Error in %s: Invalid call syntax",
               hc_sloc_string(sloc));
    }
  }
}
When emitted, calls get the value of the target and emit arguments, if any, followed by a `HC_CALL` operation.
static void call_emit(struct hc_form *_f, struct hc_vm *vm) {
struct hc_call *f = hc_baseof(_f, struct hc_call, form);
struct hc_value *t = hc_form_value(f->target, vm);
if (!t) {
hc_throw("Error in %s: Missing call target",
hc_sloc_string(&_f->sloc));
}
if (t->type != &HC_VM_FUN) {
hc_throw("Error in %s: '%s' isn't callable",
hc_sloc_string(&_f->sloc),
t->type->name);
}
hc_list_do(&f->args, a) {
hc_form_emit(hc_baseof(a, struct hc_form, owner), vm);
}
hc_vm_emit(vm,
&HC_CALL,
&(struct hc_call_op){
.target = t->as_other,
.sloc = _f->sloc
});
}hc_skip_ws() simply skips forward as long as the current char is some kind of whitespace.
// Advances *in past spaces, tabs and newlines, keeping sloc in step:
// a space or tab moves one column right, a newline moves to the next row
// and resets the column to 0. Stops at the first other character,
// including the terminating NUL.
void hc_skip_ws(const char **in, struct hc_sloc *sloc) {
  for (;;) {
    const char c = **in;

    if (c == ' ' || c == '\t') {
      sloc->col++;
    } else if (c == '\n') {
      sloc->row++;
      sloc->col = 0;
    } else {
      return;
    }

    (*in)++;
  }
}
hc_read_expr() handles anything allowed inside $(), which means another call or an identifier.
// Reads one expression from *in: a nested call if it starts with '(', or an
// identifier if it starts with an alphabetic character.
// Returns true if an expression was read, false (consuming nothing) if the
// current character starts neither.
bool hc_read_expr(const char **in,
                  struct hc_list *out,
                  struct hc_sloc *sloc) {
  const char c = **in;

  if (c == '(') {
    hc_read_call(in, out, sloc);
    return true;
  }

  // Cast first: isalpha() is undefined for negative values other than EOF,
  // and plain char may be negative for bytes >= 0x80.
  if (isalpha((unsigned char)c)) {
    hc_read_id(in, out, sloc);
    return true;
  }

  return false;
}
Identifiers are required to start with an alphabetic char; following that, anything except whitespace and parens is allowed.
void hc_read_id(const char **in,
struct hc_list *out,
struct hc_sloc *sloc) {
struct hc_sloc floc = *sloc;
struct hc_memory_stream buf;
hc_memory_stream_init(&buf, &hc_malloc_default);
hc_defer(hc_stream_deinit(&buf.stream));
char c = 0;
while ((c = **in)) {
if (isspace(c) || c == '(' || c == ')') {
break;
}
hc_putc(&buf.stream, c);
sloc->col++;
(*in)++;
}
struct hc_id *f = malloc(sizeof(struct hc_id));
hc_id_init(f, floc, out, hc_memory_stream_string(&buf));
}Identifiers get their values from dsl.env and emit an operation to push it on the stack.
void id_emit(struct hc_form *_f, struct hc_dsl *dsl) {
struct hc_id *f = hc_baseof(_f, struct hc_id, form);
struct hc_value *v = hc_dsl_getenv(dsl, f->name);
if (!v) {
hc_throw("Error in %s: Unknown identifier '%s'",
hc_sloc_string(&_f->sloc), f->name);
}
struct hc_push_op op;
hc_value_copy(&op.value, v);
hc_vm_emit(&dsl->vm, &HC_PUSH, &op);
}The text parser keeps going until a $ is found or until it reaches the end of the string, it then constructs a print call with the text as argument.
bool hc_read_text(const char **in,
struct hc_list *out,
struct hc_sloc *sloc) {
struct hc_sloc floc = *sloc;
const char *start = *in;
while (**in && **in != '$') {
if (**in == '\n') {
sloc->row++;
} else {
sloc->col++;
}
(*in)++;
}
size_t n = *in - start;
if (n) {
struct hc_value v;
hc_value_init(&v, &HC_STRING)->as_string = strndup(start, n);
struct hc_literal *vf = malloc(sizeof(struct hc_literal));
hc_literal_init(vf, floc, out);
vf->value = v;
struct hc_id *t = malloc(sizeof(struct hc_literal));
hc_id_init(t, floc, NULL, "print");
struct hc_call *c = malloc(sizeof(struct hc_call));
hc_call_init(c, floc, out, &t->form);
return true;
}
return false;
}