Created
May 10, 2024 22:39
-
-
Save skeeto/230df9162352ab686c4f788bffd2f5dd to your computer and use it in GitHub Desktop.
Mini lisp-like interpreter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Mini lisp-like interpreter | |
// $ cc -o lisp lisp.c | |
// $ ./lisp "(cons (+ 1 2 3 (* -40 million)) (cdr (quote (1 2 3))))" | |
// (-39999994 2 3) | |
// This is free and unencumbered software released into the public domain. | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <string.h> | |
#define assert(c) while (!(c)) *(volatile int *)0 = 0 | |
#define new(a, t, n) (t *)alloc(a, sizeof(t), _Alignof(t), n) | |
#define S(s) (str){s, sizeof(s)-1} | |
typedef struct { | |
char *beg; | |
char *end; | |
} arena; | |
static char *alloc(arena *a, ptrdiff_t size, ptrdiff_t align, ptrdiff_t count) | |
{ | |
ptrdiff_t pad = (uintptr_t)a->end & (align - 1); | |
assert(count < (a->end - a->beg - pad)/size); | |
return memset(a->end -= size*count + pad, 0, size*count); | |
} | |
typedef struct { | |
char *data; | |
ptrdiff_t len; | |
} str; | |
static str dup(str s, arena *perm) | |
{ | |
str r = s; | |
r.data = new(perm, char, s.len+1); | |
memcpy(r.data, s.data, s.len); | |
return r; | |
} | |
static _Bool equals(str a, str b) | |
{ | |
return a.len==b.len && (!a.len || !memcmp(a.data, b.data, a.len)); | |
} | |
static uint64_t hash(str s) | |
{ | |
uint64_t r = 0x100; | |
for (ptrdiff_t i = 0; i < s.len; i++) { | |
r ^= s.data[i] & 255; | |
r *= 1111111111111111111u; | |
} | |
return r; | |
} | |
static str import(char *s) | |
{ | |
str r = {0}; | |
r.data = s; | |
for (; s[r.len]; r.len++) {} | |
return r; | |
} | |
static str slice(str s, ptrdiff_t beg, ptrdiff_t end) | |
{ | |
return (str){s.data+beg, end-beg}; | |
} | |
static str skipspace(str s) | |
{ | |
for (; s.len && *s.data<=' '; s = slice(s, 1, s.len)) {} | |
return s; | |
} | |
static int64_t parseint(str s) | |
{ | |
int64_t r = 0; | |
int64_t sign = +1; | |
assert(s.len); | |
switch (*s.data) { | |
case '-': sign = -1; s = slice(s, 1, s.len); break; | |
case '+': sign = +1; s = slice(s, 1, s.len); break; | |
} | |
for (ptrdiff_t i = 0; i < s.len; i++) { | |
r = (uint64_t)r*10 + (s.data[i] - '0'); | |
} | |
return (uint64_t)r * sign; | |
} | |
typedef enum { | |
TOKEN_EOF, | |
TOKEN_LPAREN, | |
TOKEN_RPAREN, | |
TOKEN_SYMBOL, | |
TOKEN_INTEGER, | |
} tokentype; | |
typedef struct { | |
str head; | |
str tail; | |
tokentype type; | |
} token; | |
static token next(str s) | |
{ | |
token r = {0}; | |
s = skipspace(s); | |
if (!s.len) return r; | |
ptrdiff_t len = 1; | |
switch (s.data[0]) { | |
case '(': | |
r.type = TOKEN_LPAREN; | |
break; | |
case ')': | |
r.type = TOKEN_RPAREN; | |
break; | |
case '-': case '+': | |
if (s.len<2 || s.data[1]<'0' || s.data[1]>'9') goto symbol; | |
// fallthrough | |
case '0': case '1': case '2': case '3': case '4': | |
case '5': case '6': case '7': case '8': case '9': | |
r.type = TOKEN_INTEGER; | |
for (; len<s.len && s.data[len]>='0' && s.data[len]<='9'; len++) {} | |
break; | |
default: | |
symbol: | |
r.type = TOKEN_SYMBOL; | |
for (; len<s.len && s.data[len]>' ' | |
&& s.data[len]!='(' | |
&& s.data[len]!=')'; len++) {} | |
} | |
r.head = slice(s, 0, len); | |
r.tail = slice(s, len, s.len); | |
return r; | |
} | |
typedef enum { | |
VALUE_INTEGER, | |
VALUE_SYMBOL, | |
VALUE_CONS, | |
VALUE_PROC, | |
} valuetype; | |
typedef struct value value; | |
typedef struct { | |
value *car; | |
value *cdr; | |
} cons; | |
typedef struct { | |
str name; | |
value *value; | |
} symbol; | |
typedef struct strtab strtab; | |
typedef value *(*proc)(value *args, strtab **st, arena *perm); | |
struct value { | |
union { | |
int64_t integer; | |
symbol symbol; | |
cons cons; | |
proc proc; | |
}; | |
valuetype type; | |
}; | |
static value *newinteger(int64_t x, arena *perm) | |
{ | |
value *r = new(perm, value, 1); | |
r->integer = x; | |
r->type = VALUE_INTEGER; | |
return r; | |
} | |
struct strtab { | |
strtab *child[4]; | |
value key; | |
}; | |
static value *intern(strtab **t, str name, arena *perm) | |
{ | |
for (uint64_t h = hash(name); *t; h <<= 2) { | |
if (equals(name, (*t)->key.symbol.name)) { | |
return &(*t)->key; | |
} | |
t = &(*t)->child[h>>62]; | |
} | |
*t = new(perm, strtab, 1); | |
(*t)->key.symbol.name = dup(name, perm); | |
(*t)->key.type = VALUE_SYMBOL; | |
return &(*t)->key; | |
} | |
static value *newcons(value *car, value *cdr, arena *perm) | |
{ | |
value *r = new(perm, value, 1); | |
r->type = VALUE_CONS; | |
r->cons.car = car; | |
r->cons.cdr = cdr; | |
return r; | |
} | |
static value *newproc(proc p, arena *perm) | |
{ | |
value *r = new(perm, value, 1); | |
r->proc = p; | |
r->type = VALUE_PROC; | |
return r; | |
} | |
typedef struct { | |
value *value; | |
str tail; | |
_Bool eof; | |
} parsed; | |
static parsed parse(str s, strtab **st, arena *perm) | |
{ | |
parsed r = {0}; | |
token t = {0}; | |
t.tail = s; | |
for (;;) { | |
t = next(t.tail); | |
switch (t.type) { | |
case TOKEN_EOF: | |
r.eof = 1; | |
return r; | |
case TOKEN_LPAREN:; | |
value *nil = intern(st, S("nil"), perm); | |
value *head = nil; | |
value **tail = &head; | |
for (;;) { | |
parsed n = parse(t.tail, st, perm); | |
t.tail = n.tail; | |
if (n.eof) return r; // error | |
if (!n.value) break; // rparen | |
*tail = newcons(n.value, nil, perm); | |
tail = &(*tail)->cons.cdr; | |
} | |
r.value = head ? head : nil; | |
r.tail = t.tail; | |
return r; | |
case TOKEN_RPAREN: | |
r.tail = t.tail; | |
return r; | |
case TOKEN_SYMBOL: | |
r.value = intern(st, t.head, perm); | |
r.tail = t.tail; | |
return r; | |
case TOKEN_INTEGER: | |
r.value = newinteger(parseint(t.head), perm); | |
r.tail = t.tail; | |
return r; | |
} | |
} | |
} | |
static void print(value *v, strtab **st, arena *perm) | |
{ | |
switch (v->type) { | |
case VALUE_INTEGER: | |
printf("%lld", (long long)v->integer); | |
break; | |
case VALUE_SYMBOL: | |
fwrite(v->symbol.name.data, v->symbol.name.len, 1, stdout); | |
break; | |
case VALUE_CONS: | |
putchar('('); | |
value *nil = intern(st, S("nil"), perm); | |
for (;;) { | |
print(v->cons.car, st, perm); | |
v = v->cons.cdr; | |
if (v == nil) break; | |
if (v->type != VALUE_CONS) { | |
fputs(" . ", stdout); | |
print(v, st, perm); | |
break; | |
} | |
putchar(' '); | |
} | |
putchar(')'); | |
break; | |
case VALUE_PROC: | |
puts("<proc>"); | |
break; | |
} | |
} | |
static value *eval(value *args, strtab **st, arena *perm) | |
{ | |
value *nil = intern(st, S("nil"), perm); | |
switch (args->type) { | |
case VALUE_CONS:; | |
value *v = args->cons.car; | |
switch (v->type) { | |
case VALUE_CONS: | |
case VALUE_INTEGER: | |
return nil; // error | |
case VALUE_SYMBOL: | |
if (v == intern(st, S("quote"), perm)) { | |
if (args->cons.cdr->type != VALUE_CONS) { | |
return nil; // error | |
} | |
return args->cons.cdr->cons.car; | |
} | |
value *proc = v->symbol.value; | |
if (!proc || proc->type != VALUE_PROC) { | |
return nil; // error | |
} | |
value *head = nil; | |
value **tail = &head; | |
v = args->cons.cdr; | |
for (; v->type == VALUE_CONS; v = v->cons.cdr) { | |
value *arg = eval(v->cons.car, st, perm); | |
*tail = newcons(arg, nil, perm); | |
tail = &(*tail)->cons.cdr; | |
} | |
return proc->proc(head, st, perm); | |
case VALUE_PROC: | |
return v; | |
} | |
break; | |
case VALUE_SYMBOL: | |
if (!args->symbol.value) { | |
return nil; // error: unbound | |
} | |
return args->symbol.value; | |
case VALUE_INTEGER: | |
return args; | |
case VALUE_PROC: | |
return args; | |
} | |
assert(0); | |
return 0; | |
} | |
static value *proc_add(value *args, strtab **st, arena *perm) | |
{ | |
value *r = newinteger(0, perm); | |
for (; args->type == VALUE_CONS; args = args->cons.cdr) { | |
if (args->cons.car->type == VALUE_INTEGER) { | |
r->integer += (uint64_t)args->cons.car->integer; | |
} | |
} | |
return r; | |
} | |
static value *proc_mul(value *args, strtab **st, arena *perm) | |
{ | |
value *r = newinteger(1, perm); | |
for (; args->type == VALUE_CONS; args = args->cons.cdr) { | |
if (args->cons.car->type == VALUE_INTEGER) { | |
r->integer *= (uint64_t)args->cons.car->integer; | |
} | |
} | |
return r; | |
} | |
static value *proc_car(value *args, strtab **st, arena *perm) | |
{ | |
if (args->cons.car->type == VALUE_CONS) { | |
return args->cons.car->cons.car; | |
} | |
return intern(st, S("nil"), perm); // error | |
} | |
static value *proc_cdr(value *args, strtab **st, arena *perm) | |
{ | |
if (args->cons.car->type == VALUE_CONS) { | |
return args->cons.car->cons.cdr; | |
} | |
return intern(st, S("nil"), perm); // error | |
} | |
static value *proc_cons(value *args, strtab **st, arena *perm) | |
{ | |
value *car = args->cons.car; | |
value *cdr = args->cons.cdr; | |
if (cdr->type != VALUE_CONS) { | |
intern(st, S("nil"), perm); // error | |
} | |
return newcons(car, cdr->cons.car, perm); | |
} | |
static void define(strtab **st, str name, value *v, arena *perm) | |
{ | |
intern(st, name, perm)->symbol.value = v; | |
} | |
int main(int argc, char **argv) | |
{ | |
static char heap[1<<24]; // NOTE: technically strict aliasing issue | |
arena scratch = {heap, heap+sizeof(heap)}; | |
for (int i = 1; i < argc; i++) { | |
// Each argv is evaluated in a fresh context, freed at the end | |
// of the loop iteraiton. | |
arena temp = scratch; | |
strtab *st = 0; | |
define(&st, S("+"), newproc(proc_add, &temp), &temp); | |
define(&st, S("*"), newproc(proc_mul, &temp), &temp); | |
define(&st, S("car"), newproc(proc_car, &temp), &temp); | |
define(&st, S("cdr"), newproc(proc_cdr, &temp), &temp); | |
define(&st, S("cons"), newproc(proc_cons, &temp), &temp); | |
define(&st, S("million"), newinteger(1000000, &temp), &temp); | |
value *v = parse(import(argv[i]), &st, &temp).value; | |
if (!v) { | |
puts("ERROR: invalid input"); | |
continue; | |
} | |
value *r = eval(v, &st, &temp); | |
print(r, &st, &temp); | |
putchar('\n'); | |
} | |
fflush(stdout); | |
return ferror(stderr); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment