Created
July 18, 2025 07:23
-
-
Save 7etsuo/9a661babba35bda8e8d840a7c7f0fd57 to your computer and use it in GitHub Desktop.
Turing-complete Scheme interpreter in C that supports lexical scoping, closures, continuations, and proper tail-call for tail recursion without stack growth.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <ctype.h> | |
#include <string.h> | |
typedef struct Obj *Obj; | |
typedef struct Kont *Kont; | |
enum Type { T_PAIR, T_SYMBOL, T_NUMBER, T_BOOLEAN, T_PRIMITIVE, T_CLOSURE, T_CONTINUATION }; | |
struct Obj { | |
enum Type type; | |
union { | |
char *symbol; | |
long number; | |
struct { | |
Obj car; | |
Obj cdr; | |
} pair; | |
struct { | |
Obj (*fn)(Obj args); | |
} primitive; | |
struct { | |
Obj params; | |
Obj body; | |
Obj env; | |
} closure; | |
struct { | |
Kont kont; | |
} continuation; | |
} data; | |
}; | |
enum KontType { K_HALT, K_IF, K_SEQ, K_OP, K_ARG, K_DEFINE, K_SET, K_CALLCC }; | |
struct Kont { | |
enum KontType type; | |
Kont next; | |
union { | |
struct { | |
Obj thenb; | |
Obj elseb; | |
Obj env; | |
} if_k; | |
struct { | |
Obj rest; | |
Obj env; | |
} seq_k; | |
struct { | |
Obj args; | |
Obj env; | |
} op_k; | |
struct { | |
Obj rest; | |
Obj accum; | |
Obj op; | |
Obj env; | |
} arg_k; | |
struct { | |
Obj var; | |
Obj env; | |
} define_k; | |
struct { | |
Obj var; | |
Obj env; | |
} set_k; | |
} data; | |
}; | |
Obj alloc_obj(enum Type t) { | |
Obj o = malloc(sizeof(struct Obj)); | |
if (!o) { | |
fprintf(stderr, "out of memory\n"); | |
exit(1); | |
} | |
o->type = t; | |
return o; | |
} | |
Kont alloc_kont(enum KontType t, Kont n) { | |
Kont k = malloc(sizeof(struct Kont)); | |
if (!k) { | |
fprintf(stderr, "out of memory\n"); | |
exit(1); | |
} | |
k->type = t; | |
k->next = n; | |
return k; | |
} | |
Kont copy_kont(Kont k) { | |
if (!k) return NULL; | |
Kont new = malloc(sizeof(struct Kont)); | |
if (!new) { | |
fprintf(stderr, "out of memory\n"); | |
exit(1); | |
} | |
*new = *k; | |
new->next = copy_kont(k->next); | |
return new; | |
} | |
Obj cons(Obj car, Obj cdr) { | |
Obj o = alloc_obj(T_PAIR); | |
o->data.pair.car = car; | |
o->data.pair.cdr = cdr; | |
return o; | |
} | |
Obj car(Obj o) { | |
if (o && o->type == T_PAIR) return o->data.pair.car; | |
fprintf(stderr, "car of non-pair\n"); | |
exit(1); | |
return NULL; | |
} | |
Obj cdr(Obj o) { | |
if (o && o->type == T_PAIR) return o->data.pair.cdr; | |
fprintf(stderr, "cdr of non-pair\n"); | |
exit(1); | |
return NULL; | |
} | |
Obj cadr(Obj o) { return car(cdr(o)); } | |
Obj caddr(Obj o) { return car(cdr(cdr(o))); } | |
Obj cadddr(Obj o) { return car(cdr(cdr(cdr(o)))); } | |
void error(const char *msg) { | |
fprintf(stderr, "%s\n", msg); | |
exit(1); | |
} | |
Obj false_obj, true_obj; | |
void init_constants() { | |
false_obj = alloc_obj(T_BOOLEAN); | |
false_obj->data.number = 0; | |
true_obj = alloc_obj(T_BOOLEAN); | |
true_obj->data.number = 1; | |
} | |
int is_true(Obj o) { return o != false_obj; } | |
Obj intern(const char *name) { | |
Obj o = alloc_obj(T_SYMBOL); | |
o->data.symbol = strdup(name); | |
if (!o->data.symbol) error("out of memory"); | |
return o; | |
} | |
int equal(Obj a, Obj b) { | |
if (a == b) return 1; | |
if (!a || !b) return 0; | |
if (a->type != b->type) return 0; | |
switch (a->type) { | |
case T_SYMBOL: return strcmp(a->data.symbol, b->data.symbol) == 0; | |
case T_NUMBER: return a->data.number == b->data.number; | |
case T_BOOLEAN: return a->data.number == b->data.number; | |
case T_PAIR: return equal(car(a), car(b)) && equal(cdr(a), cdr(b)); | |
default: return 0; | |
} | |
} | |
Obj global_env; | |
void add_binding(Obj env, Obj sym, Obj val) { | |
Obj binding = cons(sym, val); | |
Obj alist = car(env); | |
env->data.pair.car = cons(binding, alist); | |
} | |
Obj make_primitive(Obj (*fn)(Obj)) { | |
Obj o = alloc_obj(T_PRIMITIVE); | |
o->data.primitive.fn = fn; | |
return o; | |
} | |
Obj lookup(Obj env, Obj var) { | |
for (Obj frame = env; frame; frame = cdr(frame)) { | |
for (Obj alist = car(frame); alist; alist = cdr(alist)) { | |
Obj binding = car(alist); | |
if (equal(car(binding), var)) return cdr(binding); | |
} | |
} | |
error("unbound variable"); | |
return NULL; | |
} | |
void set_var(Obj env, Obj var, Obj val) { | |
for (Obj frame = env; frame; frame = cdr(frame)) { | |
for (Obj alist = car(frame); alist; alist = cdr(alist)) { | |
Obj binding = car(alist); | |
if (equal(car(binding), var)) { | |
binding->data.pair.cdr = val; | |
return; | |
} | |
} | |
} | |
error("unbound variable for set!"); | |
} | |
Obj extend_env(Obj params, Obj args, Obj base) { | |
Obj alist = NULL; | |
Obj p = params, a = args; | |
while (p) { | |
if (!a) error("too few arguments"); | |
alist = cons(cons(car(p), car(a)), alist); | |
p = cdr(p); | |
a = cdr(a); | |
} | |
if (a) error("too many arguments"); | |
return cons(alist, base); | |
} | |
int list_length(Obj l) { | |
int n = 0; | |
for (; l; l = cdr(l)) n++; | |
return n; | |
} | |
Obj list_reverse(Obj l) { | |
Obj r = NULL; | |
for (; l; l = cdr(l)) r = cons(car(l), r); | |
return r; | |
} | |
Obj prim_plus(Obj args) { | |
long sum = 0; | |
for (Obj l = args; l; l = cdr(l)) { | |
Obj arg = car(l); | |
if (arg->type != T_NUMBER) error("+ expects numbers"); | |
sum += arg->data.number; | |
} | |
Obj res = alloc_obj(T_NUMBER); | |
res->data.number = sum; | |
return res; | |
} | |
Obj prim_mult(Obj args) { | |
long prod = 1; | |
for (Obj l = args; l; l = cdr(l)) { | |
Obj arg = car(l); | |
if (arg->type != T_NUMBER) error("* expects numbers"); | |
prod *= arg->data.number; | |
} | |
Obj res = alloc_obj(T_NUMBER); | |
res->data.number = prod; | |
return res; | |
} | |
Obj prim_minus(Obj args) { | |
if (!args) error("- needs at least one arg"); | |
Obj first = car(args); | |
if (first->type != T_NUMBER) error("- expects numbers"); | |
long res = first->data.number; | |
if (!cdr(args)) { | |
Obj o = alloc_obj(T_NUMBER); | |
o->data.number = -res; | |
return o; | |
} | |
for (Obj l = cdr(args); l; l = cdr(l)) { | |
Obj arg = car(l); | |
if (arg->type != T_NUMBER) error("- expects numbers"); | |
res -= arg->data.number; | |
} | |
Obj o = alloc_obj(T_NUMBER); | |
o->data.number = res; | |
return o; | |
} | |
Obj prim_eq(Obj args) { | |
if (list_length(args) != 2) error("= takes two args"); | |
Obj a = car(args), b = cadr(args); | |
if (a->type != T_NUMBER || b->type != T_NUMBER) error("= for numbers"); | |
return a->data.number == b->data.number ? true_obj : false_obj; | |
} | |
Obj prim_cons(Obj args) { | |
if (list_length(args) != 2) error("cons takes two args"); | |
return cons(car(args), cadr(args)); | |
} | |
Obj prim_car(Obj args) { | |
if (list_length(args) != 1) error("car takes one arg"); | |
return car(car(args)); | |
} | |
Obj prim_cdr(Obj args) { | |
if (list_length(args) != 1) error("cdr takes one arg"); | |
return cdr(car(args)); | |
} | |
void init_globals() { | |
global_env = cons(NULL, NULL); | |
add_binding(global_env, intern("+"), make_primitive(prim_plus)); | |
add_binding(global_env, intern("*"), make_primitive(prim_mult)); | |
add_binding(global_env, intern("-"), make_primitive(prim_minus)); | |
add_binding(global_env, intern("="), make_primitive(prim_eq)); | |
add_binding(global_env, intern("cons"), make_primitive(prim_cons)); | |
add_binding(global_env, intern("car"), make_primitive(prim_car)); | |
add_binding(global_env, intern("cdr"), make_primitive(prim_cdr)); | |
} | |
Obj read_expr(char **ps) { | |
while (isspace(**ps)) (*ps)++; | |
if (**ps == '\0') return NULL; | |
char ch = **ps; | |
if (ch == '(') { | |
(*ps)++; | |
Obj list = NULL; | |
Obj tail = NULL; | |
while (1) { | |
while (isspace(**ps)) (*ps)++; | |
if (**ps == ')') { | |
(*ps)++; | |
return list; | |
} | |
if (**ps == '\0') error("unclosed list"); | |
Obj item = read_expr(ps); | |
Obj newp = cons(item, NULL); | |
if (tail) tail->data.pair.cdr = newp; | |
else list = newp; | |
tail = newp; | |
} | |
} else if (ch == '\'') { | |
(*ps)++; | |
Obj item = read_expr(ps); | |
return cons(intern("quote"), cons(item, NULL)); | |
} else if (ch == '#') { | |
(*ps)++; | |
ch = **ps; | |
if (ch == 't') { | |
(*ps)++; | |
return true_obj; | |
} else if (ch == 'f') { | |
(*ps)++; | |
return false_obj; | |
} else error("invalid #"); | |
} else if (isdigit(ch) || ch == '-') { | |
int sign = 1; | |
long num = 0; | |
if (ch == '-') { | |
sign = -1; | |
(*ps)++; | |
} | |
while (isdigit(**ps)) { | |
num = num * 10 + (**ps - '0'); | |
(*ps)++; | |
} | |
Obj o = alloc_obj(T_NUMBER); | |
o->data.number = sign * num; | |
return o; | |
} else if (isalpha(ch) || strchr("+*-=><!?", ch)) { | |
char buf[256]; | |
int i = 0; | |
buf[i++] = ch; | |
(*ps)++; | |
while (isalnum(**ps) || strchr("+*-=><!?", **ps)) { | |
buf[i++] = **ps; | |
(*ps)++; | |
} | |
buf[i] = 0; | |
return intern(buf); | |
} else error("invalid character"); | |
return NULL; | |
} | |
void print_obj(Obj o) { | |
if (!o) { | |
printf("()"); | |
return; | |
} | |
switch (o->type) { | |
case T_PAIR: | |
printf("("); | |
print_obj(car(o)); | |
for (Obj t = cdr(o); t; t = cdr(t)) { | |
printf(" "); | |
print_obj(car(t)); | |
} | |
printf(")"); | |
break; | |
case T_SYMBOL: | |
printf("%s", o->data.symbol); | |
break; | |
case T_NUMBER: | |
printf("%ld", o->data.number); | |
break; | |
case T_BOOLEAN: | |
printf("#%c", o->data.number ? 't' : 'f'); | |
break; | |
case T_CLOSURE: | |
printf("<closure>"); | |
break; | |
case T_PRIMITIVE: | |
printf("<primitive>"); | |
break; | |
case T_CONTINUATION: | |
printf("<continuation>"); | |
break; | |
} | |
} | |
Obj eval_expr(Obj initial_expr) { | |
Obj expr = initial_expr; | |
Obj env = global_env; | |
Kont kont = alloc_kont(K_HALT, NULL); | |
Obj value = NULL; | |
Obj op = NULL; | |
Obj args_list = NULL; | |
eval_loop: | |
if (!expr) { | |
value = NULL; | |
goto continue_val; | |
} | |
switch (expr->type) { | |
case T_NUMBER: | |
case T_BOOLEAN: | |
value = expr; | |
goto continue_val; | |
case T_SYMBOL: | |
value = lookup(env, expr); | |
goto continue_val; | |
case T_PAIR: { | |
Obj head = car(expr); | |
if (head->type == T_SYMBOL) { | |
char *sym = head->data.symbol; | |
if (strcmp(sym, "quote") == 0) { | |
value = cadr(expr); | |
goto continue_val; | |
} else if (strcmp(sym, "if") == 0) { | |
Kont newk = alloc_kont(K_IF, kont); | |
newk->data.if_k.thenb = caddr(expr); | |
newk->data.if_k.elseb = cadddr(expr); | |
newk->data.if_k.env = env; | |
kont = newk; | |
expr = cadr(expr); | |
goto eval_loop; | |
} else if (strcmp(sym, "lambda") == 0) { | |
Obj cl = alloc_obj(T_CLOSURE); | |
cl->data.closure.params = cadr(expr); | |
cl->data.closure.body = cddr(expr); | |
cl->data.closure.env = env; | |
value = cl; | |
goto continue_val; | |
} else if (strcmp(sym, "begin") == 0) { | |
Obj body = cdr(expr); | |
if (!body) { | |
value = NULL; | |
goto continue_val; | |
} | |
expr = car(body); | |
Obj rest = cdr(body); | |
if (rest) { | |
Kont newk = alloc_kont(K_SEQ, kont); | |
newk->data.seq_k.rest = rest; | |
newk->data.seq_k.env = env; | |
kont = newk; | |
} | |
goto eval_loop; | |
} else if (strcmp(sym, "define") == 0) { | |
Obj var = cadr(expr); | |
if (var->type != T_SYMBOL) error("define not on symbol"); | |
Kont newk = alloc_kont(K_DEFINE, kont); | |
newk->data.define_k.var = var; | |
newk->data.define_k.env = env; | |
kont = newk; | |
expr = caddr(expr); | |
goto eval_loop; | |
} else if (strcmp(sym, "set!") == 0) { | |
Obj var = cadr(expr); | |
if (var->type != T_SYMBOL) error("set! not on symbol"); | |
Kont newk = alloc_kont(K_SET, kont); | |
newk->data.set_k.var = var; | |
newk->data.set_k.env = env; | |
kont = newk; | |
expr = caddr(expr); | |
goto eval_loop; | |
} else if (strcmp(sym, "call/cc") == 0) { | |
Kont newk = alloc_kont(K_CALLCC, kont); | |
kont = newk; | |
expr = cadr(expr); | |
goto eval_loop; | |
} else { | |
goto do_application; | |
} | |
} else { | |
do_application: | |
Kont newk = alloc_kont(K_OP, kont); | |
newk->data.op_k.args = cdr(expr); | |
newk->data.op_k.env = env; | |
kont = newk; | |
expr = car(expr); | |
goto eval_loop; | |
} | |
break; | |
} | |
default: | |
error("cannot eval"); | |
} | |
continue_val: | |
if (kont->type == K_HALT) { | |
return value; | |
} | |
Kont oldk = kont; | |
kont = kont->next; | |
switch (oldk->type) { | |
case K_IF: | |
expr = is_true(value) ? oldk->data.if_k.thenb : oldk->data.if_k.elseb; | |
env = oldk->data.if_k.env; | |
goto eval_loop; | |
case K_SEQ: | |
expr = car(oldk->data.seq_k.rest); | |
Obj next_rest = cdr(oldk->data.seq_k.rest); | |
env = oldk->data.seq_k.env; | |
if (next_rest) { | |
Kont newk = alloc_kont(K_SEQ, kont); | |
newk->data.seq_k.rest = next_rest; | |
newk->data.seq_k.env = env; | |
kont = newk; | |
} | |
goto eval_loop; | |
case K_OP: { | |
op = value; | |
Obj args = oldk->data.op_k.args; | |
Obj e = oldk->data.op_k.env; | |
if (!args) { | |
args_list = NULL; | |
goto apply; | |
} | |
Kont newk = alloc_kont(K_ARG, kont); | |
newk->data.arg_k.rest = cdr(args); | |
newk->data.arg_k.accum = NULL; | |
newk->data.arg_k.op = op; | |
newk->data.arg_k.env = e; | |
kont = newk; | |
expr = car(args); | |
env = e; | |
goto eval_loop; | |
} | |
case K_ARG: { | |
Obj arg_val = value; | |
Obj accum = cons(arg_val, oldk->data.arg_k.accum); | |
Obj rest = oldk->data.arg_k.rest; | |
op = oldk->data.arg_k.op; | |
Obj e = oldk->data.arg_k.env; | |
if (!rest) { | |
args_list = list_reverse(accum); | |
goto apply; | |
} | |
Kont newk = alloc_kont(K_ARG, kont); | |
newk->data.arg_k.rest = cdr(rest); | |
newk->data.arg_k.accum = accum; | |
newk->data.arg_k.op = op; | |
newk->data.arg_k.env = e; | |
kont = newk; | |
expr = car(rest); | |
env = e; | |
goto eval_loop; | |
} | |
case K_DEFINE: { | |
Obj var = oldk->data.define_k.var; | |
Obj e = oldk->data.define_k.env; | |
Obj binding = cons(var, value); | |
e->data.pair.car = cons(binding, car(e)); | |
value = NULL; | |
goto continue_val; | |
} | |
case K_SET: { | |
Obj var = oldk->data.set_k.var; | |
Obj e = oldk->data.set_k.env; | |
set_var(e, var, value); | |
value = NULL; | |
goto continue_val; | |
} | |
case K_CALLCC: { | |
op = value; | |
Obj cont = alloc_obj(T_CONTINUATION); | |
cont->data.continuation.kont = copy_kont(kont); | |
args_list = cons(cont, NULL); | |
goto apply; | |
} | |
default: | |
error("unknown kont"); | |
} | |
apply: | |
if (op->type == T_PRIMITIVE) { | |
value = op->data.primitive.fn(args_list); | |
goto continue_val; | |
} else if (op->type == T_CLOSURE) { | |
Obj params = op->data.closure.params; | |
Obj body = op->data.closure.body; | |
Obj cl_env = op->data.closure.env; | |
Obj new_env = extend_env(params, args_list, cl_env); | |
if (!body) { | |
value = NULL; | |
goto continue_val; | |
} | |
if (!cdr(body)) { | |
expr = car(body); | |
env = new_env; | |
goto eval_loop; | |
} else { | |
Kont newk = alloc_kont(K_SEQ, kont); | |
newk->data.seq_k.rest = cdr(body); | |
newk->data.seq_k.env = new_env; | |
kont = newk; | |
expr = car(body); | |
env = new_env; | |
goto eval_loop; | |
} | |
} else if (op->type == T_CONTINUATION) { | |
if (list_length(args_list) != 1) error("continuation takes one argument"); | |
value = car(args_list); | |
kont = copy_kont(op->data.continuation.kont); | |
goto continue_val; | |
} else { | |
error("not a procedure"); | |
} | |
return NULL; // unreachable | |
} | |
int main() { | |
init_constants(); | |
init_globals(); | |
// Example 1: Closure | |
char *ex1 = "(define make-add (lambda (x) (lambda (y) (+ x y)))) ((make-add 3) 4)"; | |
char *p1 = ex1; | |
Obj def1 = read_expr(&p1); | |
eval_expr(def1); | |
Obj call1 = read_expr(&p1); | |
Obj res1 = eval_expr(call1); | |
printf("Result of closure example: "); | |
print_obj(res1); | |
printf("\n"); | |
// Example 2: Tail-recursive factorial | |
char *ex2 = "(define fact (lambda (n acc) (if (= n 0) acc (fact (- n 1) (* n acc))))) (fact 5 1)"; | |
char *p2 = ex2; | |
Obj def2 = read_expr(&p2); | |
eval_expr(def2); | |
Obj call2 = read_expr(&p2); | |
Obj res2 = eval_expr(call2); | |
printf("Factorial of 5: "); | |
print_obj(res2); | |
printf("\n"); | |
// Example 3: Continuation | |
char *ex3 = "(+ 1 (call/cc (lambda (k) (k 2))))"; | |
char *p3 = ex3; | |
Obj expr3 = read_expr(&p3); | |
Obj res3 = eval_expr(expr3); | |
printf("Continuation example: "); | |
print_obj(res3); | |
printf("\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment