Created
December 25, 2011 07:39
-
-
Save nyuichi/1518857 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(use srfi-1) | |
(define (normalize form) | |
(cond | |
[(atom? form) form] | |
[(and (list? form) (macro? (car form))) form] | |
[(list? form) | |
(case (car form) | |
((define) (normalize-define form)) | |
((lambda) (normalize-lambda form)) | |
((set!) (normalize-set! form)) | |
((if) (normalize-if form)) | |
((quote) (normalize-quote form)) | |
((begin) (normalize-begin form)) | |
(else (normalize-app form)))])) | |
(define (normalize-define form) | |
(assert (>= (length form) 3) "in define" form) | |
(cond | |
[(pair? (cadr form)) | |
(let* ([formals (cadr form)] | |
[body (cddr form)] | |
[name (car formals)] | |
[args (cdr formals)] | |
[fn `(lambda ,args ,@body)]) | |
(normalize `(define ,name ,fn)))] | |
[else | |
(let ([var (cadr form)] | |
[val (cddr form)]) | |
(assert (symbol? var) "in define, do not bind a value" form) | |
`(define ,var ,(normalize val)))])) | |
(define (normalize-begin form) | |
(assert (>= (length form) 2) "in begin" form) | |
(let ([body (cdr form)]) | |
(cond | |
[(single? body) | |
(normalize body)] | |
[else | |
`(begin ,(car body) | |
,(normalize (cdr body)))]))) | |
(define (normalize-lambda form) | |
(assert (>= (length form) 2) "in lambda" form) | |
(let ([formals (cadr form)] | |
[body (cddr form)]) | |
(assert (symbol-tree? formals) "in lambda, do not bind values" form) | |
`(lambda ,formals ,(normalize body)))) | |
(define (normalize-if form) | |
(let ([len (length form)]) | |
(cond | |
[(= len 3) | |
(normalize-if `(if ,(cadr form) | |
,(caddr form) | |
'()))] | |
[(= len 4) | |
`(if ,(normalize (cadr form)) | |
,(normalize (caddr form)) | |
,(normalize (cadddr form)))] | |
[else | |
(normalization-error)]))) | |
(define (normalize-set! form) | |
(assert (= (length form) 3) "in set!" form) | |
(let ([var (cadr form)] | |
[val (caddr form)]) | |
(assert (symbol? var) "in set!, set to value" form) | |
`(set! form ,(normalize val)))) | |
(define (normalize-quote form) | |
(assert (single? (cdr form)) "in quote" form) | |
form) | |
(define (normalize-app form) | |
(assert (proper-list? form) "in application") | |
(map normalize form)) | |
(define user-syntax | |
'(cond)) | |
(define (macro? id) | |
(member id user-syntax)) | |
(define (atom? form) | |
(or (null? form) | |
(not (pair? form)))) | |
(define (single? form) | |
(and (pair? form) (null? (cdr form)))) | |
(define (symbol-tree? form) | |
(cond | |
[(atom? form) (or (symbol? form) | |
(null? form))] | |
[(pair? form) (and (symbol-tree? (car form)) | |
(symbol-tree? (cdr form)))])) | |
(define (assert test . comments) | |
(when (not test) | |
(print "assertion error:") | |
(map print comments))) | |
(define (main args) | |
(let* ([filename (car args)] | |
[p (open-input-file (car args))]) | |
(call/cc | |
(lambda (exit) | |
(while #t | |
(let ([sexp (read p)]) | |
(if (eof-object? sexp) | |
(exit #f) | |
(print (normalize sexp))))))))) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <wchar.h> | |
/* TODO: Memory Management */ | |
/* TODO: syntax-case */ | |
/* TODO: call/cc by CPS conversion */ | |
/* TODO: isolate side effects in walker */ | |
/* | |
Unsafety: | |
All pic-prefixed functions don't assert types of the arguments. | |
Types will be checked by 'Soft Typing' system. | |
Be careful when you use c functions directly. | |
(Currently, other c functions don't check types either.) | |
*/ | |
/* | |
Naming Convension: | |
C functions with prefix 'pic_' might be used from scheme. | |
Those with prefix '_' are internal functions (for implementation). | |
*/ | |
/* | |
malloc: | |
When allocating memory, it is necessary to use gc_malloc instead of gnu malloc. | |
Memory allocated by gc_malloc will be initialized by zero. | |
*/ | |
void * gc_malloc(int size) | |
{ | |
return calloc(1, size); | |
} | |
/* | |
Address Tag | |
________ ________ ________ ______00 : Heap object / Null | |
________ ________ ________ ______01 : Fixednum | |
11111111 11111111 11111111 11111111 : True | |
Class Tag : | |
An id representing the class to which the object belongs. | |
It is allocated at the first word of the object and ranges | |
from 0 (= NULL) to 0xffffffff. | |
*/ | |
typedef intptr_t PicObj; | |
typedef int PicOpt; | |
#define PicNull 0 | |
#define PicTrue INTPTR_MAX | |
#define PIC_PAIR_CLASS 1 | |
#define PIC_STRING_CLASS 2 | |
#define PIC_SYMBOL_CLASS 3 | |
#define PIC_CHAR_CLASS 4 | |
#define PIC_CLOSURE_CLASS 5 | |
typedef struct Env * Env; | |
typedef struct { | |
PicOpt opt; | |
PicObj car; | |
PicObj cdr; | |
} PicPair; | |
typedef struct { | |
PicOpt opt; | |
PicObj formals; | |
PicObj body; | |
Env env; | |
} PicClosure; | |
typedef struct { | |
PicOpt opt; | |
PicObj frees; | |
PicObj form; | |
Env env; | |
} PicSynclo; | |
typedef struct { | |
PicObj opt; | |
wchar_t ch; | |
} PicChar; | |
typedef PicPair PicString; | |
typedef struct { | |
PicObj opt; | |
PicObj repr; | |
} PicSymbol; | |
#define pic_nullp(v) (!v) | |
#define pic_truep(v) (v==PicTrue) | |
#define address_tag(v) (v&3) | |
#define pic_fixnump(v) (address_tag(v) == 1) | |
#define pic_heapp(v) (address_tag(v) == 0) | |
#define pic_pairp(v) ({ PicObj _v = v; pic_heapp(_v) && ((PicObj*)_v)[0] == PIC_PAIR_CLASS; }) | |
#define pic_charp(v) ({ PicObj _v = v; pic_heapp(_v) && ((PicObj*)_v)[0] == PIC_CHAR_CLASS; }) | |
#define pic_stringp(v) ({ PicObj _v = v; pic_heapp(_v) && ((PicObj*)_v)[0] == PIC_STRING_CLASS; }) | |
#define pic_symbolp(v) ({ PicObj _v = v; pic_heapp(_v) && ((PicObj*)_v)[0] == PIC_SYMBOL_CLASS; }) | |
#define pic_closurep(v) ({ PicObj _v = v; pic_heapp(_v) && ((PicObj*)_v)[0] == PIC_CLOSURE_CLASS; }) | |
#define pic_atomp(v) (pic_nullp(v) || pic_truep(v) || pic_stringp(v) || \ | |
pic_charp(v) || pic_fixnump(v)) | |
#define int2fixnum(v) ((PicObj)((v<<2)+1)) | |
#define fixnum2int(v) (((int)v)>>2) | |
#define pic_car(v) (((PicPair*)v)->car) | |
#define pic_cdr(v) (((PicPair*)v)->cdr) | |
#define pic_caar(v) (pic_car(pic_car(v))) | |
#define pic_cadr(v) (pic_car(pic_cdr(v))) | |
#define pic_cdar(v) (pic_cdr(pic_car(v))) | |
#define pic_cddr(v) (pic_cdr(pic_cdr(v))) | |
#define pic_caaar(v) (pic_car(pic_caar(v))) | |
#define pic_caadr(v) (pic_car(pic_cadr(v))) | |
#define pic_cadar(v) (pic_car(pic_cdar(v))) | |
#define pic_caddr(v) (pic_car(pic_cddr(v))) | |
#define pic_cdaar(v) (pic_cdr(pic_caar(v))) | |
#define pic_cdadr(v) (pic_cdr(pic_cadr(v))) | |
#define pic_cddar(v) (pic_cdr(pic_cdar(v))) | |
#define pic_cdddr(v) (pic_cdr(pic_cddr(v))) | |
#define pic_cadddr(v) (pic_car(pic_cdddr(v))) | |
#define clos_formals(c) (((PicClosure*)c)->formals) | |
#define clos_body(c) (((PicClosure*)c)->body) | |
#define clos_env(c) (((PicClosure*)c)->env) | |
#define symbol_repr(s) (((PicSymbol*)s)->repr) | |
/* holy crap prototypes */ | |
PicObj pic_cons(PicObj car, PicObj cdr); | |
PicObj pic_string_reverse(PicObj list); | |
PicObj _pic_string_reverse(PicObj list, PicObj acc); | |
PicObj pic_list_length(PicObj list); | |
int _pic_list_length(PicObj list, int n); | |
PicObj pic_string_cons(PicObj car, PicObj cdr); | |
PicObj pic_string_equalp(PicObj str1, PicObj str2); | |
PicObj _wchar_to_picchar(wchar_t ch); | |
wchar_t * _picstring_to_wcstring(PicObj list); | |
PicObj _picstring_equals_wcstringp(PicObj str1, wchar_t * str2, int offset); | |
void next(); | |
void back(); | |
void get_token(); | |
PicObj read(); | |
PicObj read_pair(); | |
PicObj wrap(wchar_t * wrapper, PicObj form); | |
void write(PicObj obj); | |
void write_pair(PicObj pair); | |
void bind(Env env, PicObj symbol, PicObj val); | |
void assign(Env env, PicObj symbol, PicObj val); | |
PicObj eval(PicObj form, Env env); | |
PicObj apply(PicObj callee, PicObj args); | |
PicObj eval_all(PicObj list, Env env); | |
void bind_all(PicObj vars, PicObj args, Env env); | |
PicObj make_closure(PicObj formals, PicObj body, Env env) | |
{ | |
PicClosure * clos = gc_malloc(sizeof(PicClosure)); | |
clos->opt = PIC_CLOSURE_CLASS; | |
clos->formals = formals; | |
clos->body = body; | |
clos->env = env; | |
return (PicObj)clos; | |
} | |
PicObj make_symbol(PicObj repr) | |
{ | |
PicSymbol * sym = gc_malloc(sizeof(PicSymbol)); | |
sym->opt = PIC_SYMBOL_CLASS; | |
sym->repr = repr; | |
return (PicObj)sym; | |
} | |
PicObj pic_cons(PicObj car, PicObj cdr) | |
{ | |
PicPair * p = gc_malloc(sizeof(PicPair)); | |
p->opt = PIC_PAIR_CLASS; | |
p->car = car; | |
p->cdr = cdr; | |
return (PicObj)p; | |
} | |
PicObj pic_string_reverse(PicObj list) | |
{ | |
return _pic_string_reverse(list, PicNull); | |
} | |
PicObj _pic_string_reverse(PicObj list, PicObj acc) | |
{ | |
if (pic_nullp(list)) { | |
return acc; | |
} else { | |
return _pic_string_reverse(pic_cdr(list), pic_string_cons(pic_car(list), acc)); | |
} | |
} | |
PicObj pic_list_length(PicObj list) | |
{ | |
return int2fixnum(_pic_list_length(list, 0)); | |
} | |
int _pic_list_length(PicObj list, int n) | |
{ | |
if (pic_nullp(list)) { | |
return n; | |
} else { | |
return _pic_list_length(pic_cdr(list), n+1); | |
} | |
} | |
/* Character and String */ | |
#define _picchar_to_wchar(v) (((PicChar*)v)->ch) | |
PicObj pic_string_cons(PicObj car, PicObj cdr) | |
{ | |
PicPair * p = gc_malloc(sizeof(PicPair)); | |
p->opt = PIC_STRING_CLASS; | |
p->car = car; | |
p->cdr = cdr; | |
return (PicObj)p; | |
} | |
PicObj pic_string_equalp(PicObj str1, PicObj str2) | |
{ | |
if (pic_nullp(str1) && pic_nullp(str2)) { | |
return PicTrue; | |
} else if (pic_nullp(str1) || pic_nullp(str2)) { | |
return PicNull; | |
} else { | |
return pic_string_equalp(pic_cdr(str1), pic_cdr(str2)); | |
} | |
} | |
PicObj _wchar_to_picchar(wchar_t ch) | |
{ | |
PicChar * c = gc_malloc(sizeof(PicChar)); | |
c->opt = PIC_CHAR_CLASS; | |
c->ch = ch; | |
return (PicObj)c; | |
} | |
wchar_t * _picstring_to_wcstring(PicObj list) | |
{ | |
int len = _pic_list_length(list, 0); | |
wchar_t * str = gc_malloc(sizeof(wchar_t) * (len + 1)); | |
for (int i = 0; i < len; i++, list = pic_cdr(list)) | |
str[i] = _picchar_to_wchar(pic_car(list)); | |
return str; | |
} | |
PicObj _wcstring_to_picstring(wchar_t * str, PicObj acc, int offset) | |
{ | |
if (str[offset] == L'\0') { | |
return pic_string_reverse(acc); | |
} else { | |
return _wcstring_to_picstring(str, pic_cons(_wchar_to_picchar(str[offset]), acc), offset+1); | |
} | |
} | |
PicObj _picstring_equals_wcstringp(PicObj str1, wchar_t * str2, int offset) | |
{ | |
if (pic_nullp(str1) && str2[offset] == L'\0') return PicTrue; | |
if (pic_nullp(str1) || str2[offset] == L'\0') return PicNull; | |
if (_picchar_to_wchar(pic_car(str1)) != str2[offset]) return PicNull; | |
return _picstring_equals_wcstringp(pic_cdr(str1), str2, offset+1); | |
} | |
/* Tokenize */ | |
struct { | |
enum { | |
eof, | |
parenl, | |
parenr, | |
dot, | |
quote, | |
quasiquote, | |
unquote, | |
unquote_splicing, | |
number, | |
string, | |
symbol, | |
} kind; | |
PicObj data; | |
} token; | |
int backtrack = 0; | |
void next() | |
{ | |
if (!backtrack) { | |
get_token(); | |
} | |
backtrack = 0; | |
} | |
void back() | |
{ | |
backtrack = 1; | |
} | |
int spacep(wchar_t c) | |
{ | |
return wcschr(L" \r\t\n", c) != NULL; | |
} | |
void get_token() | |
{ | |
wchar_t c = getwc(stdin); | |
if (spacep(c)) return get_token(); | |
switch (c) { | |
case WEOF: { token.kind = eof; break; } | |
case '(' : { token.kind = parenl; break; } | |
case ')' : { token.kind = parenr; break; } | |
case '.' : { token.kind = dot; break; } | |
case '\'': { token.kind = quote; break; } | |
case '`' : { token.kind = quasiquote; break; } | |
case ',' : { | |
token.kind = ((c = getwc(stdin)) == L'@')? | |
unquote_splicing : (ungetwc(c, stdin), unquote); | |
break; | |
} | |
case ';' : { | |
while (getwc(stdin) != '\n'); | |
return get_token(); | |
} | |
case '"' : { | |
PicObj buff = PicNull; | |
for (c = getwc(stdin); c != L'"'; c = getwc(stdin)) { | |
buff = pic_string_cons(_wchar_to_picchar(c), buff); | |
} | |
ungetwc(c, stdin); | |
buff = pic_string_reverse(buff); | |
token.kind = string; | |
token.data = buff; | |
} | |
default : { | |
PicObj buff; | |
for (buff = PicNull; wcschr(L"() \t\r\n", c) == NULL; c = getwc(stdin)) { | |
buff = pic_string_cons(_wchar_to_picchar(c), buff); | |
} | |
ungetwc(c, stdin); | |
buff = pic_string_reverse(buff); | |
token.kind = symbol; | |
token.data = make_symbol(buff); | |
} | |
} | |
} | |
/* Read */ | |
PicObj read() | |
{ | |
next(); | |
switch(token.kind) { | |
case number: return token.data; | |
case symbol: return token.data; | |
case string: return token.data; | |
case parenl: return read_pair(); | |
case quote: return wrap(L"quote", read()); | |
case quasiquote: return wrap(L"quasiquote", read()); | |
case unquote: return wrap(L"unquote", read()); | |
case unquote_splicing: return wrap(L"unquote-splicing", read()); | |
case parenr: | |
case eof: | |
case dot: | |
puts("syntax error: extra close parenthesis "); /* TODO */ | |
return PicNull; | |
} | |
} | |
PicObj read_pair() | |
{ | |
next(); | |
if (token.kind == parenr) { | |
return PicNull; | |
} else { | |
back(); | |
PicObj car = read(); | |
PicObj cdr = read_pair(); | |
return pic_cons(car, cdr); | |
} | |
} | |
PicObj wrap(wchar_t * wrapper, PicObj form) | |
{ | |
PicObj tag = make_symbol(_wcstring_to_picstring(wrapper, PicNull, 0)); | |
return pic_cons(tag, pic_cons(form, PicNull)); | |
} | |
/* HashTable */ | |
typedef PicObj * Table; | |
const int table_size = 11; | |
Table make_table(); | |
int hash(PicObj str, int h); | |
void put(Table table, PicObj key, PicObj val); | |
PicObj get(Table table, PicObj key); | |
PicObj locate_in_chain(PicObj key, PicObj chain); | |
Env make_env(Env meta); | |
PicObj lookup(Env env, PicObj symbol); | |
Table make_table() | |
{ | |
return gc_malloc(sizeof(PicObj) * table_size); | |
} | |
int hash(PicObj str, int h) | |
{ | |
if (pic_nullp(str)) { | |
return h; | |
} else { | |
return hash(pic_cdr(str), 31*h + _picchar_to_wchar(pic_car(str))); | |
} | |
} | |
void put(Table table, PicObj key, PicObj val) /* TODO: proper overriding */ | |
{ | |
int i = hash(key, 0) % table_size; | |
table[i] = pic_cons(pic_cons(key, val), table[i]); | |
} | |
PicObj get(Table table, PicObj key) | |
{ | |
int i = hash(key, 0) % table_size; | |
return locate_in_chain(key, table[i]); | |
} | |
PicObj locate_in_chain(PicObj key, PicObj chain) | |
{ | |
if (pic_nullp(chain)) { | |
puts("locate failed"); | |
return PicNull; | |
} else { | |
if (pic_string_equalp(key, pic_caar(chain))) { | |
return pic_cdar(chain); | |
} else { | |
return locate_in_chain(key, pic_cdr(chain)); | |
} | |
} | |
} | |
/* Environment */ | |
struct Env { | |
Table table; | |
Env meta; | |
}; | |
#define env_toplevelp(env) (env->meta == NULL) | |
Env make_env(Env meta) | |
{ | |
Env env = gc_malloc(sizeof(struct Env)); | |
env->table = make_table(); | |
env->meta = meta; | |
return env; | |
} | |
PicObj lookup(Env env, PicObj symbol) | |
{ | |
if (env == NULL) { | |
return PicNull; | |
} else { | |
return (get(env->table, symbol_repr(symbol)))?: lookup(env->meta, symbol); | |
} | |
} | |
void bind(Env env, PicObj symbol, PicObj val) | |
{ | |
put(env->table, symbol_repr(symbol), val); | |
} | |
void assign(Env env, PicObj symbol, PicObj val) | |
{ | |
PicObj slot = get(env->table, symbol_repr(symbol)); | |
if (!pic_nullp(slot)) { | |
bind(env, symbol, val); | |
} else if (!env_toplevelp(env)) { | |
assign(env->meta, symbol, val); | |
} else { | |
wprintf(L"Variable %s is not defined", _picstring_to_wcstring(symbol_repr(symbol))); | |
} | |
} | |
/* Eval & Apply */ | |
PicObj eval(PicObj form, Env env) | |
{ | |
if (pic_atomp(form)) { return form; | |
} else if (pic_symbolp(form)) { return lookup(env, form); | |
} else if (pic_pairp(form)) { | |
if (pic_symbolp(pic_car(form))) | |
{ | |
PicObj op = symbol_repr(pic_car(form)); | |
if (_picstring_equals_wcstringp(op, L"quote", 0)) return pic_cadr(form); | |
if (_picstring_equals_wcstringp(op, L"lambda", 0)) goto LAMBDA; | |
if (_picstring_equals_wcstringp(op, L"begin", 0)) goto BEGIN; | |
if (_picstring_equals_wcstringp(op, L"if", 0)) goto IF; | |
if (_picstring_equals_wcstringp(op, L"define", 0)) goto DEFINE; | |
if (_picstring_equals_wcstringp(op, L"set!", 0)) goto SETBANG; | |
} | |
goto PROC_CALL; | |
} else { | |
return puts("malformed expression"); | |
} | |
LAMBDA: | |
return make_closure(pic_cadr(form), pic_caddr(form), env); | |
BEGIN: | |
return eval(pic_cadr(form), env), eval(pic_caddr(form), env); | |
IF: { | |
PicObj test = eval(pic_cadr(form), env); | |
PicObj brch = (pic_nullp(test))? pic_caddr(form) : pic_cadddr(form); | |
return eval(brch, env); | |
} | |
DEFINE: { | |
PicObj var = pic_cadr(form); | |
PicObj val = pic_caddr(form); | |
return bind(env, var, eval(val, env)), var; | |
} | |
SETBANG: { | |
PicObj var = pic_cadr(form); | |
PicObj val = pic_caddr(form); | |
return assign(env, var, eval(val, env)), var; | |
} | |
PROC_CALL: { | |
PicObj app = eval_all(form, env); | |
return apply(pic_car(app), pic_cdr(app)); | |
} | |
} | |
PicObj apply(PicObj callee, PicObj args) | |
{ | |
if (pic_closurep(callee)) { | |
Env env = make_env(clos_env(callee)); | |
bind_all(clos_formals(callee), args, env); | |
return eval(clos_body(callee), env); | |
} | |
} | |
PicObj eval_all(PicObj list, Env env) | |
{ | |
if (pic_nullp(list)) { | |
return PicNull; | |
} else { | |
PicObj head = eval(pic_car(list), env); | |
PicObj tail = eval_all(pic_cdr(list), env); | |
return pic_cons(head, tail); | |
} | |
} | |
void bind_all(PicObj vars, PicObj args, Env env) | |
{ | |
if (pic_nullp(vars)) { | |
return; | |
} else { | |
bind(env, pic_car(vars), pic_car(args)); | |
return bind_all(pic_cdr(vars), pic_cdr(args), env); | |
} | |
} | |
/* Write */ | |
void write(PicObj obj) | |
{ | |
if (pic_nullp(obj)) wprintf(L"()"); | |
else if (pic_pairp(obj)) write_pair(obj); | |
else if (pic_truep(obj)) wprintf(L"true"); | |
else if (pic_charp(obj)) wprintf(L"%c", _picchar_to_wchar(obj)); | |
else if (pic_symbolp(obj)) wprintf(L"%s", _picstring_to_wcstring(symbol_repr(obj))); | |
else if (pic_stringp(obj)) wprintf(L"\"%s\"", _picstring_to_wcstring(obj)); | |
else if (pic_fixnump(obj)) wprintf(L"%d", fixnum2int(obj)); | |
else if (pic_closurep(obj)) wprintf(L"#<procedure>"); | |
else wprintf(L"<unknown>"); | |
fflush(stdout); | |
} | |
void write_pair(PicObj pair) | |
{ | |
wprintf(L"("); | |
write(pic_car(pair)); | |
wprintf(L" . "); | |
write(pic_cdr(pair)); | |
wprintf(L")"); | |
} | |
/* Mainloop */ | |
void repl() | |
{ | |
Env top = make_env(NULL); | |
for(;;) { | |
printf(">>> "); | |
PicObj form = read(); | |
printf("Read: "); write(form); puts(""); | |
write(eval(form, top)); | |
puts(""); | |
} | |
} | |
int main() | |
{ | |
repl(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment