Last active
August 29, 2015 13:57
-
-
Save andersonsp/9744663 to your computer and use it in GitHub Desktop.
Valenok's QVM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h> | |
#include <string.h> | |
#define KEY_LENGTH 64 | |
#define HTAB_SIZE 4096 | |
typedef struct | |
{ | |
char* key; | |
int value; | |
} htable_node; | |
typedef struct | |
{ | |
int num_nodes; | |
htable_node* nodes[HTAB_SIZE]; | |
} hash_table; | |
hash_table* create_htab(); | |
void destroy_htab(hash_table* htab); | |
inline int htab_add(hash_table* htab, char* key, int value); | |
inline int htab_find(hash_table* htab, char* key); | |
inline unsigned int htab_hash(char* key); | |
hash_table* create_htab() | |
{ | |
hash_table* htab; | |
htab = (hash_table*)malloc(sizeof(hash_table)); | |
int i; for(i = 0; i < HTAB_SIZE; i++) htab->nodes[i] = NULL; | |
return htab; | |
} | |
void destroy_htab(hash_table* htab) | |
{ | |
int i; for(i = 0; i < HTAB_SIZE; i++) | |
{ | |
if(htab->nodes[i]) | |
{ | |
free(htab->nodes[i]->key); | |
free(htab->nodes[i]); | |
} | |
} | |
free(htab); | |
htab = NULL; | |
} | |
int htab_add(hash_table* htab, char* k, int v) | |
{ | |
int hash = htab_hash(k); | |
// If the node is not already occupied, allocate space, and copy the key/value pair. | |
if(htab->nodes[hash] == NULL) htab->nodes[hash] = malloc(sizeof(htable_node)); | |
else return 1; | |
htab->nodes[hash]->key = (char*)malloc(sizeof(char) * (strlen(k) + 1)); | |
strcpy(htab->nodes[hash]->key, k); | |
htab->nodes[hash]->value = v; | |
return 0; | |
} | |
int htab_find(hash_table* htab, char* key) | |
{ | |
int hash = htab_hash(key); | |
if(htab->nodes[hash] != NULL) return htab->nodes[hash]->value; | |
else return -1; | |
} | |
unsigned int htab_hash(char* k) | |
{ | |
unsigned int hash = 1; | |
char* c; for(c = k; *c; c++) | |
hash += (hash << *c) - *c; | |
return hash % HTAB_SIZE; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#define ICACHE 0 /* nonzero to enable point-of-send inline cache */ | |
#define MCACHE 0 /* nonzero to enable global method cache */ | |
typedef struct _Vtable Vtable; | |
typedef struct _Object Object; | |
typedef struct _Closure Closure; | |
typedef struct _Symbol Symbol; | |
typedef Object *(*imp_t)(Closure *closure, Object *receiver, ...); | |
struct _Vtable { | |
Vtable *vt; | |
int size; | |
int tally; | |
Object **keys; | |
Object **values; | |
Vtable *parent; | |
}; | |
struct _Object { | |
Vtable *vt; | |
}; | |
struct _Closure { | |
Vtable *vt; | |
imp_t method; | |
Object *data; | |
}; | |
struct _Symbol { | |
Vtable *vt; | |
char *string; | |
}; | |
Vtable *SymbolList= 0; | |
Vtable *vtable_vt; | |
Vtable *object_vt; | |
Vtable *symbol_vt; | |
Vtable *closure_vt; | |
Object *s_addMethod = 0; | |
Object *s_allocate = 0; | |
Object *s_delegated = 0; | |
Object *s_lookup = 0; | |
extern inline void *alloc(size_t size) { | |
Vtable **ppvt= (Vtable **)calloc(1, sizeof(Vtable *) + size); | |
return (void *)(ppvt + 1); | |
} | |
Object *symbol_new(char *string) { | |
Symbol *symbol = (Symbol *)alloc(sizeof(Symbol)); | |
symbol->vt = symbol_vt; | |
symbol->string = strdup(string); | |
return (Object *)symbol; | |
} | |
Object *closure_new(imp_t method, Object *data) { | |
Closure *closure = (Closure *)alloc(sizeof(Closure)); | |
closure->vt = closure_vt; | |
closure->method = method; | |
closure->data = data; | |
return (Object *)closure; | |
} | |
Object *vtable_lookup(Closure *closure, Vtable *self, Object *key); | |
#if ICACHE | |
# define send(RCV, MSG, ARGS...) ({ \ | |
struct object *r = (Object *)(RCV); \ | |
static Vtable *prevVT = 0; \ | |
static Closure *closure = 0; \ | |
register Vtable *thisVT = r->vt; \ | |
thisVT == prevVT \ | |
? closure \ | |
: (prevVT = thisVT, \ | |
closure = bind(r, (MSG))); \ | |
closure->method(closure, r, ##ARGS); \ | |
}) | |
#else | |
# define send(RCV, MSG, ARGS...) ({ \ | |
Object *r = (Object *)(RCV); \ | |
Closure *c = bind(r, (MSG)); \ | |
c->method(c, r, ##ARGS); \ | |
}) | |
#endif | |
#if MCACHE | |
struct entry { | |
Vtable *vtable; | |
Object *selector; | |
Closure *closure; | |
} MethodCache[8192]; | |
#endif | |
Closure *bind(Object *rcv, Object *msg) { | |
Closure *c; | |
Vtable *vt = rcv->vt; | |
#if MCACHE | |
struct entry *cl = MethodCache + ((((unsigned)vt << 2) ^ ((unsigned)msg >> 3)) & ((sizeof(MethodCache) / sizeof(struct entry)) - 1)); | |
if (cl->vtable == vt && cl->selector == msg) | |
return cl->closure; | |
#endif | |
c = ((msg == s_lookup) && (rcv == (Object *)vtable_vt)) | |
? (Closure *)vtable_lookup(0, vt, msg) | |
: (Closure *)send(vt, s_lookup, msg); | |
#if MCACHE | |
cl->vtable = vt; | |
cl->selector = msg; | |
cl->closure = c; | |
#endif | |
return c; | |
} | |
Vtable *vtable_delegated(Closure *closure, Vtable *self) { | |
Vtable *child= (Vtable *)alloc(sizeof(Vtable)); | |
child->vt = self ? self->vt : 0; | |
child->size = 2; | |
child->tally = 0; | |
child->keys = (Object **)calloc(child->size, sizeof(Object *)); | |
child->values = (Object **)calloc(child->size, sizeof(Object *)); | |
child->parent = self; | |
return child; | |
} | |
Object *vtable_allocate(Closure *closure, Vtable *self, int payloadSize) { | |
Object *object = (Object *)alloc(payloadSize); | |
object->vt = self; | |
return object; | |
} | |
imp_t vtable_add_method(Closure *closure, Vtable *self, Object *key, imp_t method) { | |
int i; | |
for (i = 0; i < self->tally; ++i) | |
if (key == self->keys[i]) | |
return ((Closure *)self->values[i])->method = method; | |
if( self->tally == self->size ) { | |
self->size *= 2; | |
self->keys = (Object **)realloc(self->keys, sizeof(Object *) * self->size); | |
self->values = (Object **)realloc(self->values, sizeof(Object *) * self->size); | |
} | |
self->keys [self->tally ] = key; | |
self->values[self->tally++] = closure_new(method, 0); | |
return method; | |
} | |
Object *vtable_lookup(Closure *closure, Vtable *self, Object *key) { | |
int i; | |
for (i = 0; i < self->tally; ++i) | |
if(key == self->keys[i]) return self->values[i]; | |
if (self->parent) return send(self->parent, s_lookup, key); | |
fprintf(stderr, "lookup failed %p %s\n", self, ((Symbol *)key)->string); | |
return 0; | |
} | |
Object *symbol_intern(Closure *closure, Object *self, char *string) { | |
Object *symbol; | |
int i; | |
for(i = 0; i < SymbolList->tally; ++i) { | |
symbol = SymbolList->keys[i]; | |
if( !strcmp(string, ((Symbol *)symbol)->string) ) return symbol; | |
} | |
symbol = symbol_new(string); | |
vtable_add_method(0, SymbolList, symbol, 0); | |
return symbol; | |
} | |
void init(void) { | |
vtable_vt = vtable_delegated(0, 0); | |
vtable_vt->vt = vtable_vt; | |
object_vt = vtable_delegated(0, 0); | |
object_vt->vt = vtable_vt; | |
vtable_vt->parent = object_vt; | |
symbol_vt = vtable_delegated(0, object_vt); | |
closure_vt = vtable_delegated(0, object_vt); | |
SymbolList = vtable_delegated(0, 0); | |
s_lookup = symbol_intern(0, 0, "lookup"); | |
s_addMethod = symbol_intern(0, 0, "addMethod"); | |
s_allocate = symbol_intern(0, 0, "allocate"); | |
s_delegated = symbol_intern(0, 0, "delegated"); | |
vtable_add_method(0, vtable_vt, s_lookup, (imp_t)vtable_lookup); | |
vtable_add_method(0, vtable_vt, s_addMethod, (imp_t)vtable_add_method); | |
send(vtable_vt, s_addMethod, s_allocate, vtable_allocate); | |
send(vtable_vt, s_addMethod, s_delegated, vtable_delegated); | |
} | |
/*----------------------------------------------------------------*/ | |
Object *s_new= 0; | |
Object *s_length= 0; | |
struct Number { | |
Vtable *vt; | |
}; | |
Vtable *Number_vt = 0; | |
Object *Number = 0; | |
Object *Number_new(Closure *closure, struct Number *self) { | |
fprintf(stderr, "Number_new\n"); | |
exit(1); | |
return 0; | |
} | |
int Number_length(Closure *closure, struct Number *self) { | |
fprintf(stderr, "Number has no length\n"); | |
exit(1); | |
return 0; | |
} | |
struct String { | |
Vtable *vt; | |
int length; | |
char *chars; | |
}; | |
Vtable *String_vt = 0; | |
Object *String = 0; | |
Object *String_new(Closure *closure, struct String *self, int size) { | |
struct String *clone = (struct String *)send(self->vt, s_allocate, sizeof(struct String)); | |
clone->length = size; | |
clone->chars = (char *)malloc(size); | |
return (Object *)clone; | |
} | |
int String_length(Closure *closure, struct String *self) { | |
return self->length; | |
} | |
struct Vector { | |
Vtable *vt; | |
int length; | |
Object *contents; | |
}; | |
Vtable *Vector_vt = 0; | |
Object *Vector = 0; | |
Object *Vector_new(Closure *closure, struct Vector *self, int size) { | |
struct Vector *clone = (struct Vector *)send(self->vt, s_allocate, sizeof(struct Vector)); | |
clone->length = size; | |
clone->contents = (Object *)calloc(size, sizeof(Object *)); | |
return (Object *)clone; | |
} | |
int Vector_length(Closure *closure, struct Vector *self) { | |
return self->length; | |
} | |
void init2(void) { | |
s_new = symbol_intern(0, 0, "new"); | |
s_length = symbol_intern(0, 0, "length"); | |
Number_vt = (Vtable *)send(object_vt, s_delegated); | |
String_vt = (Vtable *)send(object_vt, s_delegated); | |
Vector_vt = (Vtable *)send(object_vt, s_delegated); | |
send(Number_vt, s_addMethod, s_new, Number_new); | |
send(String_vt, s_addMethod, s_new, String_new); | |
send(Vector_vt, s_addMethod, s_new, Vector_new); | |
send(Number_vt, s_addMethod, s_length, Number_length); | |
send(String_vt, s_addMethod, s_length, String_length); | |
send(Vector_vt, s_addMethod, s_length, Vector_length); | |
Number = send(Number_vt, s_allocate, 0); | |
String = send(String_vt, s_allocate, 0); | |
Vector = send(Vector_vt, s_allocate, 0); | |
} | |
/*----------------------------------------------------------------*/ | |
void doit(void) { | |
int i, j; | |
Object *a = send(String, s_new, 1); | |
Object *b = send(Vector, s_new, 1); | |
for (i = 0, j = 0; i < 1000000; ++i) { | |
j += (int)send(a, s_length) + (int)send(b, s_length); | |
j += (int)send(a, s_length) + (int)send(b, s_length); | |
j += (int)send(a, s_length) + (int)send(b, s_length); | |
j += (int)send(a, s_length) + (int)send(b, s_length); | |
j += (int)send(a, s_length) + (int)send(b, s_length); | |
} | |
printf("total %d\n", j); | |
} | |
int main() { | |
init(); | |
init2(); | |
doit(); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* tp_obj tp_track(TP,tp_obj v) { return v; } | |
void tp_grey(TP,tp_obj v) { } | |
void tp_full(TP) { } | |
void tp_gc_init(TP) { } | |
void tp_gc_deinit(TP) { } | |
void tp_delete(TP,tp_obj v) { }*/ | |
/* | |
#ifdef TP_NORMALMAKE | |
#include "tp.h" | |
#include "list_auto.h" | |
#include "dict_auto.h" | |
#endif | |
void tp_grey(TP,tp_obj v) { | |
if (v.type < TP_STRING || (!v.gci.data) || *v.gci.data) { return; } | |
*v.gci.data = 1; | |
if (v.type == TP_STRING || v.type == TP_DATA) { | |
_tp_list_appendx(tp,tp->black,v); | |
return; | |
} | |
_tp_list_appendx(tp,tp->grey,v); | |
} | |
void tp_follow(TP,tp_obj v) { | |
int type = v.type; | |
if (type == TP_LIST) { | |
int n; | |
for (n=0; n<v.list.val->len; n++) { | |
tp_grey(tp,v.list.val->items[n]); | |
} | |
} | |
if (type == TP_DICT) { | |
int i; | |
for (i=0; i<v.dict.val->len; i++) { | |
int n = _tp_dict_next(tp,v.dict.val); | |
tp_grey(tp,v.dict.val->items[n].key); | |
tp_grey(tp,v.dict.val->items[n].val); | |
} | |
tp_grey(tp,v.dict.val->meta); | |
} | |
if (type == TP_FNC) { | |
tp_grey(tp,v.fnc.info->self); | |
tp_grey(tp,v.fnc.info->globals); | |
tp_grey(tp,v.fnc.info->code); | |
} | |
} | |
void tp_reset(TP) { | |
int n; | |
_tp_list *tmp; | |
for (n=0; n<tp->black->len; n++) { | |
*tp->black->items[n].gci.data = 0; | |
} | |
tmp = tp->white; | |
tp->white = tp->black; | |
tp->black = tmp; | |
} | |
void tp_gc_init(TP) { | |
tp->white = _tp_list_new(tp); | |
tp->grey = _tp_list_new(tp); | |
tp->black = _tp_list_new(tp); | |
tp->steps = 0; | |
} | |
void tp_gc_deinit(TP) { | |
_tp_list_free(tp, tp->white); | |
_tp_list_free(tp, tp->grey); | |
_tp_list_free(tp, tp->black); | |
} | |
void tp_delete(TP,tp_obj v) { | |
int type = v.type; | |
if (type == TP_LIST) { | |
_tp_list_free(tp, v.list.val); | |
return; | |
} else if (type == TP_DICT) { | |
_tp_dict_free(tp, v.dict.val); | |
return; | |
} else if (type == TP_STRING) { | |
tp_free(tp, v.string.info); | |
return; | |
} else if (type == TP_DATA) { | |
if (v.data.info->free) { | |
v.data.info->free(tp,v); | |
} | |
tp_free(tp, v.data.info); | |
return; | |
} else if (type == TP_FNC) { | |
tp_free(tp, v.fnc.info); | |
return; | |
} | |
else if( type == TP_FRAME ) { | |
tp_frame_ *f = v.frame.val; | |
tp_free(tp, f); | |
return; | |
} | |
tp_raise(,tp_string("(tp_delete) TypeError: ?")); | |
} | |
void tp_collect(TP) { | |
int n; | |
for (n=0; n<tp->white->len; n++) { | |
tp_obj r = tp->white->items[n]; | |
if (*r.gci.data) { continue; } | |
tp_delete(tp,r); | |
} | |
tp->white->len = 0; | |
tp_reset(tp); | |
} | |
void _tp_gcinc(TP) { | |
tp_obj v; | |
if (!tp->grey->len) { | |
return; | |
} | |
v = _tp_list_pop(tp,tp->grey,tp->grey->len-1,"_tp_gcinc"); | |
tp_follow(tp,v); | |
_tp_list_appendx(tp,tp->black,v); | |
} | |
void tp_full(TP) { | |
while (tp->grey->len) { | |
_tp_gcinc(tp); | |
} | |
tp_collect(tp); | |
tp_follow(tp,tp->root); | |
} | |
void tp_gcinc(TP) { | |
tp->steps += 1; | |
if (tp->steps < TP_GCMAX || tp->grey->len > 0) { | |
_tp_gcinc(tp); _tp_gcinc(tp); | |
} | |
if (tp->steps < TP_GCMAX || tp->grey->len > 0) { return; } | |
tp->steps = 0; | |
tp_full(tp); | |
return; | |
} | |
tp_obj tp_track(TP,tp_obj v) { | |
tp_gcinc(tp); | |
tp_grey(tp,v); | |
return v; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) 2006, 2007 by Sergey Lyubka | |
# All rights reserved | |
# | |
# $Id$ | |
SRCS= q.c | |
PROG= q | |
CFLAGS= -W -Wall -g -pedantic -ansi -D_BSD_SOURCE $(COPT) | |
all: $(PROG) | |
$(PROG): $(HDRS) $(SRCS) | |
$(CC) $(CFLAGS) $(SRCS) -o $@ | |
clean: | |
rm -f *.o $(PROG) *core |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#define STACK_MAX 256 | |
typedef enum { | |
OBJ_INT, | |
OBJ_PAIR | |
} ObjectType; | |
typedef struct sObject { | |
ObjectType type; | |
unsigned char marked; | |
/* The next object in the linked list of heap allocated objects. */ | |
struct sObject* next; | |
union { | |
/* OBJ_INT */ | |
int value; | |
/* OBJ_PAIR */ | |
struct { | |
struct sObject* head; | |
struct sObject* tail; | |
}; | |
}; | |
} Object; | |
typedef struct { | |
Object* stack[STACK_MAX]; | |
int stackSize; | |
/* The first object in the linked list of all objects on the heap. */ | |
Object* firstObject; | |
/* The total number of currently allocated objects. */ | |
int numObjects; | |
/* The number of objects required to trigger a GC. */ | |
int maxObjects; | |
} VM; | |
void assert(int condition, const char* message) { | |
if (!condition) { | |
printf("%s\n", message); | |
exit(1); | |
} | |
} | |
VM* newVM() { | |
VM* vm = malloc(sizeof(VM)); | |
vm->stackSize = 0; | |
vm->firstObject = NULL; | |
vm->numObjects = 0; | |
vm->maxObjects = 8; | |
return vm; | |
} | |
void push(VM* vm, Object* value) { | |
assert(vm->stackSize < STACK_MAX, "Stack overflow!"); | |
vm->stack[vm->stackSize++] = value; | |
} | |
Object* pop(VM* vm) { | |
assert(vm->stackSize > 0, "Stack underflow!"); | |
return vm->stack[--vm->stackSize]; | |
} | |
void mark(Object* object) { | |
/* If already marked, we're done. Check this first to avoid recursing | |
on cycles in the object graph. */ | |
if (object->marked) return; | |
object->marked = 1; | |
if (object->type == OBJ_PAIR) { | |
mark(object->head); | |
mark(object->tail); | |
} | |
} | |
void markAll(VM* vm) | |
{ | |
for (int i = 0; i < vm->stackSize; i++) { | |
mark(vm->stack[i]); | |
} | |
} | |
void sweep(VM* vm) | |
{ | |
Object** object = &vm->firstObject; | |
while (*object) { | |
if (!(*object)->marked) { | |
/* This object wasn't reached, so remove it from the list and free it. */ | |
Object* unreached = *object; | |
*object = unreached->next; | |
free(unreached); | |
vm->numObjects--; | |
} else { | |
/* This object was reached, so unmark it (for the next GC) and move on to | |
the next. */ | |
(*object)->marked = 0; | |
object = &(*object)->next; | |
} | |
} | |
} | |
void gc(VM* vm) { | |
int numObjects = vm->numObjects; | |
markAll(vm); | |
sweep(vm); | |
vm->maxObjects = vm->numObjects * 2; | |
printf("Collected %d objects, %d remaining.\n", numObjects - vm->numObjects, | |
vm->numObjects); | |
} | |
Object* newObject(VM* vm, ObjectType type) { | |
if (vm->numObjects == vm->maxObjects) gc(vm); | |
Object* object = malloc(sizeof(Object)); | |
object->type = type; | |
object->next = vm->firstObject; | |
vm->firstObject = object; | |
object->marked = 0; | |
vm->numObjects++; | |
return object; | |
} | |
void pushInt(VM* vm, int intValue) { | |
Object* object = newObject(vm, OBJ_INT); | |
object->value = intValue; | |
push(vm, object); | |
} | |
Object* pushPair(VM* vm) { | |
Object* object = newObject(vm, OBJ_PAIR); | |
object->tail = pop(vm); | |
object->head = pop(vm); | |
push(vm, object); | |
return object; | |
} | |
void objectPrint(Object* object) { | |
switch (object->type) { | |
case OBJ_INT: | |
printf("%d", object->value); | |
break; | |
case OBJ_PAIR: | |
printf("("); | |
objectPrint(object->head); | |
printf(", "); | |
objectPrint(object->tail); | |
printf(")"); | |
break; | |
} | |
} | |
void freeVM(VM *vm) { | |
vm->stackSize = 0; | |
gc(vm); | |
free(vm); | |
} | |
void test1() { | |
printf("Test 1: Objects on stack are preserved.\n"); | |
VM* vm = newVM(); | |
pushInt(vm, 1); | |
pushInt(vm, 2); | |
gc(vm); | |
assert(vm->numObjects == 2, "Should have preserved objects."); | |
freeVM(vm); | |
} | |
void test2() { | |
printf("Test 2: Unreached objects are collected.\n"); | |
VM* vm = newVM(); | |
pushInt(vm, 1); | |
pushInt(vm, 2); | |
pop(vm); | |
pop(vm); | |
gc(vm); | |
assert(vm->numObjects == 0, "Should have collected objects."); | |
freeVM(vm); | |
} | |
void test3() { | |
printf("Test 3: Reach nested objects.\n"); | |
VM* vm = newVM(); | |
pushInt(vm, 1); | |
pushInt(vm, 2); | |
pushPair(vm); | |
pushInt(vm, 3); | |
pushInt(vm, 4); | |
pushPair(vm); | |
pushPair(vm); | |
gc(vm); | |
assert(vm->numObjects == 7, "Should have reached objects."); | |
freeVM(vm); | |
} | |
void test4() { | |
printf("Test 4: Handle cycles.\n"); | |
VM* vm = newVM(); | |
pushInt(vm, 1); | |
pushInt(vm, 2); | |
Object* a = pushPair(vm); | |
pushInt(vm, 3); | |
pushInt(vm, 4); | |
Object* b = pushPair(vm); | |
a->tail = b; | |
b->tail = a; | |
gc(vm); | |
assert(vm->numObjects == 4, "Should have collected objects."); | |
freeVM(vm); | |
} | |
void perfTest() { | |
printf("Performance Test.\n"); | |
VM* vm = newVM(); | |
for (int i = 0; i < 1000; i++) { | |
for (int j = 0; j < 20; j++) { | |
pushInt(vm, i); | |
} | |
for (int k = 0; k < 20; k++) { | |
pop(vm); | |
} | |
} | |
freeVM(vm); | |
} | |
int main(int argc, const char * argv[]) { | |
test1(); | |
test2(); | |
test3(); | |
test4(); | |
perfTest(); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (c) 2004-2007 Sergey Lyubka <[email protected]> | |
* All rights reserved | |
* | |
* "THE BEER-WARE LICENSE" (Revision 42): | |
* Sergey Lyubka wrote this file. As long as you retain this notice you | |
* can do whatever you want with this stuff. If we meet some day, and you think | |
* this stuff is worth it, you can buy me a beer in return. | |
* | |
* $Id$ | |
*/ | |
#include <sys/stat.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <assert.h> | |
#include <stdarg.h> | |
#include <string.h> | |
#include <ctype.h> | |
#include <setjmp.h> | |
#include <stddef.h> | |
#include <errno.h> | |
#define MAX_SOURCE_SIZE 32768 /* Maximum source file size */ | |
#define CHUNK_SIZE 64 /* Runtime stack growing size */ | |
#define ERR_STR_SIZE 256 /* Error buffer size */ | |
#define HASH_SIZE 13 /* Hash object hash table size */ | |
#define PCODE_MAGIC "ABC1" /* Compiled p-code signature */ | |
#define MAX_EXPR_TOKS 512 /* Maximum tokens in expression */ | |
#define ARRAY_SIZE(array) ((int) (sizeof(array) / sizeof(array[0]))) | |
struct q; | |
struct obj; | |
/* | |
* Circular doubly-linked list macros: http://silversoft.net/~devnull/llist.h | |
*/ | |
struct llhead { | |
struct llhead *next; | |
struct llhead *prev; | |
}; | |
#define LL_INIT(N) ((N)->next = (N)->prev = (N)) | |
#define LL_HEAD(H) struct llhead H = {&H, &H} | |
#define LL_NULL {NULL, NULL} | |
#define LL_ENTRY(P,T,N) ((T *)((char *)(P) - offsetof(T, N))) | |
#define LL_ADD(H, N) \ | |
do { \ | |
((H)->next)->prev = (N); \ | |
(N)->next = ((H)->next); \ | |
(N)->prev = (H); \ | |
(H)->next = (N); \ | |
} while (0) | |
#define LL_TAIL(H, N) \ | |
do { \ | |
((H)->prev)->next = (N); \ | |
(N)->prev = ((H)->prev); \ | |
(N)->next = (H); \ | |
(H)->prev = (N); \ | |
} while (0) | |
#define LL_DEL(N) \ | |
do { \ | |
((N)->next)->prev = ((N)->prev); \ | |
((N)->prev)->next = ((N)->next); \ | |
LL_INIT(N); \ | |
} while (0) | |
#define LL_EMPTY(N) ((N)->next == (N)) | |
#define LL_FOREACH(H,N) for (N = (H)->next; N != (H); N = (N)->next) | |
#define LL_FOREACH_SAFE(H,N,T) \ | |
for (N = (H)->next, T = (N)->next; N != (H); \ | |
N = (T), T = (N)->next) | |
struct hash { | |
struct obj *parent; /* Parent object */ | |
struct llhead glob_list; /* For all entries in the hash */ | |
struct llhead tab[HASH_SIZE]; /* Hash table itself */ | |
}; | |
/* | |
* Typedefs to abstract environment-specific types | |
*/ | |
typedef void (*q_op_handler_t)(struct q *); | |
typedef double numeric_t; /* Type for numeric values */ | |
typedef size_t str_size_t; /* For string lengths */ | |
typedef int code_ofs_t; /* For relative JUMP operands */ | |
/* | |
* Strings are arbitrary chunks of uninterpreted binary data. | |
* Represented by a pointer to data and data length in bytes. | |
*/ | |
struct vec { | |
int len; /* Length of binary data */ | |
char *ptr; /* Pointer to binary data */ | |
}; | |
/* | |
* Value of any supported type | |
*/ | |
union val { | |
struct vec str; /* String value */ | |
q_op_handler_t cfunc; /* Function implemented in C */ | |
struct hash *hash; /* Hash value */ | |
numeric_t num; /* Numeric value */ | |
code_ofs_t ofs; /* Jump offset value */ | |
}; | |
/* | |
* Object. This is a type marker and associated value. | |
*/ | |
struct obj { | |
unsigned char type; /* TYPE_XXX, see below */ | |
union val val; /* Value holder */ | |
}; | |
/* | |
* Hash entry | |
*/ | |
struct he { | |
struct llhead link; /* Hash cell linkage */ | |
struct obj key; /* Entry' key */ | |
struct obj val; /* Entry's value */ | |
}; | |
/* | |
* Q context | |
*/ | |
struct q { | |
struct llhead link; /* Linkage */ | |
char *name; /* Module name */ | |
char *code; /* Code */ | |
struct obj *stack; /* Stack */ | |
int sc; /* Stack counter */ | |
int fc; /* Call frame pointer */ | |
int cc; /* Code counter */ | |
int cs; /* Code size */ | |
int ss; /* Stack size */ | |
jmp_buf jmpbuf; /* Exception environ */ | |
char *err_str; /* Error message placeholder */ | |
}; | |
/* | |
* This is a parse token and syntax tree node at the same time | |
*/ | |
struct node { | |
struct llhead link; /* Linkage in expression */ | |
struct vec vec; /* Points to the source code */ | |
int tok; /* Token's value */ | |
int line_no; /* Line number in the source */ | |
struct node *left; | |
struct node *right; | |
struct node *parent; | |
}; | |
/* | |
* Types and their names | |
*/ | |
enum {TYPE_NIL, TYPE_NUM, TYPE_STR, TYPE_HASH, TYPE_FUNC, TYPE_CFUNC}; | |
static const char *types[] = {"nil", "num", "str", "hash", "func", "c-func"}; | |
/* | |
* Opcodes and their names. Names are prepended by a letter that specifies | |
* opcode parameters (used by disassembly function) | |
*/ | |
enum { | |
OP_END, OP_CALL, OP_RETURN, OP_SET, OP_GET, OP_PUSH, OP_MATH, | |
OP_POP, OP_JUMP, OP_DUP, OP_PUSHNS, OP_NEWMAP, NUM_INSTTRUCTIONS | |
}; | |
static const char *opcodes[] = { | |
"xEND", "xCALL", "xRETURN", "xSET", "xGET", "tPUSH", "aMATH", | |
"xPOP", "xJUMP", "nDUP", "xPUSHNS", "xNEWMAP" | |
}; | |
static const char *serr = "syntax error"; | |
static const char *rerr = "runtime error"; | |
/* | |
* Tokens. XXX Make sure that long token values do not clash with one char. | |
* We start numbering long tokens from 'a', which gives us at least | |
* 26 English letters; there are no letters in one char tokens. | |
*/ | |
static const char *one_char_tokens = "=[](){}+-*/^,;.\""; | |
enum { | |
TOK_IDENT = 'a', TOK_NUM, TOK_STR, TOK_FUNC, TOK_IF, TOK_ELSE, | |
TOK_ADDE, TOK_SUBE, TOK_MULE, TOK_DIVE, TOK_NIL, TOK_NOT, | |
TOK_AND, TOK_OR, | |
/* XXX up to this point, must be parallel to long_tokens[] */ | |
TOK_END | |
}; | |
static const char *long_tokens[] = { | |
"%v", "%d", "%s", "function", "if", "else", "+=", "-=", "*=", "/=", | |
"nil", "not", "and", "or", NULL | |
}; | |
static LL_HEAD(loaded_modules); | |
static void die(struct q *q, const char *fmt, ...) | |
{ | |
va_list ap; | |
va_start(ap, fmt); | |
(void) vsnprintf(q->err_str, ERR_STR_SIZE, fmt, ap); | |
va_end(ap); | |
longjmp(q->jmpbuf, 1); | |
} | |
static void expand_code(struct q *q, int len) | |
{ | |
q->cs += len; | |
q->code = realloc(q->code, q->cs); | |
if (q->code == NULL) | |
die(q, "%s: cannot grow code to %u bytes", serr, q->cs); | |
} | |
static void emit(struct q *q, const void *mem, int len) | |
{ | |
if (q->code == NULL || q->cc + len >= q->cs) | |
expand_code(q, CHUNK_SIZE); | |
(void) memcpy(q->code + q->cc, mem, len); | |
q->cc += len; | |
} | |
static void emit_byte(struct q *q, unsigned char value) | |
{ | |
emit(q, &value, sizeof(value)); | |
} | |
static void emit_num(struct q *q, numeric_t val) | |
{ | |
emit_byte(q, OP_PUSH); | |
emit_byte(q, TYPE_NUM); | |
emit(q, &val, sizeof(val)); | |
} | |
static const int hex_tab[] = { | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, | |
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 | |
}; | |
static int hex_ascii_to_int(struct q *q, unsigned char ch, int line_no) | |
{ | |
if (ch >= ARRAY_SIZE(hex_tab) || hex_tab[ch] == -1) | |
die(q, "%s: line %d: badly encoded string", serr, line_no); | |
return (hex_tab[ch]); | |
} | |
static void emit_string_constant(struct q *q, const struct node *node) | |
{ | |
char *dst, *src; | |
str_size_t n = 0; | |
/* XXX Must go first, cause expand_code may change q->code pointer */ | |
if (q->code + q->cc + 2 + sizeof(n) + node->vec.len > q->code + q->cs) | |
expand_code(q, node->vec.len + 2 + sizeof(n) + 1); | |
dst = q->code + q->cc + 2 + sizeof(n); | |
src = node->vec.ptr + 1; | |
assert(node->vec.ptr[0] == '"'); | |
assert(node->vec.ptr[node->vec.len - 1] == '"'); | |
assert(dst + node->vec.len < q->code + q->cs); | |
for (; src < node->vec.ptr + node->vec.len - 1; src++, n++) | |
if (*src == '%') { | |
dst[n] = hex_ascii_to_int(q,src[1],node->line_no) << 4; | |
dst[n] |= hex_ascii_to_int(q, src[2], node->line_no); | |
src += 2; | |
} else { | |
dst[n] = *src; | |
} | |
emit_byte(q, OP_PUSH); | |
emit_byte(q, TYPE_STR); | |
emit(q, &n, sizeof(n)); | |
q->cc += n; | |
} | |
/* | |
* Operator attributes table. Operator's token is an index in this table. | |
* The value stored is an integer, where: | |
* o least significant byte is operator's priority | |
* o next byte is a number of operands | |
* o next byte is a flag, whether operator is right-to-left, like '=' | |
*/ | |
static int op_tab[256]; | |
static int set_op_attributes(int priority, int num_args, int is_right_to_left) | |
{ | |
return (priority | (num_args << 8) | (is_right_to_left << 16)); | |
} | |
static void init_op_tab(void) | |
{ | |
op_tab['.'] = op_tab['('] = set_op_attributes(100, 2, 0); | |
op_tab['['] = set_op_attributes(100, 1, 1); | |
op_tab['*'] = op_tab['/'] = op_tab['%'] = set_op_attributes(80, 2, 0); | |
op_tab['+'] = op_tab['-'] = set_op_attributes(60, 2, 0); | |
op_tab['='] = op_tab[TOK_ADDE] = op_tab[TOK_SUBE] = | |
op_tab[TOK_MULE] = op_tab[TOK_DIVE] = set_op_attributes(20, 2, 1); | |
op_tab[','] = set_op_attributes(10, 2, 0); | |
} | |
/* | |
* Shortcut functions to access operator attributes | |
*/ | |
static int prio(int tok) { return (op_tab[tok] & 0xff); } | |
static int nargs(int tok) { return ((op_tab[tok] >> 8) & 0xff); } | |
static int is_rtl(int tok) { return ((op_tab[tok] >> 16) & 0xff); } | |
static int is_op(int tok) { return (op_tab[tok]); } | |
static int calc_number_of_params(const struct node *node, int n) | |
{ | |
if (node->left) | |
n += calc_number_of_params(node->left, 0); | |
if (node->right) | |
n += calc_number_of_params(node->right, 0); | |
if (node->tok == ',') | |
n++; | |
return (n); | |
} | |
static int is_lvalue(const struct node *node) | |
{ | |
return (node->parent != NULL && node->parent->tok == '=' && | |
node->parent->left == node); | |
} | |
static int is_leftmost(const struct node *node) | |
{ | |
if (node->parent != NULL && node->parent->left == node && | |
node->parent->tok == '=') { | |
return (1); | |
} else if (node->parent != NULL && node->parent->left == node && | |
node->parent->tok == '.') { | |
return (is_leftmost(node->parent)); | |
} else { | |
return (0); | |
} | |
} | |
static void emit_ident(struct q *q, const struct node *node) | |
{ | |
emit_byte(q, OP_PUSH); | |
emit_byte(q, TYPE_STR); | |
emit(q, &node->vec.len, sizeof(node->vec.len)); | |
emit(q, node->vec.ptr, node->vec.len); | |
} | |
static void emit_expr(struct q *, const struct node *); | |
static void emit_hash_definition(struct q *q, const struct node *node, int idx) | |
{ | |
if (node == NULL) { | |
die(q, "%s: line %d: bad hash def", serr, node->line_no); | |
} else if (node->tok == ',') { | |
emit_hash_definition(q, node->left, idx); | |
emit_byte(q, OP_POP); | |
emit_hash_definition(q, node->right, idx + 1); | |
} else if (node->tok == '=') { | |
if (node->left->tok == TOK_IDENT) { | |
emit_expr(q, node->right); | |
emit_byte(q, OP_DUP); | |
emit_byte(q, 1); | |
emit_ident(q, node->left); | |
emit_byte(q, OP_SET); | |
} else { | |
die(q, "%s: line %d: bad hash def", serr,node->line_no); | |
} | |
} else { | |
/* Key is the index of the entry in the definition */ | |
emit_expr(q, node); | |
emit_byte(q, OP_DUP); | |
emit_byte(q, 1); | |
emit_num(q, idx); | |
emit_byte(q, OP_SET); | |
} | |
} | |
static int emit_params(struct q *q, const struct node *node, int n) | |
{ | |
if (node->tok == TOK_NIL) { | |
/* Do nothing - no parameters passed to function */ | |
} else if (node->tok == ',') { | |
n = emit_params(q, node->left, n); | |
n = emit_params(q, node->right, n); | |
} else { | |
emit_expr(q, node); | |
n++; | |
} | |
return (n); | |
} | |
static void emit_function_call(struct q *q, const struct node *node) | |
{ | |
int num_params; | |
num_params = emit_params(q, node->right, 0); | |
emit_num(q, num_params); | |
emit_expr(q, node->left); | |
emit_byte(q, OP_CALL); | |
} | |
static void emit_expr(struct q *q, const struct node *node) | |
{ | |
switch (node->tok) { | |
case '.': | |
if (node->left->tok == TOK_IDENT) | |
emit_byte(q, OP_PUSHNS); | |
emit_expr(q, node->left); | |
emit_expr(q, node->right); | |
emit_byte(q, OP_GET); | |
break; | |
case '%': | |
case '*': | |
case '/': | |
case '+': | |
case '-': | |
emit_expr(q, node->left); | |
emit_expr(q, node->right); | |
emit_byte(q, OP_MATH); | |
emit_byte(q, node->tok); | |
break; | |
case '=': | |
emit_expr(q, node->right); | |
emit_expr(q, node->left); | |
if (node->left->tok != TOK_IDENT && | |
node->left->tok != '.' && node->left->tok != '=') | |
die(q, "%s: %d: %s", serr, | |
node->line_no, "invalid assignment"); | |
emit_byte(q, OP_SET); | |
break; | |
case ',': | |
emit_expr(q, node->left); | |
emit_expr(q, node->right); | |
die(q, "%s: %d: %s", serr, node->line_no, "use ';', not ','"); | |
break; | |
case '(': | |
emit_function_call(q, node); | |
break; | |
case TOK_NIL: | |
emit_byte(q, OP_PUSH); | |
emit_byte(q, TYPE_NIL); | |
break; | |
case TOK_IDENT: | |
/* | |
* Identifier could be either rvalue or lvalue: | |
* "foo.bar = baz = x.y;" | |
* Lvalues: bar, baz | |
* Rvalues: foo, x, y | |
* Lvalue is a key, for namespace insert. | |
* Rvalue is always a key for namespace lookup. | |
*/ | |
if (node->parent == NULL || node->parent->tok != '.') | |
emit_byte(q, OP_PUSHNS); | |
emit_ident(q, node); | |
if (!is_lvalue(node) && !(node->parent != NULL && | |
node->parent->tok == '.' && is_lvalue(node->parent))) | |
emit_byte(q, OP_GET); | |
break; | |
case TOK_STR: | |
emit_string_constant(q, node); | |
break; | |
case TOK_NUM: | |
emit_num(q, strtod(node->vec.ptr, NULL)); | |
break; | |
case '[': | |
emit_byte(q, OP_NEWMAP); | |
assert(node->right != NULL); | |
if (node->right->tok != TOK_NIL) { | |
emit_hash_definition(q, node->right, 0); | |
emit_byte(q, OP_POP); | |
} | |
break; | |
default: | |
die(q, "%s: %d: %s [%d]", serr, node->line_no, | |
"internal parser error", node->tok); | |
break; | |
} | |
} | |
static int cmp_vec(const struct vec *a, const struct vec *b) | |
{ | |
int rc; | |
if (a->len < b->len) | |
return (-cmp_vec(b, a)); | |
rc = memcmp(a->ptr, b->ptr, b->len); | |
if (rc == 0) | |
rc = a->len - b->len; | |
return (rc); | |
} | |
static int cmp(const struct obj *a, const struct obj *b) | |
{ | |
int res = 0; | |
assert(a->type == b->type); | |
switch (a->type) { | |
case TYPE_NIL: | |
break; | |
case TYPE_NUM: | |
res = a->val.num - b->val.num; | |
break; | |
case TYPE_STR: | |
res = cmp_vec(&a->val.str, &b->val.str); | |
break; | |
case TYPE_CFUNC: | |
break; | |
case TYPE_FUNC: | |
res = a->val.cfunc == b->val.cfunc ? 0 : 1; | |
break; | |
case TYPE_HASH: | |
res = a->val.hash == b->val.hash ? 0 : 1; | |
break; | |
default: | |
abort(); | |
break; | |
} | |
return (res); | |
} | |
static unsigned long hash_str(unsigned long hash, const struct vec *vec) | |
{ | |
int i; | |
for (i = 0; i < vec->len; i++) | |
hash = (hash << 5) - hash + vec->ptr[i]; | |
return (hash); | |
} | |
static unsigned long calc_hash(const struct obj *obj) | |
{ | |
unsigned long hash = obj->type; | |
switch (obj->type) { | |
case TYPE_NIL: | |
break; | |
case TYPE_NUM: | |
hash += (unsigned long) obj->val.num; | |
break; | |
case TYPE_STR: | |
hash = hash_str(hash, &obj->val.str); | |
break; | |
case TYPE_CFUNC: | |
hash += (unsigned long) obj->val.ofs; | |
break; | |
case TYPE_FUNC: | |
hash += (unsigned long) obj->val.cfunc; | |
break; | |
case TYPE_HASH: | |
break; | |
default: | |
abort(); | |
break; | |
} | |
return (hash); | |
} | |
static struct he *lookup(struct obj *obj, struct obj *key, size_t *pcell) | |
{ | |
struct llhead *lp, *head; | |
struct hash *hash; | |
struct he *he; | |
size_t cell; | |
assert(obj->type == TYPE_HASH); | |
hash = obj->val.hash; | |
cell = calc_hash(key) % ARRAY_SIZE(hash->tab); | |
head = hash->tab + cell; | |
if (pcell != NULL) | |
*pcell = cell; | |
LL_FOREACH(head, lp) { | |
he = LL_ENTRY(lp, struct he, link); | |
if (he->key.type == key->type && cmp(&he->key, key) == 0) | |
return(he); | |
} | |
return (NULL); | |
} | |
static struct obj *lookup_ins(struct obj *obj, struct obj *key, struct obj *val) | |
{ | |
struct he *he; | |
struct hash *hash; | |
size_t cell; | |
hash = obj->val.hash; | |
if ((he = lookup(obj, key, &cell)) == NULL) { | |
he = calloc(1, sizeof(*he)); | |
he->key = *key; | |
if (val != NULL) | |
he->val = *val; | |
LL_TAIL(hash->tab + cell, &he->link); | |
} | |
return (&he->val); | |
} | |
static struct obj *lookup_rec(struct q *q, struct obj *obj, struct obj *key) | |
{ | |
struct he *he; | |
while (obj != NULL) { | |
if (obj->type != TYPE_HASH) | |
die(q, "%s: %d: attempt to get an attribute from the" | |
" <%s> object", rerr, q->cc, types[obj->type]); | |
else if ((he = lookup(obj, key, NULL)) != NULL) | |
return (&he->val); | |
/* This lookup is recursive. Switch to the parent object */ | |
obj = obj->val.hash->parent; | |
} | |
return (NULL); | |
} | |
static struct obj *pop(struct q *q) | |
{ | |
if (--q->sc < 0) | |
die(q, "%s: %d: %s", rerr, q->cc, "stack underflow"); | |
return (q->stack + q->sc); | |
} | |
static struct obj *push(struct q *q) | |
{ | |
if (q->stack == NULL || q->sc >= (int) q->ss) { | |
q->ss += CHUNK_SIZE; | |
q->stack = realloc(q->stack, q->ss * sizeof(q->stack[0])); | |
} | |
if (q->stack == NULL) | |
die(q, "%s: %d: %s %u", rerr, q->cc, | |
"cannot grow stack to %u", q->ss); | |
return (q->stack + q->sc++); | |
} | |
static void init_hash_obj(struct obj *obj, struct obj *parent) | |
{ | |
struct hash *hash; | |
size_t i; | |
obj->type = TYPE_HASH; | |
obj->val.hash = hash = malloc(sizeof(*obj->val.hash)); | |
hash->parent = parent; | |
LL_INIT(&hash->glob_list); | |
for (i = 0; i < ARRAY_SIZE(hash->tab); i++) | |
LL_INIT(hash->tab + i); | |
} | |
/* | |
* Get integer value from stack, at relative offset stack_ofs | |
*/ | |
static int gsi(struct q *q, int stack_ofs) | |
{ | |
const struct obj *obj = q->stack + q->sc - 1 - stack_ofs; | |
if (obj < q->stack) | |
die(q, "%s: %d: stack underflow", rerr, q->cc); | |
else if (obj->type != TYPE_NUM) | |
die(q, "%s: %d: expected number on stack", rerr, q->cc); | |
return ((int) obj->val.num); | |
} | |
static void execute(struct q *q) | |
{ | |
const char *c = q->code + q->cc; | |
struct obj *obj, *val, *key, *a, *b; | |
while (*c != OP_END) { | |
q->cc = c - q->code; | |
if (q->cc >= (int) q->cs) | |
die(q, "%s", "corrupt code"); | |
#if 0 | |
printf("%d: sc %d (%s)\n", | |
q->cc, q->sc, types[q->stack[q->sc - 1].type]); | |
#endif | |
switch (*c++) { | |
case OP_CALL: /* ( -- ret_addr old_fc new_namespace) */ | |
obj = pop(q); | |
if (obj->type == TYPE_CFUNC) { | |
obj->val.cfunc(q); | |
q->sc -= obj[-1].val.num + 1; | |
} else if (obj->type == TYPE_FUNC) { | |
/* TODO - implement native functions */ | |
} else { | |
die(q, "%s: %d: attempt to call <%s> object", | |
rerr, q->cc, types[obj->type]); | |
} | |
break; | |
case OP_RETURN: | |
break; | |
case OP_SET: /* (val obj key -- val) */ | |
key = pop(q); | |
obj = pop(q); | |
val = pop(q); | |
if (obj->type != TYPE_HASH) | |
die(q, "%s: %d: attemt to set " | |
"an attribute to <%s> object", | |
rerr, q->cc, types[obj->type]); | |
(void) lookup_ins(obj, key, val); | |
q->sc++; | |
break; | |
case OP_GET: /* (obj key -- val) */ | |
key = pop(q); | |
obj = pop(q); | |
if ((val = lookup_rec(q, obj, key)) != NULL) | |
*obj = *val; | |
else | |
obj->type = TYPE_NIL; | |
q->sc++; | |
break; | |
case OP_PUSH: /* ( -- val) */ | |
obj = push(q); | |
obj->type = *c++; | |
switch (obj->type) { | |
case TYPE_NUM: | |
obj->val.num = * (numeric_t *) c; | |
c += sizeof(numeric_t); | |
break; | |
case TYPE_STR: | |
obj->val.str.len = * (str_size_t *) c; | |
c += sizeof(str_size_t); | |
obj->val.str.ptr = (char *) c; | |
c += obj->val.str.len; | |
break; | |
case TYPE_HASH: | |
init_hash_obj(obj, NULL); | |
break; | |
case TYPE_NIL: | |
break; | |
default: | |
die(q, "%s: %d: %s", rerr, q->cc, | |
"attempt to PUSH an unknown shit"); | |
abort(); | |
break; | |
} | |
break; | |
case OP_MATH: /* (val1 val2 -- result) */ | |
b = pop(q); | |
a = pop(q); | |
if (a->type != TYPE_NUM || b->type != TYPE_NUM) | |
die(q, "%s: %d: <%s> found, <num> expected", | |
rerr, q->cc, types[a->type]); | |
switch (*c++) { | |
case '+': a->val.num += b->val.num; break; | |
case '-': a->val.num -= b->val.num; break; | |
case '*': a->val.num *= b->val.num; break; | |
case '/': | |
if (b->val.num == 0) | |
die(q, "%s: %d: %s", | |
rerr, q->cc, "division by zero"); | |
a->val.num /= b->val.num; | |
break; | |
default: | |
die(q, "%s: %d: unexpected arith operator: %u", | |
rerr, q->cc, c[-1]); | |
break; | |
} | |
q->sc++; | |
break; | |
case OP_POP: /* (obj --) */ | |
(void) pop(q); | |
break; | |
case OP_DUP: /* (-- obj) */ | |
if (q->sc <= *c) | |
die(q, "%s: stack underflow", rerr); | |
obj = q->stack + q->sc - 1 - *c++; | |
val = push(q); | |
*val = *obj; | |
break; | |
case OP_JUMP: /* (--) */ | |
break; | |
case OP_PUSHNS: | |
obj = push(q); | |
obj->type = TYPE_HASH; | |
obj->val.hash = q->stack[q->fc].val.hash; | |
break; | |
case OP_NEWMAP: /* (-- hash_obj) */ | |
obj = push(q); | |
init_hash_obj(obj, &q->stack[q->fc]); | |
break; | |
default: | |
die(q, "%s: %d: %s", rerr, q->cc, "corrupter code"); | |
break; | |
} | |
} | |
} | |
static void disasm(struct q *q) | |
{ | |
const char *name, *c; | |
int len; | |
for (c = q->code; *c != OP_END; c++) { | |
assert(NUM_INSTTRUCTIONS == ARRAY_SIZE(opcodes)); | |
assert(*c < NUM_INSTTRUCTIONS); | |
name = opcodes[(int) *c]; | |
printf("%-4d %s", c - q->code, name + 1); | |
if (name[0] == 'a') { | |
printf(" %c", *++c); | |
} else if (name[0] == 'n') { | |
printf(" %d", *++c); | |
} else if (name[0] == 't') { | |
printf(" <%s> ", types[(int) *++c]); | |
switch (*c) { | |
case TYPE_NIL: | |
break; | |
case TYPE_NUM: | |
printf("%.2f", (double) *(numeric_t *)(c + 1)); | |
c += sizeof(numeric_t); | |
break; | |
case TYPE_STR: | |
len = * (str_size_t *) (c + 1); | |
c += sizeof(str_size_t); | |
printf("\"%.*s\"", (int) len, c + 1); | |
c += len; | |
break; | |
default: | |
break; | |
} | |
} | |
(void) putchar('\n'); | |
} | |
} | |
static void print_obj(const struct obj *obj) | |
{ | |
switch (obj->type) { | |
case TYPE_STR: | |
printf("%.*s", (int) obj->val.str.len, obj->val.str.ptr); | |
break; | |
case TYPE_NUM: | |
printf("%.2f", (double) obj->val.num); | |
break; | |
default: | |
printf("<%s>", types[obj->type]); | |
break; | |
} | |
} | |
static void print(struct q *q) | |
{ | |
int i, param_count = gsi(q, 0); | |
struct obj *obj = q->stack + q->sc - param_count - 1; | |
for (i = 0; i < param_count; i++, obj++) | |
print_obj(obj); | |
} | |
static void q_free(struct q *q) | |
{ | |
if (q != NULL) { | |
if (q->code != NULL) | |
free(q->code); | |
if (q->stack != NULL) | |
free(q->stack); | |
if (q->name != NULL) | |
free(q->name); | |
free(q); | |
} | |
} | |
static struct obj *setattr(struct obj *obj, const char *name, struct obj *attr) | |
{ | |
struct obj key; | |
key.type = TYPE_STR; | |
key.val.str.ptr = (char *) name; | |
key.val.str.len = strlen(name); | |
return (lookup_ins(obj, &key, attr)); | |
} | |
static void import_builtin_objects(struct q *q) | |
{ | |
struct obj obj, *namespace = q->stack; | |
obj.type = TYPE_CFUNC; | |
obj.val.cfunc = &print; | |
setattr(namespace, "print", &obj); | |
obj.val.cfunc = &disasm; | |
setattr(namespace, "disasm", &obj); | |
} | |
static void q_save(const char *path, const struct q *q, char *err_str) | |
{ | |
FILE *fp; | |
if ((fp = fopen(path, "w+")) == NULL) { | |
(void) snprintf(err_str, ERR_STR_SIZE, | |
"fopen(%s): %s", path, strerror(errno)); | |
} else { | |
(void) fwrite(PCODE_MAGIC, 1, sizeof(PCODE_MAGIC) - 1, fp); | |
(void) fwrite(q->code, 1, q->cs, fp); | |
(void) fclose(fp); | |
} | |
} | |
static int match_long_token(const char *str, const char *in) | |
{ | |
int len = 0; | |
char *end; | |
if (str[0] == '%' && str[1] == 'v') { | |
if (*in == '_' || isalpha(* (unsigned char *) in)) | |
while (in[len] == '_' || | |
isalnum(((unsigned char *)in)[len])) | |
len++; | |
} else if (str[0] == '%' && str[1] == 'd') { | |
(void) strtod(in, &end); | |
len = end - in; | |
} else if (str[0] == '%' && str[1] == 's') { | |
if (*in == '"' && (end = strchr(in + 1, '"')) != NULL) | |
len = end - in + 1; | |
} else if (strncmp(in, str, strlen(str)) == 0) { | |
len = strlen(str); | |
} | |
return (len); | |
} | |
/* | |
* Parse the file, calling user function for every token. | |
* Return total number of tokens, including last TOK_END. | |
*/ | |
static int tokenize(const char *buf, struct node *nodes) | |
{ | |
struct node *node; | |
const char *cp; | |
int i, line_no = 1, num_nodes = 0; | |
do { | |
/* Skip whitespaces and comments, counting line numbers */ | |
again: for (; isspace(*(unsigned char *) buf); buf++) | |
if (*buf == '\n') | |
line_no++; | |
if (*buf == '#') | |
for (buf++; *buf != '\0'; buf++) | |
if (*buf == '\n') | |
goto again; | |
/* Initialize node */ | |
node = nodes + num_nodes++; | |
(void) memset(node, 0, sizeof(struct node)); | |
node->vec.ptr = (char *) buf; | |
node->line_no = line_no; | |
node->tok = TOK_END; | |
/* Try to match one of the long tokens */ | |
for (i = 0; long_tokens[i] != NULL; i++) | |
if ((node->vec.len = | |
match_long_token(long_tokens[i], buf)) > 0) { | |
node->tok = TOK_IDENT + i; | |
break; | |
} | |
/* Try to match one character token */ | |
if (node->vec.len == 0 && *buf != '\0' && | |
((cp = strchr(one_char_tokens, *buf))) != NULL) { | |
node->tok = *cp; | |
node->vec.len = 1; | |
} | |
buf += node->vec.len; | |
#if 0 | |
printf("tok %d: [%.*s]\n", | |
node->tok, node->vec.len, node->vec.ptr); | |
#endif | |
} while (node->tok != TOK_END); | |
return (num_nodes); | |
} | |
static void bubble_sort_ops_by_priority(struct node **ops, int num_ops) | |
{ | |
struct node *tmp; | |
int i, swapped; | |
/* | |
* All left-to-right nodes must be sorted by priority. Sorting is | |
* stable, i.e. the order is not broken for the nodes of equal | |
* priority. We *DO* break the order for the right-to-left nodes, | |
* though. So same-prio RTL nodes are reversed. | |
*/ | |
do { | |
swapped = 0; | |
for (i = 0; i < num_ops - 1; i ++) | |
if ((is_rtl(ops[i]->tok) && is_rtl(ops[i + 1]->tok) && | |
prio(ops[i]->tok) == prio(ops[i + 1]->tok) && | |
ops[i] < ops[i + 1]) || | |
(prio(ops[i]->tok) < prio(ops[i + 1]->tok))) { | |
tmp = ops[i]; | |
ops[i] = ops[i + 1]; | |
ops[i + 1] = tmp; | |
swapped = 1; | |
} | |
} while (swapped == 1); | |
} | |
static void reduce(struct q *q, struct node *parent, int ano, struct llhead *h) | |
{ | |
int take_from_left; | |
struct llhead *lp; | |
struct node *node; | |
assert(is_op(parent->tok)); | |
/* Figure out which node to take - from the left or from the right */ | |
take_from_left = !ano ^ is_rtl(parent->tok); | |
lp = take_from_left ? parent->link.prev : parent->link.next; | |
node = LL_ENTRY(lp, struct node, link); | |
/* | |
* Make sure the node pointer points to the real node. It may not | |
* point to the real node if parent is: | |
* - first in the expression and is left-to-right | |
* - last in the expression and is right-to-left | |
* In these (erroneous) cases, 'lp' points to the list head, | |
* not node's link. | |
*/ | |
if (lp == h) | |
die(q, "%s: line %d", serr, node->line_no); | |
/* | |
* Delete operand from the expession list, and append it | |
* respectively to the left or right branch in the parent. | |
*/ | |
LL_DEL(&node->link); | |
if (take_from_left) | |
parent->left = node; | |
else | |
parent->right = node; | |
node->parent = parent; | |
} | |
static int qaz(const struct node *node, int num_nodes) | |
{ | |
int prev_tok = node[-1].tok; | |
return (num_nodes > 1 && | |
(prev_tok == TOK_IDENT || prev_tok == ']' || prev_tok == ')')); | |
} | |
static int expr(struct q *q, struct node *tab, int endtok, struct node **root) | |
{ | |
static struct node nil; | |
struct node *ops[MAX_EXPR_TOKS], *node, *tmp; | |
struct llhead head; | |
int i, j, num_toks, num_ops; | |
nil.tok = TOK_NIL; | |
LL_INIT(&head); | |
num_toks = num_ops = 0; | |
/* | |
* Scan the expression until end token reached. Link all tokens | |
* into one list. Insert operations into ops array. We'll use list | |
* and ops array to build the syntax tree for the expression. | |
*/ | |
while (tab[num_toks].tok != endtok) { | |
/* Link that node into expression's list */ | |
node = tab + num_toks++; | |
LL_TAIL(&head, &node->link); | |
if (node->tok == TOK_END) { | |
die(q, "line %d: unexpected EOF", node->line_no); | |
} else if (node->tok == '(' || node->tok == '[') { | |
/* Disambiguate () and [] operators */ | |
if (node->tok == '(' && !qaz(node, num_toks)) | |
LL_DEL(&node->link); | |
else if (node->tok == '[' && qaz(node, num_toks)) | |
node->tok = '.'; | |
num_toks += expr(q, node + 1, | |
node->tok == '(' ? ')' : ']', &tmp); | |
LL_TAIL(&head, &tmp->link); | |
} | |
if (is_op(node->tok)) | |
ops[num_ops++] = node; | |
} | |
/* Reduce token list into a syntax tree */ | |
bubble_sort_ops_by_priority(ops, num_ops); | |
for (i = 0; i < num_ops; i++) | |
for (j = 0; j < nargs(ops[i]->tok); j++) | |
reduce(q, ops[i], j, &head); | |
/* | |
* Exactly one node must left linked next to the list head, | |
* and that one is the root of the tree. If num_nodes is 0, then | |
* the expression is empty. In this case NIL node is returned as root. | |
*/ | |
if (head.next != head.prev) | |
die(q, "%s: line %d: %s", serr, node->line_no, "bad expr"); | |
*root = num_toks == 0 ? &nil : LL_ENTRY(head.next, struct node, link); | |
return (num_toks + 1); /* +1 is for the end-token */ | |
} | |
static void q_exec_string(struct q *q, const char *buf) | |
{ | |
struct node pool[MAX_SOURCE_SIZE], *root; | |
int idx = 0; | |
/* Allocate an array of tokens on the stack, and tokenize the source */ | |
tokenize(buf, pool); | |
/* Build syntax tree for each top-level statement */ | |
while (pool[idx].tok != TOK_END) { | |
idx += expr(q, pool + idx, ';', &root); | |
emit_expr(q, root); | |
emit_byte(q, OP_POP); | |
} | |
/* Finish compilation */ | |
emit_byte(q, OP_END); | |
} | |
static void compile(struct q *q, const char *path) | |
{ | |
char buf[MAX_SOURCE_SIZE]; | |
int n = 0; | |
FILE *fp; | |
/* Read whole source file in buffer */ | |
if ((fp = fopen(path, "r")) == NULL) { | |
die(q, "fopen(%s): %s", path, strerror(errno)); | |
} else if ((n = fread(buf, 1, sizeof(buf), fp)) < 0) { | |
die(q, "fread(%s): %s", path, strerror(errno)); | |
} else if (n == sizeof(buf)) { | |
die(q, "%s: file is too large", path); | |
} | |
/* \0 terminate it the buffer, so tokenizer knows where to stop */ | |
buf[n] = '\0'; | |
(void) fclose(fp); | |
q_exec_string(q, buf); | |
/* Initialize namespace hash. */ | |
init_hash_obj(push(q), NULL); | |
assert(q->sc == 1); | |
import_builtin_objects(q); | |
/* Point to the start of the code */ | |
disasm(q); | |
q->cc = 0; | |
execute(q); | |
LL_ADD(&loaded_modules, &q->link); | |
} | |
static struct q *q_load(const char *path, char *err_str) | |
{ | |
struct llhead *lp; | |
struct q *q; | |
/* Maybe this module is already loaded? */ | |
LL_FOREACH(&loaded_modules, lp) { | |
q = LL_ENTRY(lp, struct q, link); | |
if (strcmp(q->name, path) == 0) | |
return (q); /* Yeah */ | |
} | |
/* No, not loaded yet. Load it now. */ | |
if ((q = calloc(1, sizeof(*q))) == NULL) { | |
(void) snprintf(err_str, ERR_STR_SIZE, "%s", | |
"cannot allocate memory for q"); | |
} else if (setjmp(q->jmpbuf) != 0) { | |
q_free(q); | |
q = NULL; | |
} else { | |
q->err_str = err_str; | |
q->name = strdup(path); | |
compile(q, path); | |
} | |
return (q); | |
} | |
static void usage(void) | |
{ | |
(void) fprintf(stderr, "usage: qq [-c <file>] file\n"); | |
exit(EXIT_FAILURE); | |
} | |
static const char *get_opt(int *pargc, char ***pargv) | |
{ | |
const char *opt; | |
if ((*pargv)[1][2] != '\0') { | |
opt = &(*pargv)[1][2]; | |
(*pargc)--; | |
(*pargv)++; | |
} else { | |
opt = (*pargv)[2]; | |
(*pargc) -= 2; | |
(*pargv) += 2; | |
} | |
if (opt == NULL) | |
usage(); | |
return (opt); | |
} | |
int main(int argc, char *argv[]) | |
{ | |
char err_str[ERR_STR_SIZE]; | |
const char *pcode_file = NULL; | |
struct q *q; | |
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != 0) | |
switch (argv[1][1]) { | |
case 'c': | |
pcode_file = get_opt(&argc, &argv); | |
break; | |
default: | |
usage(); | |
break; | |
} | |
init_op_tab(); | |
q = q_load(argc > 1 ? argv[1]: "test.txt", err_str); | |
if (q == NULL) | |
printf("%s\n", err_str); | |
else if (pcode_file != NULL) | |
q_save(pcode_file, q, err_str); | |
return (0); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* file: "tinyc.c" */ | |
/* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
/* | |
* This is a compiler for the Tiny-C language. Tiny-C is a | |
* considerably stripped down version of C and it is meant as a | |
* pedagogical tool for learning about compilers. The integer global | |
* variables "a" to "z" are predefined and initialized to zero, and it | |
* is not possible to declare new variables. The compiler reads the | |
* program from standard input and prints out the value of the | |
* variables that are not zero. The grammar of Tiny-C in EBNF is: | |
* | |
* <program> ::= <statement> | |
* <statement> ::= "if" <paren_expr> <statement> | | |
* "if" <paren_expr> <statement> "else" <statement> | | |
* "while" <paren_expr> <statement> | | |
* "do" <statement> "while" <paren_expr> ";" | | |
* "{" { <statement> } "}" | | |
* <expr> ";" | | |
* ";" | |
* <paren_expr> ::= "(" <expr> ")" | |
* <expr> ::= <test> | <id> "=" <expr> | |
* <test> ::= <sum> | <sum> "<" <sum> | |
* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> | |
* <term> ::= <id> | <int> | <paren_expr> | |
* <id> ::= "a" | "b" | "c" | "d" | ... | "z" | |
* <int> ::= <an_unsigned_decimal_integer> | |
* | |
* Here are a few invocations of the compiler: | |
* | |
* % echo "a=b=c=2<3;" | ./a.out | |
* a = 1 | |
* b = 1 | |
* c = 1 | |
* % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out | |
* i = 128 | |
* % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out | |
* i = 25 | |
* j = 25 | |
* % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out | |
* i = 51 | |
* % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out | |
* i = 51 | |
* % echo "{ i=7; if (i<5) x=1; if (i<10) y=2; }" | ./a.out | |
* i = 7 | |
* y = 2 | |
* | |
* The compiler does a minimal amount of error checking to help | |
* highlight the structure of the compiler. | |
*/ | |
/*---------------------------------------------------------------------------*/ | |
/* Lexer. */ | |
enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR, | |
PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI }; | |
char *words[] = { "do", "else", "if", "while", NULL }; | |
int ch = ' '; | |
int sym; | |
int int_val; | |
char id_name[100]; | |
void syntax_error() { fprintf(stderr, "syntax error\n"); exit(1); } | |
void next_ch() { ch = getchar(); } | |
void next_sym() | |
{ again: switch (ch) | |
{ case ' ': case '\n': next_ch(); goto again; | |
case EOF: sym = EOI; break; | |
case '{': next_ch(); sym = LBRA; break; | |
case '}': next_ch(); sym = RBRA; break; | |
case '(': next_ch(); sym = LPAR; break; | |
case ')': next_ch(); sym = RPAR; break; | |
case '+': next_ch(); sym = PLUS; break; | |
case '-': next_ch(); sym = MINUS; break; | |
case '<': next_ch(); sym = LESS; break; | |
case ';': next_ch(); sym = SEMI; break; | |
case '=': next_ch(); sym = EQUAL; break; | |
default: | |
if (ch >= '0' && ch <= '9') | |
{ int_val = 0; /* missing overflow check */ | |
while (ch >= '0' && ch <= '9') | |
{ int_val = int_val*10 + (ch - '0'); next_ch(); } | |
sym = INT; | |
} | |
else if (ch >= 'a' && ch <= 'z') | |
{ int i = 0; /* missing overflow check */ | |
while ((ch >= 'a' && ch <= 'z') || ch == '_') | |
{ id_name[i++] = ch; next_ch(); } | |
id_name[i] = '\0'; | |
sym = 0; | |
while (words[sym] != NULL && strcmp(words[sym], id_name) != 0) | |
sym++; | |
if (words[sym] == NULL) | |
if (id_name[1] == '\0') sym = ID; else syntax_error(); | |
} | |
else | |
syntax_error(); | |
} | |
} | |
/*---------------------------------------------------------------------------*/ | |
/* Parser. */ | |
enum { VAR, CST, ADD, SUB, LT, SET, | |
IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG }; | |
struct node { int kind; struct node *o1, *o2, *o3; int val; }; | |
typedef struct node node; | |
node *new_node(int k) | |
{ node *x = (node*)malloc(sizeof(node)); x->kind = k; return x; } | |
node *paren_expr(); /* forward declaration */ | |
node *term() /* <term> ::= <id> | <int> | <paren_expr> */ | |
{ node *x; | |
if (sym == ID) { x=new_node(VAR); x->val=id_name[0]-'a'; next_sym(); } | |
else if (sym == INT) { x=new_node(CST); x->val=int_val; next_sym(); } | |
else x = paren_expr(); | |
return x; | |
} | |
node *sum() /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */ | |
{ node *t, *x = term(); | |
while (sym == PLUS || sym == MINUS) | |
{ t=x; x=new_node(sym==PLUS?ADD:SUB); next_sym(); x->o1=t; x->o2=term(); } | |
return x; | |
} | |
node *test() /* <test> ::= <sum> | <sum> "<" <sum> */ | |
{ node *t, *x = sum(); | |
if (sym == LESS) | |
{ t=x; x=new_node(LT); next_sym(); x->o1=t; x->o2=sum(); } | |
return x; | |
} | |
node *expr() /* <expr> ::= <test> | <id> "=" <expr> */ | |
{ node *t, *x; | |
if (sym != ID) return test(); | |
x = test(); | |
if (x->kind == VAR && sym == EQUAL) | |
{ t=x; x=new_node(SET); next_sym(); x->o1=t; x->o2=expr(); } | |
return x; | |
} | |
node *paren_expr() /* <paren_expr> ::= "(" <expr> ")" */ | |
{ node *x; | |
if (sym == LPAR) next_sym(); else syntax_error(); | |
x = expr(); | |
if (sym == RPAR) next_sym(); else syntax_error(); | |
return x; | |
} | |
node *statement() | |
{ node *t, *x; | |
if (sym == IF_SYM) /* "if" <paren_expr> <statement> */ | |
{ x = new_node(IF1); | |
next_sym(); | |
x->o1 = paren_expr(); | |
x->o2 = statement(); | |
if (sym == ELSE_SYM) /* ... "else" <statement> */ | |
{ x->kind = IF2; | |
next_sym(); | |
x->o3 = statement(); | |
} | |
} | |
else if (sym == WHILE_SYM) /* "while" <paren_expr> <statement> */ | |
{ x = new_node(WHILE); | |
next_sym(); | |
x->o1 = paren_expr(); | |
x->o2 = statement(); | |
} | |
else if (sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */ | |
{ x = new_node(DO); | |
next_sym(); | |
x->o1 = statement(); | |
if (sym == WHILE_SYM) next_sym(); else syntax_error(); | |
x->o2 = paren_expr(); | |
if (sym == SEMI) next_sym(); else syntax_error(); | |
} | |
else if (sym == SEMI) /* ";" */ | |
{ x = new_node(EMPTY); next_sym(); } | |
else if (sym == LBRA) /* "{" { <statement> } "}" */ | |
{ x = new_node(EMPTY); | |
next_sym(); | |
while (sym != RBRA) | |
{ t=x; x=new_node(SEQ); x->o1=t; x->o2=statement(); } | |
next_sym(); | |
} | |
else /* <expr> ";" */ | |
{ x = new_node(EXPR); | |
x->o1 = expr(); | |
if (sym == SEMI) next_sym(); else syntax_error(); | |
} | |
return x; | |
} | |
node *program() /* <program> ::= <statement> */ | |
{ node *x = new_node(PROG); | |
next_sym(); x->o1 = statement(); if (sym != EOI) syntax_error(); | |
return x; | |
} | |
/*---------------------------------------------------------------------------*/ | |
/* Code generator. */ | |
enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT }; | |
typedef char code; | |
code object[1000], *here = object; | |
void g(code c) { *here++ = c; } /* missing overflow check */ | |
code *hole() { return here++; } | |
void fix(code *src, code *dst) { *src = dst-src; } /* missing overflow check */ | |
void c(node *x) | |
{ code *p1, *p2; | |
switch (x->kind) | |
{ case VAR : g(IFETCH); g(x->val); break; | |
case CST : g(IPUSH); g(x->val); break; | |
case ADD : c(x->o1); c(x->o2); g(IADD); break; | |
case SUB : c(x->o1); c(x->o2); g(ISUB); break; | |
case LT : c(x->o1); c(x->o2); g(ILT); break; | |
case SET : c(x->o2); g(ISTORE); g(x->o1->val); break; | |
case IF1 : c(x->o1); g(JZ); p1=hole(); c(x->o2); fix(p1,here); break; | |
case IF2 : c(x->o1); g(JZ); p1=hole(); c(x->o2); g(JMP); p2=hole(); | |
fix(p1,here); c(x->o3); fix(p2,here); break; | |
case WHILE: p1=here; c(x->o1); g(JZ); p2=hole(); c(x->o2); | |
g(JMP); fix(hole(),p1); fix(p2,here); break; | |
case DO : p1=here; c(x->o1); c(x->o2); g(JNZ); fix(hole(),p1); break; | |
case EMPTY: break; | |
case SEQ : c(x->o1); c(x->o2); break; | |
case EXPR : c(x->o1); g(IPOP); break; | |
case PROG : c(x->o1); g(HALT); break; | |
} | |
} | |
/*---------------------------------------------------------------------------*/ | |
/* Virtual machine. */ | |
int globals[26]; | |
void run() | |
{ int stack[1000], *sp = stack; | |
code *pc = object; | |
again: switch (*pc++) | |
{ case IFETCH: *sp++ = globals[*pc++]; goto again; | |
case ISTORE: globals[*pc++] = sp[-1]; goto again; | |
case IPUSH : *sp++ = *pc++; goto again; | |
case IPOP : --sp; goto again; | |
case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; goto again; | |
case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; goto again; | |
case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; goto again; | |
case JMP : pc += *pc; goto again; | |
case JZ : if (*--sp == 0) pc += *pc; else pc++; goto again; | |
case JNZ : if (*--sp != 0) pc += *pc; else pc++; goto again; | |
} | |
} | |
/*---------------------------------------------------------------------------*/ | |
/* Main program. */ | |
int main() | |
{ int i; | |
c(program()); | |
for (i=0; i<26; i++) | |
globals[i] = 0; | |
run(); | |
for (i=0; i<26; i++) | |
if (globals[i] != 0) | |
printf("%c = %d\n", 'a'+i, globals[i]); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <assert.h> | |
#include <stdlib.h> | |
#include <string.h> | |
// === runtime === | |
enum { tObject, tNumber, tString }; | |
typedef struct { | |
char type; | |
union { | |
char *string; | |
int number; | |
} value; | |
int refcount; | |
} Object; | |
static Object *TrueObject; | |
static Object *FalseObject; | |
static Object *NilObject; | |
Object *Object_new() { | |
return calloc(1, sizeof(Object)); | |
} | |
/////////////// GC /////////////// | |
Object *retain(Object *self) { | |
self->refcount++; | |
return self; | |
} | |
void release(Object *self) { | |
self->refcount--; | |
if (self->refcount <= 0) | |
free(self); | |
} | |
/////////////////////////////////// | |
char Object_is_true(Object *self) { | |
// false and nil are == false, | |
// everything else is true. | |
if (self == FalseObject || self == NilObject) return 0; | |
return 1; | |
} | |
Object *Number_new(int value) { | |
Object *object = Object_new(); | |
object->type = tNumber; | |
object->value.number = value; | |
return object; | |
} | |
int Number_value(Object *self) { | |
assert(self->type == tNumber); | |
return self->value.number; | |
} | |
Object *String_new(char *value) { | |
Object *object = Object_new(); | |
object->type = tString; | |
object->value.string = value; | |
return object; | |
} | |
Object *call(Object *receiver, char *message, Object *argv[], int argc) { | |
// HACK hardcoded methods. In a real runtime, you'd have proper classes and methods. | |
// See runtime.rb in our interpreter for an example. | |
if (strcmp(message, "+") == 0) { | |
// Number#+ | |
return Number_new(receiver->value.number + argv[0]->value.number); | |
} else if (strcmp(message, "print") == 0) { | |
// Object#print | |
switch (argv[0]->type) { | |
case tNumber: | |
printf("%d\n", argv[0]->value.number); | |
return argv[0]; | |
case tString: | |
printf("%s\n", argv[0]->value.string); | |
return argv[0]; | |
} | |
} | |
return 0; | |
} | |
void init_runtime() { | |
TrueObject = retain(Object_new()); | |
FalseObject = retain(Object_new()); | |
NilObject = retain(Object_new()); | |
} | |
void destroy_runtime() { | |
release(TrueObject); | |
release(FalseObject); | |
release(NilObject); | |
} | |
// === opcode === | |
enum { | |
// ------- Stack ------- | |
// Opcode Operands before after | |
/* 00 */ CALL, // index, argc [rcv, arg...] [returned] | |
/* 01 */ PUSH_NUMBER, // index [] [number] | |
/* 02 */ PUSH_STRING, // index [] [string] | |
/* 03 */ PUSH_SELF, // [] [self] | |
/* 04 */ PUSH_NIL, // [] [nil] | |
/* 05 */ PUSH_BOOL, // 1=t, 0=f [] [true or false] | |
/* 06 */ GET_LOCAL, // index [] [value] | |
/* 07 */ SET_LOCAL, // index [value] [] | |
/* 08 */ JUMP_UNLESS, // offset [test] [] | |
/* 09 */ JUMP, // offset [] [] | |
/* 10 */ ADD, // [a, b] [result] | |
/* 11 */ RETURN // [] [] | |
}; | |
typedef unsigned char byte; // 1 byte | |
// === bytecode === | |
/* | |
This file was compiled by the `compile` script. | |
It contains the bytecode for the following code: | |
print("the answer is:") | |
a = 30 + 2 | |
if false | |
print(a) | |
else | |
print("no") | |
end | |
*/ | |
#define LITERALS { \ | |
(void *) "the answer is:", \ | |
(void *) "print", \ | |
(void *) 30, \ | |
(void *) 2, \ | |
(void *) "no", \ | |
} | |
#define INSTRUCTIONS { 3, 2, 0, 0, 1, 1, 1, 2, 1, 3, 10, 7, 0, 5, 0, 8, 8, 3, 6, 0, 0, 1, 1, 9, 6, 3, 2, 4, 0, 1, 1, 11 } | |
// Helpers to play with the stack | |
#define LOCALS_MAX 10 | |
#define STACK_MAX 10 | |
#define STACK_PUSH(I) do { \ | |
assert(sp-stack < STACK_MAX); \ | |
*(++sp) = (I); \ | |
retain(*sp); \ | |
} while(0) | |
#define STACK_POP() (*sp--) | |
void run(void *literals[], byte instructions[]) { | |
byte *ip = instructions; // instruction pointer | |
Object *stack[STACK_MAX]; // THE famous stack | |
Object **sp = stack; // stack pointer, keeps track of current position | |
Object *locals[LOCALS_MAX] = {}; // where we store our local variables | |
// Setup the runtime | |
Object *self = Object_new(); | |
retain(self); | |
// Start processing instructions | |
while (1) { | |
switch (*ip) { | |
case PUSH_NUMBER: { | |
ip++; // advance to the operand (literal index) | |
STACK_PUSH(Number_new((long)literals[*ip])); | |
break; | |
} | |
case PUSH_STRING: { | |
ip++; // advance to the operand (literal index) | |
STACK_PUSH(String_new((char *)literals[*ip])); | |
break; | |
} | |
case PUSH_SELF: { | |
STACK_PUSH(self); | |
break; | |
} | |
case PUSH_NIL: { | |
STACK_PUSH(NilObject); | |
break; | |
} | |
case PUSH_BOOL: { | |
ip++; // advance to operand (0 = false, 1 = true) | |
if (*ip == 0) { | |
STACK_PUSH(FalseObject); | |
} else { | |
STACK_PUSH(TrueObject); | |
} | |
break; | |
} | |
case CALL: { | |
ip++; // advance to operand (method name index in literals) | |
char *method = literals[*ip]; | |
ip++; // advance to operand (# of args) | |
int argc = *ip; | |
Object *argv[10]; | |
int i; | |
for (i = argc - 1; i >= 0; i--) argv[i] = STACK_POP(); | |
Object *receiver = STACK_POP(); | |
Object *result = call(receiver, method, argv, argc); | |
STACK_PUSH(result); | |
// Release all the objects | |
for (i = argc - 1; i >= 0; i--) release(argv[i]); | |
release(receiver); | |
break; | |
} | |
case RETURN: { | |
goto cleanup; | |
break; | |
} | |
case GET_LOCAL: { | |
ip++; // advance operand (local index) | |
STACK_PUSH(locals[*ip]); | |
break; | |
} | |
case SET_LOCAL: { | |
ip++; // local index | |
locals[*ip] = STACK_POP(); | |
break; | |
} | |
case ADD: { | |
Object *a = STACK_POP(); | |
Object *b = STACK_POP(); | |
STACK_PUSH(Number_new(Number_value(a) + Number_value(b))); | |
release(a); | |
release(b); | |
break; | |
} | |
case JUMP_UNLESS: { | |
ip++; // operand (offset, # of bytes to jump forward) | |
byte offset = *ip; | |
Object *condition = STACK_POP(); | |
if (!Object_is_true(condition)) ip += offset; | |
release(condition); | |
break; | |
} | |
case JUMP: { | |
ip++; // operand (offset, # of bytes to jump forward) | |
byte offset = *ip; | |
ip += offset; | |
break; | |
} | |
} | |
ip++; | |
} | |
cleanup: | |
release(self); | |
int i; | |
// Release all the local variables | |
for (i = 0; i < LOCALS_MAX; i++) if (locals[i]) release(locals[i]); | |
// Release everything left on the stack | |
while (sp > stack) release(STACK_POP()); | |
} | |
int main (int argc, char const *argv[]) { | |
// Get compiled literals table and instructions from bytecode.h | |
void *literals[] = LITERALS; | |
byte instructions[] = INSTRUCTIONS; | |
init_runtime(); | |
// Run that thing! | |
run(literals, instructions); | |
destroy_runtime(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment