Skip to content

Instantly share code, notes, and snippets.

@andersonsp
Last active August 29, 2015 13:57
Show Gist options
  • Save andersonsp/9744663 to your computer and use it in GitHub Desktop.
Save andersonsp/9744663 to your computer and use it in GitHub Desktop.
Valenok's QVM
#include <stdlib.h>
#include <string.h>
#define KEY_LENGTH 64
#define HTAB_SIZE 4096
typedef struct
{
char* key;
int value;
} htable_node;
typedef struct
{
int num_nodes;
htable_node* nodes[HTAB_SIZE];
} hash_table;
hash_table* create_htab();
void destroy_htab(hash_table* htab);
inline int htab_add(hash_table* htab, char* key, int value);
inline int htab_find(hash_table* htab, char* key);
inline unsigned int htab_hash(char* key);
hash_table* create_htab()
{
hash_table* htab;
htab = (hash_table*)malloc(sizeof(hash_table));
int i; for(i = 0; i < HTAB_SIZE; i++) htab->nodes[i] = NULL;
return htab;
}
void destroy_htab(hash_table* htab)
{
int i; for(i = 0; i < HTAB_SIZE; i++)
{
if(htab->nodes[i])
{
free(htab->nodes[i]->key);
free(htab->nodes[i]);
}
}
free(htab);
htab = NULL;
}
int htab_add(hash_table* htab, char* k, int v)
{
int hash = htab_hash(k);
// If the node is not already occupied, allocate space, and copy the key/value pair.
if(htab->nodes[hash] == NULL) htab->nodes[hash] = malloc(sizeof(htable_node));
else return 1;
htab->nodes[hash]->key = (char*)malloc(sizeof(char) * (strlen(k) + 1));
strcpy(htab->nodes[hash]->key, k);
htab->nodes[hash]->value = v;
return 0;
}
int htab_find(hash_table* htab, char* key)
{
int hash = htab_hash(key);
if(htab->nodes[hash] != NULL) return htab->nodes[hash]->value;
else return -1;
}
unsigned int htab_hash(char* k)
{
unsigned int hash = 1;
char* c; for(c = k; *c; c++)
hash += (hash << *c) - *c;
return hash % HTAB_SIZE;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define ICACHE 0 /* nonzero to enable point-of-send inline cache */
#define MCACHE 0 /* nonzero to enable global method cache */
typedef struct _Vtable Vtable;
typedef struct _Object Object;
typedef struct _Closure Closure;
typedef struct _Symbol Symbol;
typedef Object *(*imp_t)(Closure *closure, Object *receiver, ...);
struct _Vtable {
Vtable *vt;
int size;
int tally;
Object **keys;
Object **values;
Vtable *parent;
};
struct _Object {
Vtable *vt;
};
struct _Closure {
Vtable *vt;
imp_t method;
Object *data;
};
struct _Symbol {
Vtable *vt;
char *string;
};
Vtable *SymbolList= 0;
Vtable *vtable_vt;
Vtable *object_vt;
Vtable *symbol_vt;
Vtable *closure_vt;
Object *s_addMethod = 0;
Object *s_allocate = 0;
Object *s_delegated = 0;
Object *s_lookup = 0;
extern inline void *alloc(size_t size) {
Vtable **ppvt= (Vtable **)calloc(1, sizeof(Vtable *) + size);
return (void *)(ppvt + 1);
}
Object *symbol_new(char *string) {
Symbol *symbol = (Symbol *)alloc(sizeof(Symbol));
symbol->vt = symbol_vt;
symbol->string = strdup(string);
return (Object *)symbol;
}
Object *closure_new(imp_t method, Object *data) {
Closure *closure = (Closure *)alloc(sizeof(Closure));
closure->vt = closure_vt;
closure->method = method;
closure->data = data;
return (Object *)closure;
}
Object *vtable_lookup(Closure *closure, Vtable *self, Object *key);
#if ICACHE
# define send(RCV, MSG, ARGS...) ({ \
struct object *r = (Object *)(RCV); \
static Vtable *prevVT = 0; \
static Closure *closure = 0; \
register Vtable *thisVT = r->vt; \
thisVT == prevVT \
? closure \
: (prevVT = thisVT, \
closure = bind(r, (MSG))); \
closure->method(closure, r, ##ARGS); \
})
#else
# define send(RCV, MSG, ARGS...) ({ \
Object *r = (Object *)(RCV); \
Closure *c = bind(r, (MSG)); \
c->method(c, r, ##ARGS); \
})
#endif
#if MCACHE
struct entry {
Vtable *vtable;
Object *selector;
Closure *closure;
} MethodCache[8192];
#endif
Closure *bind(Object *rcv, Object *msg) {
Closure *c;
Vtable *vt = rcv->vt;
#if MCACHE
struct entry *cl = MethodCache + ((((unsigned)vt << 2) ^ ((unsigned)msg >> 3)) & ((sizeof(MethodCache) / sizeof(struct entry)) - 1));
if (cl->vtable == vt && cl->selector == msg)
return cl->closure;
#endif
c = ((msg == s_lookup) && (rcv == (Object *)vtable_vt))
? (Closure *)vtable_lookup(0, vt, msg)
: (Closure *)send(vt, s_lookup, msg);
#if MCACHE
cl->vtable = vt;
cl->selector = msg;
cl->closure = c;
#endif
return c;
}
Vtable *vtable_delegated(Closure *closure, Vtable *self) {
Vtable *child= (Vtable *)alloc(sizeof(Vtable));
child->vt = self ? self->vt : 0;
child->size = 2;
child->tally = 0;
child->keys = (Object **)calloc(child->size, sizeof(Object *));
child->values = (Object **)calloc(child->size, sizeof(Object *));
child->parent = self;
return child;
}
Object *vtable_allocate(Closure *closure, Vtable *self, int payloadSize) {
Object *object = (Object *)alloc(payloadSize);
object->vt = self;
return object;
}
imp_t vtable_add_method(Closure *closure, Vtable *self, Object *key, imp_t method) {
int i;
for (i = 0; i < self->tally; ++i)
if (key == self->keys[i])
return ((Closure *)self->values[i])->method = method;
if( self->tally == self->size ) {
self->size *= 2;
self->keys = (Object **)realloc(self->keys, sizeof(Object *) * self->size);
self->values = (Object **)realloc(self->values, sizeof(Object *) * self->size);
}
self->keys [self->tally ] = key;
self->values[self->tally++] = closure_new(method, 0);
return method;
}
Object *vtable_lookup(Closure *closure, Vtable *self, Object *key) {
int i;
for (i = 0; i < self->tally; ++i)
if(key == self->keys[i]) return self->values[i];
if (self->parent) return send(self->parent, s_lookup, key);
fprintf(stderr, "lookup failed %p %s\n", self, ((Symbol *)key)->string);
return 0;
}
Object *symbol_intern(Closure *closure, Object *self, char *string) {
Object *symbol;
int i;
for(i = 0; i < SymbolList->tally; ++i) {
symbol = SymbolList->keys[i];
if( !strcmp(string, ((Symbol *)symbol)->string) ) return symbol;
}
symbol = symbol_new(string);
vtable_add_method(0, SymbolList, symbol, 0);
return symbol;
}
void init(void) {
vtable_vt = vtable_delegated(0, 0);
vtable_vt->vt = vtable_vt;
object_vt = vtable_delegated(0, 0);
object_vt->vt = vtable_vt;
vtable_vt->parent = object_vt;
symbol_vt = vtable_delegated(0, object_vt);
closure_vt = vtable_delegated(0, object_vt);
SymbolList = vtable_delegated(0, 0);
s_lookup = symbol_intern(0, 0, "lookup");
s_addMethod = symbol_intern(0, 0, "addMethod");
s_allocate = symbol_intern(0, 0, "allocate");
s_delegated = symbol_intern(0, 0, "delegated");
vtable_add_method(0, vtable_vt, s_lookup, (imp_t)vtable_lookup);
vtable_add_method(0, vtable_vt, s_addMethod, (imp_t)vtable_add_method);
send(vtable_vt, s_addMethod, s_allocate, vtable_allocate);
send(vtable_vt, s_addMethod, s_delegated, vtable_delegated);
}
/*----------------------------------------------------------------*/
Object *s_new= 0;
Object *s_length= 0;
struct Number {
Vtable *vt;
};
Vtable *Number_vt = 0;
Object *Number = 0;
Object *Number_new(Closure *closure, struct Number *self) {
fprintf(stderr, "Number_new\n");
exit(1);
return 0;
}
int Number_length(Closure *closure, struct Number *self) {
fprintf(stderr, "Number has no length\n");
exit(1);
return 0;
}
struct String {
Vtable *vt;
int length;
char *chars;
};
Vtable *String_vt = 0;
Object *String = 0;
Object *String_new(Closure *closure, struct String *self, int size) {
struct String *clone = (struct String *)send(self->vt, s_allocate, sizeof(struct String));
clone->length = size;
clone->chars = (char *)malloc(size);
return (Object *)clone;
}
int String_length(Closure *closure, struct String *self) {
return self->length;
}
struct Vector {
Vtable *vt;
int length;
Object *contents;
};
Vtable *Vector_vt = 0;
Object *Vector = 0;
Object *Vector_new(Closure *closure, struct Vector *self, int size) {
struct Vector *clone = (struct Vector *)send(self->vt, s_allocate, sizeof(struct Vector));
clone->length = size;
clone->contents = (Object *)calloc(size, sizeof(Object *));
return (Object *)clone;
}
int Vector_length(Closure *closure, struct Vector *self) {
return self->length;
}
void init2(void) {
s_new = symbol_intern(0, 0, "new");
s_length = symbol_intern(0, 0, "length");
Number_vt = (Vtable *)send(object_vt, s_delegated);
String_vt = (Vtable *)send(object_vt, s_delegated);
Vector_vt = (Vtable *)send(object_vt, s_delegated);
send(Number_vt, s_addMethod, s_new, Number_new);
send(String_vt, s_addMethod, s_new, String_new);
send(Vector_vt, s_addMethod, s_new, Vector_new);
send(Number_vt, s_addMethod, s_length, Number_length);
send(String_vt, s_addMethod, s_length, String_length);
send(Vector_vt, s_addMethod, s_length, Vector_length);
Number = send(Number_vt, s_allocate, 0);
String = send(String_vt, s_allocate, 0);
Vector = send(Vector_vt, s_allocate, 0);
}
/*----------------------------------------------------------------*/
void doit(void) {
int i, j;
Object *a = send(String, s_new, 1);
Object *b = send(Vector, s_new, 1);
for (i = 0, j = 0; i < 1000000; ++i) {
j += (int)send(a, s_length) + (int)send(b, s_length);
j += (int)send(a, s_length) + (int)send(b, s_length);
j += (int)send(a, s_length) + (int)send(b, s_length);
j += (int)send(a, s_length) + (int)send(b, s_length);
j += (int)send(a, s_length) + (int)send(b, s_length);
}
printf("total %d\n", j);
}
int main() {
init();
init2();
doit();
return 0;
}
/* tp_obj tp_track(TP,tp_obj v) { return v; }
void tp_grey(TP,tp_obj v) { }
void tp_full(TP) { }
void tp_gc_init(TP) { }
void tp_gc_deinit(TP) { }
void tp_delete(TP,tp_obj v) { }*/
/*
#ifdef TP_NORMALMAKE
#include "tp.h"
#include "list_auto.h"
#include "dict_auto.h"
#endif
void tp_grey(TP,tp_obj v) {
if (v.type < TP_STRING || (!v.gci.data) || *v.gci.data) { return; }
*v.gci.data = 1;
if (v.type == TP_STRING || v.type == TP_DATA) {
_tp_list_appendx(tp,tp->black,v);
return;
}
_tp_list_appendx(tp,tp->grey,v);
}
void tp_follow(TP,tp_obj v) {
int type = v.type;
if (type == TP_LIST) {
int n;
for (n=0; n<v.list.val->len; n++) {
tp_grey(tp,v.list.val->items[n]);
}
}
if (type == TP_DICT) {
int i;
for (i=0; i<v.dict.val->len; i++) {
int n = _tp_dict_next(tp,v.dict.val);
tp_grey(tp,v.dict.val->items[n].key);
tp_grey(tp,v.dict.val->items[n].val);
}
tp_grey(tp,v.dict.val->meta);
}
if (type == TP_FNC) {
tp_grey(tp,v.fnc.info->self);
tp_grey(tp,v.fnc.info->globals);
tp_grey(tp,v.fnc.info->code);
}
}
void tp_reset(TP) {
int n;
_tp_list *tmp;
for (n=0; n<tp->black->len; n++) {
*tp->black->items[n].gci.data = 0;
}
tmp = tp->white;
tp->white = tp->black;
tp->black = tmp;
}
void tp_gc_init(TP) {
tp->white = _tp_list_new(tp);
tp->grey = _tp_list_new(tp);
tp->black = _tp_list_new(tp);
tp->steps = 0;
}
void tp_gc_deinit(TP) {
_tp_list_free(tp, tp->white);
_tp_list_free(tp, tp->grey);
_tp_list_free(tp, tp->black);
}
void tp_delete(TP,tp_obj v) {
int type = v.type;
if (type == TP_LIST) {
_tp_list_free(tp, v.list.val);
return;
} else if (type == TP_DICT) {
_tp_dict_free(tp, v.dict.val);
return;
} else if (type == TP_STRING) {
tp_free(tp, v.string.info);
return;
} else if (type == TP_DATA) {
if (v.data.info->free) {
v.data.info->free(tp,v);
}
tp_free(tp, v.data.info);
return;
} else if (type == TP_FNC) {
tp_free(tp, v.fnc.info);
return;
}
else if( type == TP_FRAME ) {
tp_frame_ *f = v.frame.val;
tp_free(tp, f);
return;
}
tp_raise(,tp_string("(tp_delete) TypeError: ?"));
}
void tp_collect(TP) {
int n;
for (n=0; n<tp->white->len; n++) {
tp_obj r = tp->white->items[n];
if (*r.gci.data) { continue; }
tp_delete(tp,r);
}
tp->white->len = 0;
tp_reset(tp);
}
void _tp_gcinc(TP) {
tp_obj v;
if (!tp->grey->len) {
return;
}
v = _tp_list_pop(tp,tp->grey,tp->grey->len-1,"_tp_gcinc");
tp_follow(tp,v);
_tp_list_appendx(tp,tp->black,v);
}
void tp_full(TP) {
while (tp->grey->len) {
_tp_gcinc(tp);
}
tp_collect(tp);
tp_follow(tp,tp->root);
}
void tp_gcinc(TP) {
tp->steps += 1;
if (tp->steps < TP_GCMAX || tp->grey->len > 0) {
_tp_gcinc(tp); _tp_gcinc(tp);
}
if (tp->steps < TP_GCMAX || tp->grey->len > 0) { return; }
tp->steps = 0;
tp_full(tp);
return;
}
tp_obj tp_track(TP,tp_obj v) {
tp_gcinc(tp);
tp_grey(tp,v);
return v;
}
# Copyright (c) 2006, 2007 by Sergey Lyubka
# All rights reserved
#
# $Id$
SRCS= q.c
PROG= q
CFLAGS= -W -Wall -g -pedantic -ansi -D_BSD_SOURCE $(COPT)
all: $(PROG)
$(PROG): $(HDRS) $(SRCS)
$(CC) $(CFLAGS) $(SRCS) -o $@
clean:
rm -f *.o $(PROG) *core
#include <stdio.h>
#include <stdlib.h>
#define STACK_MAX 256
typedef enum {
OBJ_INT,
OBJ_PAIR
} ObjectType;
typedef struct sObject {
ObjectType type;
unsigned char marked;
/* The next object in the linked list of heap allocated objects. */
struct sObject* next;
union {
/* OBJ_INT */
int value;
/* OBJ_PAIR */
struct {
struct sObject* head;
struct sObject* tail;
};
};
} Object;
typedef struct {
Object* stack[STACK_MAX];
int stackSize;
/* The first object in the linked list of all objects on the heap. */
Object* firstObject;
/* The total number of currently allocated objects. */
int numObjects;
/* The number of objects required to trigger a GC. */
int maxObjects;
} VM;
void assert(int condition, const char* message) {
if (!condition) {
printf("%s\n", message);
exit(1);
}
}
VM* newVM() {
VM* vm = malloc(sizeof(VM));
vm->stackSize = 0;
vm->firstObject = NULL;
vm->numObjects = 0;
vm->maxObjects = 8;
return vm;
}
void push(VM* vm, Object* value) {
assert(vm->stackSize < STACK_MAX, "Stack overflow!");
vm->stack[vm->stackSize++] = value;
}
Object* pop(VM* vm) {
assert(vm->stackSize > 0, "Stack underflow!");
return vm->stack[--vm->stackSize];
}
void mark(Object* object) {
/* If already marked, we're done. Check this first to avoid recursing
on cycles in the object graph. */
if (object->marked) return;
object->marked = 1;
if (object->type == OBJ_PAIR) {
mark(object->head);
mark(object->tail);
}
}
void markAll(VM* vm)
{
for (int i = 0; i < vm->stackSize; i++) {
mark(vm->stack[i]);
}
}
void sweep(VM* vm)
{
Object** object = &vm->firstObject;
while (*object) {
if (!(*object)->marked) {
/* This object wasn't reached, so remove it from the list and free it. */
Object* unreached = *object;
*object = unreached->next;
free(unreached);
vm->numObjects--;
} else {
/* This object was reached, so unmark it (for the next GC) and move on to
the next. */
(*object)->marked = 0;
object = &(*object)->next;
}
}
}
void gc(VM* vm) {
int numObjects = vm->numObjects;
markAll(vm);
sweep(vm);
vm->maxObjects = vm->numObjects * 2;
printf("Collected %d objects, %d remaining.\n", numObjects - vm->numObjects,
vm->numObjects);
}
Object* newObject(VM* vm, ObjectType type) {
if (vm->numObjects == vm->maxObjects) gc(vm);
Object* object = malloc(sizeof(Object));
object->type = type;
object->next = vm->firstObject;
vm->firstObject = object;
object->marked = 0;
vm->numObjects++;
return object;
}
void pushInt(VM* vm, int intValue) {
Object* object = newObject(vm, OBJ_INT);
object->value = intValue;
push(vm, object);
}
Object* pushPair(VM* vm) {
Object* object = newObject(vm, OBJ_PAIR);
object->tail = pop(vm);
object->head = pop(vm);
push(vm, object);
return object;
}
void objectPrint(Object* object) {
switch (object->type) {
case OBJ_INT:
printf("%d", object->value);
break;
case OBJ_PAIR:
printf("(");
objectPrint(object->head);
printf(", ");
objectPrint(object->tail);
printf(")");
break;
}
}
void freeVM(VM *vm) {
vm->stackSize = 0;
gc(vm);
free(vm);
}
void test1() {
printf("Test 1: Objects on stack are preserved.\n");
VM* vm = newVM();
pushInt(vm, 1);
pushInt(vm, 2);
gc(vm);
assert(vm->numObjects == 2, "Should have preserved objects.");
freeVM(vm);
}
void test2() {
printf("Test 2: Unreached objects are collected.\n");
VM* vm = newVM();
pushInt(vm, 1);
pushInt(vm, 2);
pop(vm);
pop(vm);
gc(vm);
assert(vm->numObjects == 0, "Should have collected objects.");
freeVM(vm);
}
void test3() {
printf("Test 3: Reach nested objects.\n");
VM* vm = newVM();
pushInt(vm, 1);
pushInt(vm, 2);
pushPair(vm);
pushInt(vm, 3);
pushInt(vm, 4);
pushPair(vm);
pushPair(vm);
gc(vm);
assert(vm->numObjects == 7, "Should have reached objects.");
freeVM(vm);
}
void test4() {
printf("Test 4: Handle cycles.\n");
VM* vm = newVM();
pushInt(vm, 1);
pushInt(vm, 2);
Object* a = pushPair(vm);
pushInt(vm, 3);
pushInt(vm, 4);
Object* b = pushPair(vm);
a->tail = b;
b->tail = a;
gc(vm);
assert(vm->numObjects == 4, "Should have collected objects.");
freeVM(vm);
}
void perfTest() {
printf("Performance Test.\n");
VM* vm = newVM();
for (int i = 0; i < 1000; i++) {
for (int j = 0; j < 20; j++) {
pushInt(vm, i);
}
for (int k = 0; k < 20; k++) {
pop(vm);
}
}
freeVM(vm);
}
int main(int argc, const char * argv[]) {
test1();
test2();
test3();
test4();
perfTest();
return 0;
}
/*
* Copyright (c) 2004-2007 Sergey Lyubka <[email protected]>
* All rights reserved
*
* "THE BEER-WARE LICENSE" (Revision 42):
* Sergey Lyubka wrote this file. As long as you retain this notice you
* can do whatever you want with this stuff. If we meet some day, and you think
* this stuff is worth it, you can buy me a beer in return.
*
* $Id$
*/
#include <sys/stat.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <setjmp.h>
#include <stddef.h>
#include <errno.h>
#define MAX_SOURCE_SIZE 32768 /* Maximum source file size */
#define CHUNK_SIZE 64 /* Runtime stack growing size */
#define ERR_STR_SIZE 256 /* Error buffer size */
#define HASH_SIZE 13 /* Hash object hash table size */
#define PCODE_MAGIC "ABC1" /* Compiled p-code signature */
#define MAX_EXPR_TOKS 512 /* Maximum tokens in expression */
#define ARRAY_SIZE(array) ((int) (sizeof(array) / sizeof(array[0])))
struct q;
struct obj;
/*
* Circular doubly-linked list macros: http://silversoft.net/~devnull/llist.h
*/
struct llhead {
struct llhead *next;
struct llhead *prev;
};
#define LL_INIT(N) ((N)->next = (N)->prev = (N))
#define LL_HEAD(H) struct llhead H = {&H, &H}
#define LL_NULL {NULL, NULL}
#define LL_ENTRY(P,T,N) ((T *)((char *)(P) - offsetof(T, N)))
#define LL_ADD(H, N) \
do { \
((H)->next)->prev = (N); \
(N)->next = ((H)->next); \
(N)->prev = (H); \
(H)->next = (N); \
} while (0)
#define LL_TAIL(H, N) \
do { \
((H)->prev)->next = (N); \
(N)->prev = ((H)->prev); \
(N)->next = (H); \
(H)->prev = (N); \
} while (0)
#define LL_DEL(N) \
do { \
((N)->next)->prev = ((N)->prev); \
((N)->prev)->next = ((N)->next); \
LL_INIT(N); \
} while (0)
#define LL_EMPTY(N) ((N)->next == (N))
#define LL_FOREACH(H,N) for (N = (H)->next; N != (H); N = (N)->next)
#define LL_FOREACH_SAFE(H,N,T) \
for (N = (H)->next, T = (N)->next; N != (H); \
N = (T), T = (N)->next)
struct hash {
struct obj *parent; /* Parent object */
struct llhead glob_list; /* For all entries in the hash */
struct llhead tab[HASH_SIZE]; /* Hash table itself */
};
/*
* Typedefs to abstract environment-specific types
*/
typedef void (*q_op_handler_t)(struct q *);
typedef double numeric_t; /* Type for numeric values */
typedef size_t str_size_t; /* For string lengths */
typedef int code_ofs_t; /* For relative JUMP operands */
/*
* Strings are arbitrary chunks of uninterpreted binary data.
* Represented by a pointer to data and data length in bytes.
*/
struct vec {
int len; /* Length of binary data */
char *ptr; /* Pointer to binary data */
};
/*
* Value of any supported type
*/
union val {
struct vec str; /* String value */
q_op_handler_t cfunc; /* Function implemented in C */
struct hash *hash; /* Hash value */
numeric_t num; /* Numeric value */
code_ofs_t ofs; /* Jump offset value */
};
/*
* Object. This is a type marker and associated value.
*/
struct obj {
unsigned char type; /* TYPE_XXX, see below */
union val val; /* Value holder */
};
/*
* Hash entry
*/
struct he {
struct llhead link; /* Hash cell linkage */
struct obj key; /* Entry' key */
struct obj val; /* Entry's value */
};
/*
* Q context
*/
struct q {
struct llhead link; /* Linkage */
char *name; /* Module name */
char *code; /* Code */
struct obj *stack; /* Stack */
int sc; /* Stack counter */
int fc; /* Call frame pointer */
int cc; /* Code counter */
int cs; /* Code size */
int ss; /* Stack size */
jmp_buf jmpbuf; /* Exception environ */
char *err_str; /* Error message placeholder */
};
/*
* This is a parse token and syntax tree node at the same time
*/
struct node {
struct llhead link; /* Linkage in expression */
struct vec vec; /* Points to the source code */
int tok; /* Token's value */
int line_no; /* Line number in the source */
struct node *left;
struct node *right;
struct node *parent;
};
/*
* Types and their names
*/
enum {TYPE_NIL, TYPE_NUM, TYPE_STR, TYPE_HASH, TYPE_FUNC, TYPE_CFUNC};
static const char *types[] = {"nil", "num", "str", "hash", "func", "c-func"};
/*
* Opcodes and their names. Names are prepended by a letter that specifies
* opcode parameters (used by disassembly function)
*/
enum {
OP_END, OP_CALL, OP_RETURN, OP_SET, OP_GET, OP_PUSH, OP_MATH,
OP_POP, OP_JUMP, OP_DUP, OP_PUSHNS, OP_NEWMAP, NUM_INSTTRUCTIONS
};
static const char *opcodes[] = {
"xEND", "xCALL", "xRETURN", "xSET", "xGET", "tPUSH", "aMATH",
"xPOP", "xJUMP", "nDUP", "xPUSHNS", "xNEWMAP"
};
static const char *serr = "syntax error";
static const char *rerr = "runtime error";
/*
* Tokens. XXX Make sure that long token values do not clash with one char.
* We start numbering long tokens from 'a', which gives us at least
* 26 English letters; there are no letters in one char tokens.
*/
static const char *one_char_tokens = "=[](){}+-*/^,;.\"";
enum {
TOK_IDENT = 'a', TOK_NUM, TOK_STR, TOK_FUNC, TOK_IF, TOK_ELSE,
TOK_ADDE, TOK_SUBE, TOK_MULE, TOK_DIVE, TOK_NIL, TOK_NOT,
TOK_AND, TOK_OR,
/* XXX up to this point, must be parallel to long_tokens[] */
TOK_END
};
static const char *long_tokens[] = {
"%v", "%d", "%s", "function", "if", "else", "+=", "-=", "*=", "/=",
"nil", "not", "and", "or", NULL
};
static LL_HEAD(loaded_modules);
static void die(struct q *q, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
(void) vsnprintf(q->err_str, ERR_STR_SIZE, fmt, ap);
va_end(ap);
longjmp(q->jmpbuf, 1);
}
static void expand_code(struct q *q, int len)
{
q->cs += len;
q->code = realloc(q->code, q->cs);
if (q->code == NULL)
die(q, "%s: cannot grow code to %u bytes", serr, q->cs);
}
static void emit(struct q *q, const void *mem, int len)
{
if (q->code == NULL || q->cc + len >= q->cs)
expand_code(q, CHUNK_SIZE);
(void) memcpy(q->code + q->cc, mem, len);
q->cc += len;
}
static void emit_byte(struct q *q, unsigned char value)
{
emit(q, &value, sizeof(value));
}
static void emit_num(struct q *q, numeric_t val)
{
emit_byte(q, OP_PUSH);
emit_byte(q, TYPE_NUM);
emit(q, &val, sizeof(val));
}
static const int hex_tab[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
static int hex_ascii_to_int(struct q *q, unsigned char ch, int line_no)
{
if (ch >= ARRAY_SIZE(hex_tab) || hex_tab[ch] == -1)
die(q, "%s: line %d: badly encoded string", serr, line_no);
return (hex_tab[ch]);
}
static void emit_string_constant(struct q *q, const struct node *node)
{
char *dst, *src;
str_size_t n = 0;
/* XXX Must go first, cause expand_code may change q->code pointer */
if (q->code + q->cc + 2 + sizeof(n) + node->vec.len > q->code + q->cs)
expand_code(q, node->vec.len + 2 + sizeof(n) + 1);
dst = q->code + q->cc + 2 + sizeof(n);
src = node->vec.ptr + 1;
assert(node->vec.ptr[0] == '"');
assert(node->vec.ptr[node->vec.len - 1] == '"');
assert(dst + node->vec.len < q->code + q->cs);
for (; src < node->vec.ptr + node->vec.len - 1; src++, n++)
if (*src == '%') {
dst[n] = hex_ascii_to_int(q,src[1],node->line_no) << 4;
dst[n] |= hex_ascii_to_int(q, src[2], node->line_no);
src += 2;
} else {
dst[n] = *src;
}
emit_byte(q, OP_PUSH);
emit_byte(q, TYPE_STR);
emit(q, &n, sizeof(n));
q->cc += n;
}
/*
* Operator attributes table. Operator's token is an index in this table.
* The value stored is an integer, where:
* o least significant byte is operator's priority
* o next byte is a number of operands
* o next byte is a flag, whether operator is right-to-left, like '='
*/
static int op_tab[256];
static int set_op_attributes(int priority, int num_args, int is_right_to_left)
{
return (priority | (num_args << 8) | (is_right_to_left << 16));
}
static void init_op_tab(void)
{
op_tab['.'] = op_tab['('] = set_op_attributes(100, 2, 0);
op_tab['['] = set_op_attributes(100, 1, 1);
op_tab['*'] = op_tab['/'] = op_tab['%'] = set_op_attributes(80, 2, 0);
op_tab['+'] = op_tab['-'] = set_op_attributes(60, 2, 0);
op_tab['='] = op_tab[TOK_ADDE] = op_tab[TOK_SUBE] =
op_tab[TOK_MULE] = op_tab[TOK_DIVE] = set_op_attributes(20, 2, 1);
op_tab[','] = set_op_attributes(10, 2, 0);
}
/*
* Shortcut functions to access operator attributes
*/
static int prio(int tok) { return (op_tab[tok] & 0xff); }
static int nargs(int tok) { return ((op_tab[tok] >> 8) & 0xff); }
static int is_rtl(int tok) { return ((op_tab[tok] >> 16) & 0xff); }
static int is_op(int tok) { return (op_tab[tok]); }
static int calc_number_of_params(const struct node *node, int n)
{
if (node->left)
n += calc_number_of_params(node->left, 0);
if (node->right)
n += calc_number_of_params(node->right, 0);
if (node->tok == ',')
n++;
return (n);
}
static int is_lvalue(const struct node *node)
{
return (node->parent != NULL && node->parent->tok == '=' &&
node->parent->left == node);
}
static int is_leftmost(const struct node *node)
{
if (node->parent != NULL && node->parent->left == node &&
node->parent->tok == '=') {
return (1);
} else if (node->parent != NULL && node->parent->left == node &&
node->parent->tok == '.') {
return (is_leftmost(node->parent));
} else {
return (0);
}
}
static void emit_ident(struct q *q, const struct node *node)
{
emit_byte(q, OP_PUSH);
emit_byte(q, TYPE_STR);
emit(q, &node->vec.len, sizeof(node->vec.len));
emit(q, node->vec.ptr, node->vec.len);
}
static void emit_expr(struct q *, const struct node *);
static void emit_hash_definition(struct q *q, const struct node *node, int idx)
{
if (node == NULL) {
die(q, "%s: line %d: bad hash def", serr, node->line_no);
} else if (node->tok == ',') {
emit_hash_definition(q, node->left, idx);
emit_byte(q, OP_POP);
emit_hash_definition(q, node->right, idx + 1);
} else if (node->tok == '=') {
if (node->left->tok == TOK_IDENT) {
emit_expr(q, node->right);
emit_byte(q, OP_DUP);
emit_byte(q, 1);
emit_ident(q, node->left);
emit_byte(q, OP_SET);
} else {
die(q, "%s: line %d: bad hash def", serr,node->line_no);
}
} else {
/* Key is the index of the entry in the definition */
emit_expr(q, node);
emit_byte(q, OP_DUP);
emit_byte(q, 1);
emit_num(q, idx);
emit_byte(q, OP_SET);
}
}
static int emit_params(struct q *q, const struct node *node, int n)
{
if (node->tok == TOK_NIL) {
/* Do nothing - no parameters passed to function */
} else if (node->tok == ',') {
n = emit_params(q, node->left, n);
n = emit_params(q, node->right, n);
} else {
emit_expr(q, node);
n++;
}
return (n);
}
static void emit_function_call(struct q *q, const struct node *node)
{
int num_params;
num_params = emit_params(q, node->right, 0);
emit_num(q, num_params);
emit_expr(q, node->left);
emit_byte(q, OP_CALL);
}
static void emit_expr(struct q *q, const struct node *node)
{
switch (node->tok) {
case '.':
if (node->left->tok == TOK_IDENT)
emit_byte(q, OP_PUSHNS);
emit_expr(q, node->left);
emit_expr(q, node->right);
emit_byte(q, OP_GET);
break;
case '%':
case '*':
case '/':
case '+':
case '-':
emit_expr(q, node->left);
emit_expr(q, node->right);
emit_byte(q, OP_MATH);
emit_byte(q, node->tok);
break;
case '=':
emit_expr(q, node->right);
emit_expr(q, node->left);
if (node->left->tok != TOK_IDENT &&
node->left->tok != '.' && node->left->tok != '=')
die(q, "%s: %d: %s", serr,
node->line_no, "invalid assignment");
emit_byte(q, OP_SET);
break;
case ',':
emit_expr(q, node->left);
emit_expr(q, node->right);
die(q, "%s: %d: %s", serr, node->line_no, "use ';', not ','");
break;
case '(':
emit_function_call(q, node);
break;
case TOK_NIL:
emit_byte(q, OP_PUSH);
emit_byte(q, TYPE_NIL);
break;
case TOK_IDENT:
/*
* Identifier could be either rvalue or lvalue:
* "foo.bar = baz = x.y;"
* Lvalues: bar, baz
* Rvalues: foo, x, y
* Lvalue is a key, for namespace insert.
* Rvalue is always a key for namespace lookup.
*/
if (node->parent == NULL || node->parent->tok != '.')
emit_byte(q, OP_PUSHNS);
emit_ident(q, node);
if (!is_lvalue(node) && !(node->parent != NULL &&
node->parent->tok == '.' && is_lvalue(node->parent)))
emit_byte(q, OP_GET);
break;
case TOK_STR:
emit_string_constant(q, node);
break;
case TOK_NUM:
emit_num(q, strtod(node->vec.ptr, NULL));
break;
case '[':
emit_byte(q, OP_NEWMAP);
assert(node->right != NULL);
if (node->right->tok != TOK_NIL) {
emit_hash_definition(q, node->right, 0);
emit_byte(q, OP_POP);
}
break;
default:
die(q, "%s: %d: %s [%d]", serr, node->line_no,
"internal parser error", node->tok);
break;
}
}
static int cmp_vec(const struct vec *a, const struct vec *b)
{
int rc;
if (a->len < b->len)
return (-cmp_vec(b, a));
rc = memcmp(a->ptr, b->ptr, b->len);
if (rc == 0)
rc = a->len - b->len;
return (rc);
}
static int cmp(const struct obj *a, const struct obj *b)
{
int res = 0;
assert(a->type == b->type);
switch (a->type) {
case TYPE_NIL:
break;
case TYPE_NUM:
res = a->val.num - b->val.num;
break;
case TYPE_STR:
res = cmp_vec(&a->val.str, &b->val.str);
break;
case TYPE_CFUNC:
break;
case TYPE_FUNC:
res = a->val.cfunc == b->val.cfunc ? 0 : 1;
break;
case TYPE_HASH:
res = a->val.hash == b->val.hash ? 0 : 1;
break;
default:
abort();
break;
}
return (res);
}
static unsigned long hash_str(unsigned long hash, const struct vec *vec)
{
int i;
for (i = 0; i < vec->len; i++)
hash = (hash << 5) - hash + vec->ptr[i];
return (hash);
}
static unsigned long calc_hash(const struct obj *obj)
{
unsigned long hash = obj->type;
switch (obj->type) {
case TYPE_NIL:
break;
case TYPE_NUM:
hash += (unsigned long) obj->val.num;
break;
case TYPE_STR:
hash = hash_str(hash, &obj->val.str);
break;
case TYPE_CFUNC:
hash += (unsigned long) obj->val.ofs;
break;
case TYPE_FUNC:
hash += (unsigned long) obj->val.cfunc;
break;
case TYPE_HASH:
break;
default:
abort();
break;
}
return (hash);
}
static struct he *lookup(struct obj *obj, struct obj *key, size_t *pcell)
{
struct llhead *lp, *head;
struct hash *hash;
struct he *he;
size_t cell;
assert(obj->type == TYPE_HASH);
hash = obj->val.hash;
cell = calc_hash(key) % ARRAY_SIZE(hash->tab);
head = hash->tab + cell;
if (pcell != NULL)
*pcell = cell;
LL_FOREACH(head, lp) {
he = LL_ENTRY(lp, struct he, link);
if (he->key.type == key->type && cmp(&he->key, key) == 0)
return(he);
}
return (NULL);
}
static struct obj *lookup_ins(struct obj *obj, struct obj *key, struct obj *val)
{
struct he *he;
struct hash *hash;
size_t cell;
hash = obj->val.hash;
if ((he = lookup(obj, key, &cell)) == NULL) {
he = calloc(1, sizeof(*he));
he->key = *key;
if (val != NULL)
he->val = *val;
LL_TAIL(hash->tab + cell, &he->link);
}
return (&he->val);
}
static struct obj *lookup_rec(struct q *q, struct obj *obj, struct obj *key)
{
struct he *he;
while (obj != NULL) {
if (obj->type != TYPE_HASH)
die(q, "%s: %d: attempt to get an attribute from the"
" <%s> object", rerr, q->cc, types[obj->type]);
else if ((he = lookup(obj, key, NULL)) != NULL)
return (&he->val);
/* This lookup is recursive. Switch to the parent object */
obj = obj->val.hash->parent;
}
return (NULL);
}
static struct obj *pop(struct q *q)
{
if (--q->sc < 0)
die(q, "%s: %d: %s", rerr, q->cc, "stack underflow");
return (q->stack + q->sc);
}
static struct obj *push(struct q *q)
{
if (q->stack == NULL || q->sc >= (int) q->ss) {
q->ss += CHUNK_SIZE;
q->stack = realloc(q->stack, q->ss * sizeof(q->stack[0]));
}
if (q->stack == NULL)
die(q, "%s: %d: %s %u", rerr, q->cc,
"cannot grow stack to %u", q->ss);
return (q->stack + q->sc++);
}
static void init_hash_obj(struct obj *obj, struct obj *parent)
{
struct hash *hash;
size_t i;
obj->type = TYPE_HASH;
obj->val.hash = hash = malloc(sizeof(*obj->val.hash));
hash->parent = parent;
LL_INIT(&hash->glob_list);
for (i = 0; i < ARRAY_SIZE(hash->tab); i++)
LL_INIT(hash->tab + i);
}
/*
* Get integer value from stack, at relative offset stack_ofs
*/
static int gsi(struct q *q, int stack_ofs)
{
const struct obj *obj = q->stack + q->sc - 1 - stack_ofs;
if (obj < q->stack)
die(q, "%s: %d: stack underflow", rerr, q->cc);
else if (obj->type != TYPE_NUM)
die(q, "%s: %d: expected number on stack", rerr, q->cc);
return ((int) obj->val.num);
}
static void execute(struct q *q)
{
const char *c = q->code + q->cc;
struct obj *obj, *val, *key, *a, *b;
while (*c != OP_END) {
q->cc = c - q->code;
if (q->cc >= (int) q->cs)
die(q, "%s", "corrupt code");
#if 0
printf("%d: sc %d (%s)\n",
q->cc, q->sc, types[q->stack[q->sc - 1].type]);
#endif
switch (*c++) {
case OP_CALL: /* ( -- ret_addr old_fc new_namespace) */
obj = pop(q);
if (obj->type == TYPE_CFUNC) {
obj->val.cfunc(q);
q->sc -= obj[-1].val.num + 1;
} else if (obj->type == TYPE_FUNC) {
/* TODO - implement native functions */
} else {
die(q, "%s: %d: attempt to call <%s> object",
rerr, q->cc, types[obj->type]);
}
break;
case OP_RETURN:
break;
case OP_SET: /* (val obj key -- val) */
key = pop(q);
obj = pop(q);
val = pop(q);
if (obj->type != TYPE_HASH)
die(q, "%s: %d: attemt to set "
"an attribute to <%s> object",
rerr, q->cc, types[obj->type]);
(void) lookup_ins(obj, key, val);
q->sc++;
break;
case OP_GET: /* (obj key -- val) */
key = pop(q);
obj = pop(q);
if ((val = lookup_rec(q, obj, key)) != NULL)
*obj = *val;
else
obj->type = TYPE_NIL;
q->sc++;
break;
case OP_PUSH: /* ( -- val) */
obj = push(q);
obj->type = *c++;
switch (obj->type) {
case TYPE_NUM:
obj->val.num = * (numeric_t *) c;
c += sizeof(numeric_t);
break;
case TYPE_STR:
obj->val.str.len = * (str_size_t *) c;
c += sizeof(str_size_t);
obj->val.str.ptr = (char *) c;
c += obj->val.str.len;
break;
case TYPE_HASH:
init_hash_obj(obj, NULL);
break;
case TYPE_NIL:
break;
default:
die(q, "%s: %d: %s", rerr, q->cc,
"attempt to PUSH an unknown shit");
abort();
break;
}
break;
case OP_MATH: /* (val1 val2 -- result) */
b = pop(q);
a = pop(q);
if (a->type != TYPE_NUM || b->type != TYPE_NUM)
die(q, "%s: %d: <%s> found, <num> expected",
rerr, q->cc, types[a->type]);
switch (*c++) {
case '+': a->val.num += b->val.num; break;
case '-': a->val.num -= b->val.num; break;
case '*': a->val.num *= b->val.num; break;
case '/':
if (b->val.num == 0)
die(q, "%s: %d: %s",
rerr, q->cc, "division by zero");
a->val.num /= b->val.num;
break;
default:
die(q, "%s: %d: unexpected arith operator: %u",
rerr, q->cc, c[-1]);
break;
}
q->sc++;
break;
case OP_POP: /* (obj --) */
(void) pop(q);
break;
case OP_DUP: /* (-- obj) */
if (q->sc <= *c)
die(q, "%s: stack underflow", rerr);
obj = q->stack + q->sc - 1 - *c++;
val = push(q);
*val = *obj;
break;
case OP_JUMP: /* (--) */
break;
case OP_PUSHNS:
obj = push(q);
obj->type = TYPE_HASH;
obj->val.hash = q->stack[q->fc].val.hash;
break;
case OP_NEWMAP: /* (-- hash_obj) */
obj = push(q);
init_hash_obj(obj, &q->stack[q->fc]);
break;
default:
die(q, "%s: %d: %s", rerr, q->cc, "corrupter code");
break;
}
}
}
static void disasm(struct q *q)
{
const char *name, *c;
int len;
for (c = q->code; *c != OP_END; c++) {
assert(NUM_INSTTRUCTIONS == ARRAY_SIZE(opcodes));
assert(*c < NUM_INSTTRUCTIONS);
name = opcodes[(int) *c];
printf("%-4d %s", c - q->code, name + 1);
if (name[0] == 'a') {
printf(" %c", *++c);
} else if (name[0] == 'n') {
printf(" %d", *++c);
} else if (name[0] == 't') {
printf(" <%s> ", types[(int) *++c]);
switch (*c) {
case TYPE_NIL:
break;
case TYPE_NUM:
printf("%.2f", (double) *(numeric_t *)(c + 1));
c += sizeof(numeric_t);
break;
case TYPE_STR:
len = * (str_size_t *) (c + 1);
c += sizeof(str_size_t);
printf("\"%.*s\"", (int) len, c + 1);
c += len;
break;
default:
break;
}
}
(void) putchar('\n');
}
}
static void print_obj(const struct obj *obj)
{
switch (obj->type) {
case TYPE_STR:
printf("%.*s", (int) obj->val.str.len, obj->val.str.ptr);
break;
case TYPE_NUM:
printf("%.2f", (double) obj->val.num);
break;
default:
printf("<%s>", types[obj->type]);
break;
}
}
static void print(struct q *q)
{
int i, param_count = gsi(q, 0);
struct obj *obj = q->stack + q->sc - param_count - 1;
for (i = 0; i < param_count; i++, obj++)
print_obj(obj);
}
static void q_free(struct q *q)
{
if (q != NULL) {
if (q->code != NULL)
free(q->code);
if (q->stack != NULL)
free(q->stack);
if (q->name != NULL)
free(q->name);
free(q);
}
}
static struct obj *setattr(struct obj *obj, const char *name, struct obj *attr)
{
struct obj key;
key.type = TYPE_STR;
key.val.str.ptr = (char *) name;
key.val.str.len = strlen(name);
return (lookup_ins(obj, &key, attr));
}
static void import_builtin_objects(struct q *q)
{
struct obj obj, *namespace = q->stack;
obj.type = TYPE_CFUNC;
obj.val.cfunc = &print;
setattr(namespace, "print", &obj);
obj.val.cfunc = &disasm;
setattr(namespace, "disasm", &obj);
}
static void q_save(const char *path, const struct q *q, char *err_str)
{
FILE *fp;
if ((fp = fopen(path, "w+")) == NULL) {
(void) snprintf(err_str, ERR_STR_SIZE,
"fopen(%s): %s", path, strerror(errno));
} else {
(void) fwrite(PCODE_MAGIC, 1, sizeof(PCODE_MAGIC) - 1, fp);
(void) fwrite(q->code, 1, q->cs, fp);
(void) fclose(fp);
}
}
static int match_long_token(const char *str, const char *in)
{
int len = 0;
char *end;
if (str[0] == '%' && str[1] == 'v') {
if (*in == '_' || isalpha(* (unsigned char *) in))
while (in[len] == '_' ||
isalnum(((unsigned char *)in)[len]))
len++;
} else if (str[0] == '%' && str[1] == 'd') {
(void) strtod(in, &end);
len = end - in;
} else if (str[0] == '%' && str[1] == 's') {
if (*in == '"' && (end = strchr(in + 1, '"')) != NULL)
len = end - in + 1;
} else if (strncmp(in, str, strlen(str)) == 0) {
len = strlen(str);
}
return (len);
}
/*
* Parse the file, calling user function for every token.
* Return total number of tokens, including last TOK_END.
*/
static int tokenize(const char *buf, struct node *nodes)
{
struct node *node;
const char *cp;
int i, line_no = 1, num_nodes = 0;
do {
/* Skip whitespaces and comments, counting line numbers */
again: for (; isspace(*(unsigned char *) buf); buf++)
if (*buf == '\n')
line_no++;
if (*buf == '#')
for (buf++; *buf != '\0'; buf++)
if (*buf == '\n')
goto again;
/* Initialize node */
node = nodes + num_nodes++;
(void) memset(node, 0, sizeof(struct node));
node->vec.ptr = (char *) buf;
node->line_no = line_no;
node->tok = TOK_END;
/* Try to match one of the long tokens */
for (i = 0; long_tokens[i] != NULL; i++)
if ((node->vec.len =
match_long_token(long_tokens[i], buf)) > 0) {
node->tok = TOK_IDENT + i;
break;
}
/* Try to match one character token */
if (node->vec.len == 0 && *buf != '\0' &&
((cp = strchr(one_char_tokens, *buf))) != NULL) {
node->tok = *cp;
node->vec.len = 1;
}
buf += node->vec.len;
#if 0
printf("tok %d: [%.*s]\n",
node->tok, node->vec.len, node->vec.ptr);
#endif
} while (node->tok != TOK_END);
return (num_nodes);
}
static void bubble_sort_ops_by_priority(struct node **ops, int num_ops)
{
struct node *tmp;
int i, swapped;
/*
* All left-to-right nodes must be sorted by priority. Sorting is
* stable, i.e. the order is not broken for the nodes of equal
* priority. We *DO* break the order for the right-to-left nodes,
* though. So same-prio RTL nodes are reversed.
*/
do {
swapped = 0;
for (i = 0; i < num_ops - 1; i ++)
if ((is_rtl(ops[i]->tok) && is_rtl(ops[i + 1]->tok) &&
prio(ops[i]->tok) == prio(ops[i + 1]->tok) &&
ops[i] < ops[i + 1]) ||
(prio(ops[i]->tok) < prio(ops[i + 1]->tok))) {
tmp = ops[i];
ops[i] = ops[i + 1];
ops[i + 1] = tmp;
swapped = 1;
}
} while (swapped == 1);
}
static void reduce(struct q *q, struct node *parent, int ano, struct llhead *h)
{
int take_from_left;
struct llhead *lp;
struct node *node;
assert(is_op(parent->tok));
/* Figure out which node to take - from the left or from the right */
take_from_left = !ano ^ is_rtl(parent->tok);
lp = take_from_left ? parent->link.prev : parent->link.next;
node = LL_ENTRY(lp, struct node, link);
/*
* Make sure the node pointer points to the real node. It may not
* point to the real node if parent is:
* - first in the expression and is left-to-right
* - last in the expression and is right-to-left
* In these (erroneous) cases, 'lp' points to the list head,
* not node's link.
*/
if (lp == h)
die(q, "%s: line %d", serr, node->line_no);
/*
* Delete operand from the expession list, and append it
* respectively to the left or right branch in the parent.
*/
LL_DEL(&node->link);
if (take_from_left)
parent->left = node;
else
parent->right = node;
node->parent = parent;
}
static int qaz(const struct node *node, int num_nodes)
{
int prev_tok = node[-1].tok;
return (num_nodes > 1 &&
(prev_tok == TOK_IDENT || prev_tok == ']' || prev_tok == ')'));
}
static int expr(struct q *q, struct node *tab, int endtok, struct node **root)
{
static struct node nil;
struct node *ops[MAX_EXPR_TOKS], *node, *tmp;
struct llhead head;
int i, j, num_toks, num_ops;
nil.tok = TOK_NIL;
LL_INIT(&head);
num_toks = num_ops = 0;
/*
* Scan the expression until end token reached. Link all tokens
* into one list. Insert operations into ops array. We'll use list
* and ops array to build the syntax tree for the expression.
*/
while (tab[num_toks].tok != endtok) {
/* Link that node into expression's list */
node = tab + num_toks++;
LL_TAIL(&head, &node->link);
if (node->tok == TOK_END) {
die(q, "line %d: unexpected EOF", node->line_no);
} else if (node->tok == '(' || node->tok == '[') {
/* Disambiguate () and [] operators */
if (node->tok == '(' && !qaz(node, num_toks))
LL_DEL(&node->link);
else if (node->tok == '[' && qaz(node, num_toks))
node->tok = '.';
num_toks += expr(q, node + 1,
node->tok == '(' ? ')' : ']', &tmp);
LL_TAIL(&head, &tmp->link);
}
if (is_op(node->tok))
ops[num_ops++] = node;
}
/* Reduce token list into a syntax tree */
bubble_sort_ops_by_priority(ops, num_ops);
for (i = 0; i < num_ops; i++)
for (j = 0; j < nargs(ops[i]->tok); j++)
reduce(q, ops[i], j, &head);
/*
* Exactly one node must left linked next to the list head,
* and that one is the root of the tree. If num_nodes is 0, then
* the expression is empty. In this case NIL node is returned as root.
*/
if (head.next != head.prev)
die(q, "%s: line %d: %s", serr, node->line_no, "bad expr");
*root = num_toks == 0 ? &nil : LL_ENTRY(head.next, struct node, link);
return (num_toks + 1); /* +1 is for the end-token */
}
static void q_exec_string(struct q *q, const char *buf)
{
struct node pool[MAX_SOURCE_SIZE], *root;
int idx = 0;
/* Allocate an array of tokens on the stack, and tokenize the source */
tokenize(buf, pool);
/* Build syntax tree for each top-level statement */
while (pool[idx].tok != TOK_END) {
idx += expr(q, pool + idx, ';', &root);
emit_expr(q, root);
emit_byte(q, OP_POP);
}
/* Finish compilation */
emit_byte(q, OP_END);
}
static void compile(struct q *q, const char *path)
{
char buf[MAX_SOURCE_SIZE];
int n = 0;
FILE *fp;
/* Read whole source file in buffer */
if ((fp = fopen(path, "r")) == NULL) {
die(q, "fopen(%s): %s", path, strerror(errno));
} else if ((n = fread(buf, 1, sizeof(buf), fp)) < 0) {
die(q, "fread(%s): %s", path, strerror(errno));
} else if (n == sizeof(buf)) {
die(q, "%s: file is too large", path);
}
/* \0 terminate it the buffer, so tokenizer knows where to stop */
buf[n] = '\0';
(void) fclose(fp);
q_exec_string(q, buf);
/* Initialize namespace hash. */
init_hash_obj(push(q), NULL);
assert(q->sc == 1);
import_builtin_objects(q);
/* Point to the start of the code */
disasm(q);
q->cc = 0;
execute(q);
LL_ADD(&loaded_modules, &q->link);
}
static struct q *q_load(const char *path, char *err_str)
{
struct llhead *lp;
struct q *q;
/* Maybe this module is already loaded? */
LL_FOREACH(&loaded_modules, lp) {
q = LL_ENTRY(lp, struct q, link);
if (strcmp(q->name, path) == 0)
return (q); /* Yeah */
}
/* No, not loaded yet. Load it now. */
if ((q = calloc(1, sizeof(*q))) == NULL) {
(void) snprintf(err_str, ERR_STR_SIZE, "%s",
"cannot allocate memory for q");
} else if (setjmp(q->jmpbuf) != 0) {
q_free(q);
q = NULL;
} else {
q->err_str = err_str;
q->name = strdup(path);
compile(q, path);
}
return (q);
}
static void usage(void)
{
(void) fprintf(stderr, "usage: qq [-c <file>] file\n");
exit(EXIT_FAILURE);
}
static const char *get_opt(int *pargc, char ***pargv)
{
const char *opt;
if ((*pargv)[1][2] != '\0') {
opt = &(*pargv)[1][2];
(*pargc)--;
(*pargv)++;
} else {
opt = (*pargv)[2];
(*pargc) -= 2;
(*pargv) += 2;
}
if (opt == NULL)
usage();
return (opt);
}
int main(int argc, char *argv[])
{
char err_str[ERR_STR_SIZE];
const char *pcode_file = NULL;
struct q *q;
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != 0)
switch (argv[1][1]) {
case 'c':
pcode_file = get_opt(&argc, &argv);
break;
default:
usage();
break;
}
init_op_tab();
q = q_load(argc > 1 ? argv[1]: "test.txt", err_str);
if (q == NULL)
printf("%s\n", err_str);
else if (pcode_file != NULL)
q_save(pcode_file, q, err_str);
return (0);
}
/* file: "tinyc.c" */
/* Copyright (C) 2001 by Marc Feeley, All Rights Reserved. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* This is a compiler for the Tiny-C language. Tiny-C is a
* considerably stripped down version of C and it is meant as a
* pedagogical tool for learning about compilers. The integer global
* variables "a" to "z" are predefined and initialized to zero, and it
* is not possible to declare new variables. The compiler reads the
* program from standard input and prints out the value of the
* variables that are not zero. The grammar of Tiny-C in EBNF is:
*
* <program> ::= <statement>
* <statement> ::= "if" <paren_expr> <statement> |
* "if" <paren_expr> <statement> "else" <statement> |
* "while" <paren_expr> <statement> |
* "do" <statement> "while" <paren_expr> ";" |
* "{" { <statement> } "}" |
* <expr> ";" |
* ";"
* <paren_expr> ::= "(" <expr> ")"
* <expr> ::= <test> | <id> "=" <expr>
* <test> ::= <sum> | <sum> "<" <sum>
* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term>
* <term> ::= <id> | <int> | <paren_expr>
* <id> ::= "a" | "b" | "c" | "d" | ... | "z"
* <int> ::= <an_unsigned_decimal_integer>
*
* Here are a few invocations of the compiler:
*
* % echo "a=b=c=2<3;" | ./a.out
* a = 1
* b = 1
* c = 1
* % echo "{ i=1; while (i<100) i=i+i; }" | ./a.out
* i = 128
* % echo "{ i=125; j=100; while (i-j) if (i<j) j=j-i; else i=i-j; }" | ./a.out
* i = 25
* j = 25
* % echo "{ i=1; do i=i+10; while (i<50); }" | ./a.out
* i = 51
* % echo "{ i=1; while ((i=i+10)<50) ; }" | ./a.out
* i = 51
* % echo "{ i=7; if (i<5) x=1; if (i<10) y=2; }" | ./a.out
* i = 7
* y = 2
*
* The compiler does a minimal amount of error checking to help
* highlight the structure of the compiler.
*/
/*---------------------------------------------------------------------------*/
/* Lexer. */
enum { DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, LBRA, RBRA, LPAR, RPAR,
PLUS, MINUS, LESS, SEMI, EQUAL, INT, ID, EOI };
char *words[] = { "do", "else", "if", "while", NULL };
int ch = ' ';
int sym;
int int_val;
char id_name[100];
void syntax_error() { fprintf(stderr, "syntax error\n"); exit(1); }
void next_ch() { ch = getchar(); }
void next_sym()
{ again: switch (ch)
{ case ' ': case '\n': next_ch(); goto again;
case EOF: sym = EOI; break;
case '{': next_ch(); sym = LBRA; break;
case '}': next_ch(); sym = RBRA; break;
case '(': next_ch(); sym = LPAR; break;
case ')': next_ch(); sym = RPAR; break;
case '+': next_ch(); sym = PLUS; break;
case '-': next_ch(); sym = MINUS; break;
case '<': next_ch(); sym = LESS; break;
case ';': next_ch(); sym = SEMI; break;
case '=': next_ch(); sym = EQUAL; break;
default:
if (ch >= '0' && ch <= '9')
{ int_val = 0; /* missing overflow check */
while (ch >= '0' && ch <= '9')
{ int_val = int_val*10 + (ch - '0'); next_ch(); }
sym = INT;
}
else if (ch >= 'a' && ch <= 'z')
{ int i = 0; /* missing overflow check */
while ((ch >= 'a' && ch <= 'z') || ch == '_')
{ id_name[i++] = ch; next_ch(); }
id_name[i] = '\0';
sym = 0;
while (words[sym] != NULL && strcmp(words[sym], id_name) != 0)
sym++;
if (words[sym] == NULL)
if (id_name[1] == '\0') sym = ID; else syntax_error();
}
else
syntax_error();
}
}
/*---------------------------------------------------------------------------*/
/* Parser. */
enum { VAR, CST, ADD, SUB, LT, SET,
IF1, IF2, WHILE, DO, EMPTY, SEQ, EXPR, PROG };
struct node { int kind; struct node *o1, *o2, *o3; int val; };
typedef struct node node;
node *new_node(int k)
{ node *x = (node*)malloc(sizeof(node)); x->kind = k; return x; }
node *paren_expr(); /* forward declaration */
node *term() /* <term> ::= <id> | <int> | <paren_expr> */
{ node *x;
if (sym == ID) { x=new_node(VAR); x->val=id_name[0]-'a'; next_sym(); }
else if (sym == INT) { x=new_node(CST); x->val=int_val; next_sym(); }
else x = paren_expr();
return x;
}
node *sum() /* <sum> ::= <term> | <sum> "+" <term> | <sum> "-" <term> */
{ node *t, *x = term();
while (sym == PLUS || sym == MINUS)
{ t=x; x=new_node(sym==PLUS?ADD:SUB); next_sym(); x->o1=t; x->o2=term(); }
return x;
}
node *test() /* <test> ::= <sum> | <sum> "<" <sum> */
{ node *t, *x = sum();
if (sym == LESS)
{ t=x; x=new_node(LT); next_sym(); x->o1=t; x->o2=sum(); }
return x;
}
node *expr() /* <expr> ::= <test> | <id> "=" <expr> */
{ node *t, *x;
if (sym != ID) return test();
x = test();
if (x->kind == VAR && sym == EQUAL)
{ t=x; x=new_node(SET); next_sym(); x->o1=t; x->o2=expr(); }
return x;
}
node *paren_expr() /* <paren_expr> ::= "(" <expr> ")" */
{ node *x;
if (sym == LPAR) next_sym(); else syntax_error();
x = expr();
if (sym == RPAR) next_sym(); else syntax_error();
return x;
}
node *statement()
{ node *t, *x;
if (sym == IF_SYM) /* "if" <paren_expr> <statement> */
{ x = new_node(IF1);
next_sym();
x->o1 = paren_expr();
x->o2 = statement();
if (sym == ELSE_SYM) /* ... "else" <statement> */
{ x->kind = IF2;
next_sym();
x->o3 = statement();
}
}
else if (sym == WHILE_SYM) /* "while" <paren_expr> <statement> */
{ x = new_node(WHILE);
next_sym();
x->o1 = paren_expr();
x->o2 = statement();
}
else if (sym == DO_SYM) /* "do" <statement> "while" <paren_expr> ";" */
{ x = new_node(DO);
next_sym();
x->o1 = statement();
if (sym == WHILE_SYM) next_sym(); else syntax_error();
x->o2 = paren_expr();
if (sym == SEMI) next_sym(); else syntax_error();
}
else if (sym == SEMI) /* ";" */
{ x = new_node(EMPTY); next_sym(); }
else if (sym == LBRA) /* "{" { <statement> } "}" */
{ x = new_node(EMPTY);
next_sym();
while (sym != RBRA)
{ t=x; x=new_node(SEQ); x->o1=t; x->o2=statement(); }
next_sym();
}
else /* <expr> ";" */
{ x = new_node(EXPR);
x->o1 = expr();
if (sym == SEMI) next_sym(); else syntax_error();
}
return x;
}
node *program() /* <program> ::= <statement> */
{ node *x = new_node(PROG);
next_sym(); x->o1 = statement(); if (sym != EOI) syntax_error();
return x;
}
/*---------------------------------------------------------------------------*/
/* Code generator. */
enum { IFETCH, ISTORE, IPUSH, IPOP, IADD, ISUB, ILT, JZ, JNZ, JMP, HALT };
typedef char code;
code object[1000], *here = object;
void g(code c) { *here++ = c; } /* missing overflow check */
code *hole() { return here++; }
void fix(code *src, code *dst) { *src = dst-src; } /* missing overflow check */
void c(node *x)
{ code *p1, *p2;
switch (x->kind)
{ case VAR : g(IFETCH); g(x->val); break;
case CST : g(IPUSH); g(x->val); break;
case ADD : c(x->o1); c(x->o2); g(IADD); break;
case SUB : c(x->o1); c(x->o2); g(ISUB); break;
case LT : c(x->o1); c(x->o2); g(ILT); break;
case SET : c(x->o2); g(ISTORE); g(x->o1->val); break;
case IF1 : c(x->o1); g(JZ); p1=hole(); c(x->o2); fix(p1,here); break;
case IF2 : c(x->o1); g(JZ); p1=hole(); c(x->o2); g(JMP); p2=hole();
fix(p1,here); c(x->o3); fix(p2,here); break;
case WHILE: p1=here; c(x->o1); g(JZ); p2=hole(); c(x->o2);
g(JMP); fix(hole(),p1); fix(p2,here); break;
case DO : p1=here; c(x->o1); c(x->o2); g(JNZ); fix(hole(),p1); break;
case EMPTY: break;
case SEQ : c(x->o1); c(x->o2); break;
case EXPR : c(x->o1); g(IPOP); break;
case PROG : c(x->o1); g(HALT); break;
}
}
/*---------------------------------------------------------------------------*/
/* Virtual machine. */
int globals[26];
void run()
{ int stack[1000], *sp = stack;
code *pc = object;
again: switch (*pc++)
{ case IFETCH: *sp++ = globals[*pc++]; goto again;
case ISTORE: globals[*pc++] = sp[-1]; goto again;
case IPUSH : *sp++ = *pc++; goto again;
case IPOP : --sp; goto again;
case IADD : sp[-2] = sp[-2] + sp[-1]; --sp; goto again;
case ISUB : sp[-2] = sp[-2] - sp[-1]; --sp; goto again;
case ILT : sp[-2] = sp[-2] < sp[-1]; --sp; goto again;
case JMP : pc += *pc; goto again;
case JZ : if (*--sp == 0) pc += *pc; else pc++; goto again;
case JNZ : if (*--sp != 0) pc += *pc; else pc++; goto again;
}
}
/*---------------------------------------------------------------------------*/
/* Main program. */
int main()
{ int i;
c(program());
for (i=0; i<26; i++)
globals[i] = 0;
run();
for (i=0; i<26; i++)
if (globals[i] != 0)
printf("%c = %d\n", 'a'+i, globals[i]);
return 0;
}
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
// === runtime ===
enum { tObject, tNumber, tString };
typedef struct {
char type;
union {
char *string;
int number;
} value;
int refcount;
} Object;
static Object *TrueObject;
static Object *FalseObject;
static Object *NilObject;
Object *Object_new() {
return calloc(1, sizeof(Object));
}
/////////////// GC ///////////////
Object *retain(Object *self) {
self->refcount++;
return self;
}
void release(Object *self) {
self->refcount--;
if (self->refcount <= 0)
free(self);
}
///////////////////////////////////
char Object_is_true(Object *self) {
// false and nil are == false,
// everything else is true.
if (self == FalseObject || self == NilObject) return 0;
return 1;
}
Object *Number_new(int value) {
Object *object = Object_new();
object->type = tNumber;
object->value.number = value;
return object;
}
int Number_value(Object *self) {
assert(self->type == tNumber);
return self->value.number;
}
Object *String_new(char *value) {
Object *object = Object_new();
object->type = tString;
object->value.string = value;
return object;
}
Object *call(Object *receiver, char *message, Object *argv[], int argc) {
// HACK hardcoded methods. In a real runtime, you'd have proper classes and methods.
// See runtime.rb in our interpreter for an example.
if (strcmp(message, "+") == 0) {
// Number#+
return Number_new(receiver->value.number + argv[0]->value.number);
} else if (strcmp(message, "print") == 0) {
// Object#print
switch (argv[0]->type) {
case tNumber:
printf("%d\n", argv[0]->value.number);
return argv[0];
case tString:
printf("%s\n", argv[0]->value.string);
return argv[0];
}
}
return 0;
}
void init_runtime() {
TrueObject = retain(Object_new());
FalseObject = retain(Object_new());
NilObject = retain(Object_new());
}
void destroy_runtime() {
release(TrueObject);
release(FalseObject);
release(NilObject);
}
// === opcode ===
enum {
// ------- Stack -------
// Opcode Operands before after
/* 00 */ CALL, // index, argc [rcv, arg...] [returned]
/* 01 */ PUSH_NUMBER, // index [] [number]
/* 02 */ PUSH_STRING, // index [] [string]
/* 03 */ PUSH_SELF, // [] [self]
/* 04 */ PUSH_NIL, // [] [nil]
/* 05 */ PUSH_BOOL, // 1=t, 0=f [] [true or false]
/* 06 */ GET_LOCAL, // index [] [value]
/* 07 */ SET_LOCAL, // index [value] []
/* 08 */ JUMP_UNLESS, // offset [test] []
/* 09 */ JUMP, // offset [] []
/* 10 */ ADD, // [a, b] [result]
/* 11 */ RETURN // [] []
};
typedef unsigned char byte; // 1 byte
// === bytecode ===
/*
This file was compiled by the `compile` script.
It contains the bytecode for the following code:
print("the answer is:")
a = 30 + 2
if false
print(a)
else
print("no")
end
*/
#define LITERALS { \
(void *) "the answer is:", \
(void *) "print", \
(void *) 30, \
(void *) 2, \
(void *) "no", \
}
#define INSTRUCTIONS { 3, 2, 0, 0, 1, 1, 1, 2, 1, 3, 10, 7, 0, 5, 0, 8, 8, 3, 6, 0, 0, 1, 1, 9, 6, 3, 2, 4, 0, 1, 1, 11 }
// Helpers to play with the stack
#define LOCALS_MAX 10
#define STACK_MAX 10
#define STACK_PUSH(I) do { \
assert(sp-stack < STACK_MAX); \
*(++sp) = (I); \
retain(*sp); \
} while(0)
#define STACK_POP() (*sp--)
void run(void *literals[], byte instructions[]) {
byte *ip = instructions; // instruction pointer
Object *stack[STACK_MAX]; // THE famous stack
Object **sp = stack; // stack pointer, keeps track of current position
Object *locals[LOCALS_MAX] = {}; // where we store our local variables
// Setup the runtime
Object *self = Object_new();
retain(self);
// Start processing instructions
while (1) {
switch (*ip) {
case PUSH_NUMBER: {
ip++; // advance to the operand (literal index)
STACK_PUSH(Number_new((long)literals[*ip]));
break;
}
case PUSH_STRING: {
ip++; // advance to the operand (literal index)
STACK_PUSH(String_new((char *)literals[*ip]));
break;
}
case PUSH_SELF: {
STACK_PUSH(self);
break;
}
case PUSH_NIL: {
STACK_PUSH(NilObject);
break;
}
case PUSH_BOOL: {
ip++; // advance to operand (0 = false, 1 = true)
if (*ip == 0) {
STACK_PUSH(FalseObject);
} else {
STACK_PUSH(TrueObject);
}
break;
}
case CALL: {
ip++; // advance to operand (method name index in literals)
char *method = literals[*ip];
ip++; // advance to operand (# of args)
int argc = *ip;
Object *argv[10];
int i;
for (i = argc - 1; i >= 0; i--) argv[i] = STACK_POP();
Object *receiver = STACK_POP();
Object *result = call(receiver, method, argv, argc);
STACK_PUSH(result);
// Release all the objects
for (i = argc - 1; i >= 0; i--) release(argv[i]);
release(receiver);
break;
}
case RETURN: {
goto cleanup;
break;
}
case GET_LOCAL: {
ip++; // advance operand (local index)
STACK_PUSH(locals[*ip]);
break;
}
case SET_LOCAL: {
ip++; // local index
locals[*ip] = STACK_POP();
break;
}
case ADD: {
Object *a = STACK_POP();
Object *b = STACK_POP();
STACK_PUSH(Number_new(Number_value(a) + Number_value(b)));
release(a);
release(b);
break;
}
case JUMP_UNLESS: {
ip++; // operand (offset, # of bytes to jump forward)
byte offset = *ip;
Object *condition = STACK_POP();
if (!Object_is_true(condition)) ip += offset;
release(condition);
break;
}
case JUMP: {
ip++; // operand (offset, # of bytes to jump forward)
byte offset = *ip;
ip += offset;
break;
}
}
ip++;
}
cleanup:
release(self);
int i;
// Release all the local variables
for (i = 0; i < LOCALS_MAX; i++) if (locals[i]) release(locals[i]);
// Release everything left on the stack
while (sp > stack) release(STACK_POP());
}
int main (int argc, char const *argv[]) {
// Get compiled literals table and instructions from bytecode.h
void *literals[] = LITERALS;
byte instructions[] = INSTRUCTIONS;
init_runtime();
// Run that thing!
run(literals, instructions);
destroy_runtime();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment