Last active
August 29, 2015 14:01
-
-
Save andersonsp/7c0d6643de0dbc76632f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// lys.c forth version by AndersonSP | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdint.h> | |
#include <stdarg.h> | |
//---------------------------------------------------------------- | |
// Object defs based on http://piumarta.com/software/id-objmodel/ | |
// | |
typedef struct _State State; | |
typedef struct _Vtable Vtable; | |
typedef struct _Object Object; | |
typedef struct _Closure Closure; | |
typedef struct _Number Number; | |
typedef struct _String String; | |
typedef void (*Primitive)(State* st, Closure *w, Object *self); | |
#define BUFFER_SIZE 512 | |
#define CODE_SIZE 1024 | |
#define STACK_SIZE 512 | |
void execute( State* st, Closure* msg, Object *self ); | |
Vtable *vtable_delegated(Vtable *self, char* name); | |
Object *object_new(Vtable *self, int payloadSize); | |
Object *vtable_add_slot(Vtable *self, Object *key, Object *value); | |
void stack_push( State* st, Object* p ); | |
Object* stack_pop( State* st ); | |
Object* stack_top( State* st ); | |
struct _State{ | |
Vtable *vtable_vt, *object_vt, *closure_vt; // core object prototypes | |
Vtable *number_vt, *string_vt, *boolean_vt; // ... | |
Vtable *dict; // symbol dictionary | |
Object *lobby, *s_true, *s_false; // default objects, lobby should be the first element of the stack at all moments | |
Object* stack[STACK_SIZE]; | |
int ret, sp; // return flag, top of stack pointer | |
int ch, line; // for parsing | |
FILE* infile; | |
}; | |
typedef struct { | |
Object *key, *value; | |
} Assoc; | |
typedef struct { | |
uint8_t kind, arity, preced, right; | |
Object *key; | |
} Instr; | |
struct _Vtable { | |
Vtable *_vt[0], *parent; | |
char name[32]; | |
int size, tally; | |
Assoc *bindings; | |
}; | |
struct _Object { | |
Vtable *_vt[0]; | |
}; | |
struct _Closure { | |
Vtable *_vt[0]; | |
Primitive method; | |
int length; | |
Instr code[1]; | |
}; | |
struct _String { | |
Vtable *_vt[0]; | |
int length, preced, right; // for operators | |
char chars[1]; | |
}; | |
struct _Number { | |
int num; | |
}; | |
void die( const char* str, ... ) { | |
va_list a; | |
va_start(a, str); | |
vfprintf(stderr, str, a ); | |
exit(1); | |
} | |
static inline Object *vtable_lookup(Vtable *self, Object *key) { | |
Vtable* vt = self; | |
Assoc* assoc; | |
while( vt ){ | |
for( assoc = vt->bindings; assoc->key; assoc++ ) { | |
if( key == assoc->key ) return assoc->value; | |
} | |
vt = vt->parent; | |
} | |
die("Fatal:\n %s:%p has no slot '%s'\n", self->name, self, ((String *)key)->chars ); | |
return 0; //unreached | |
} | |
//TODO: this could use some caching | |
inline void send(State* st, Object *rcv, Object* msg) { | |
Object *slot = vtable_lookup(rcv->_vt[-1], msg); | |
if( slot->_vt[-1] == st->closure_vt ) ((Closure*)slot)->method( st, (Closure*)slot, rcv ); | |
else stack_push( st, slot ); | |
} | |
inline void *alloc(size_t size) { | |
Vtable **ppvt= (Vtable **)calloc(1, sizeof(Vtable *) + size); | |
return (void *)(ppvt + 1); | |
} | |
inline void dealloc( Object* self) { | |
if(self) free( &self->_vt[-1] ); | |
} | |
Object *number_new(State* st, int num) { | |
Number *clone = (Number *)object_new(st->number_vt, sizeof(Number)); | |
clone->num = num; | |
return (Object *)clone; | |
} | |
Object *string_new(State* st, char *str) { | |
int len = strlen(str); | |
String *string = (String *)alloc(sizeof(String) + len + 1); | |
string->_vt[-1] = st->string_vt; | |
string->length = len; | |
strcpy(string->chars, str); | |
return (Object *)string; | |
} | |
Closure *closure_new(State* st, Primitive method, int num_instr, Instr* instr_list) { | |
Closure *closure = (Closure *)alloc(sizeof(Closure) + sizeof(Instr) * num_instr); | |
closure->_vt[-1] = st->closure_vt; | |
closure->method = method; | |
if( instr_list ){ | |
closure->length = num_instr; | |
memcpy( closure->code, instr_list, num_instr*sizeof(Instr)); | |
} | |
return closure; | |
} | |
Vtable *vtable_delegated(Vtable *self, char* name) { | |
Vtable *child= (Vtable *)alloc( sizeof(Vtable) ); | |
child->_vt[-1] = self ? self->_vt[-1] : child; | |
child->size = 16; | |
child->tally = 0; | |
child->bindings = (Assoc *)calloc(child->size, sizeof(Assoc)); | |
child->parent = self; | |
strcpy(child->name, name); | |
return child; | |
} | |
Object *object_new(Vtable *self, int payloadSize ) { | |
Object *object = (Object *)alloc(payloadSize); | |
object->_vt[-1] = self; | |
return object; | |
} | |
Object* vtable_add_slot(Vtable *self, Object *key, Object *value) { | |
Assoc* assoc; | |
for( assoc = self->bindings; assoc->key; assoc++ ) { | |
if( key == assoc->key ) { | |
dealloc(assoc->value); | |
return assoc->value = value; | |
} | |
} | |
if( self->tally == self->size-1 ) { | |
self->size *= 2; | |
self->bindings = (Assoc *)realloc(self->bindings, sizeof(Assoc) * self->size); | |
} | |
assoc = &self->bindings[self->tally++]; | |
assoc->key = key; | |
return assoc->value = value; | |
} | |
Object *string_intern(State* st, char *string){ | |
Object *symbol; | |
int i; | |
for( i = 0; i < st->dict->tally; ++i ) { | |
symbol = st->dict->bindings[i].key; | |
if( !strcmp(string, ((String *)symbol)->chars) ) return symbol; | |
} | |
symbol = string_new( st, string ); | |
vtable_add_slot( st->dict, symbol, 0 ); | |
return symbol; | |
} | |
State* state_new() { | |
State *st = calloc(sizeof(State), 1); | |
st->vtable_vt = vtable_delegated(0, "Vtable"); | |
st->vtable_vt->_vt[-1] = st->vtable_vt; | |
st->object_vt = vtable_delegated(0, "Object"); | |
st->object_vt->_vt[-1] = st->vtable_vt; | |
st->vtable_vt->parent = st->object_vt; | |
st->dict = vtable_delegated(0, "SymbolTable"); | |
st->dict->_vt[-1] = 0; | |
st->closure_vt = vtable_delegated(st->object_vt, "Closure"); | |
st->number_vt = vtable_delegated(st->object_vt, "Number"); | |
st->string_vt = vtable_delegated(st->object_vt, "String"); | |
st->boolean_vt = vtable_delegated(st->object_vt, "Boolean"); | |
st->lobby = object_new(st->object_vt, 0); | |
st->s_true = object_new(st->boolean_vt, 0); | |
st->s_false = object_new(st->boolean_vt, 0); | |
stack_push( st, st->lobby ); | |
return st; | |
} | |
void stack_push( State* st, Object* p ) { | |
st->stack[++st->sp] = p; | |
} | |
Object* stack_pop( State* st ) { | |
if(st->sp > 1) return st->stack[st->sp--]; | |
die( "Fatal: Trying to pop the lobby from the stack\n" ); | |
} | |
Object* stack_top( State* st ) { | |
return st->stack[st->sp]; | |
} | |
// * Read a word from any utf8 stream and return it. | |
static inline utf8_seq( uint8_t first_byte ){ | |
if((first_byte & 0x80) == 0x00) return 1; // 0xxxxxxx | |
if((first_byte & 0xC0) == 0x80) return 0; // 10xxxxxx - invalid | |
if((first_byte & 0xE0) == 0xC0) return 2; // 110xxxxx | |
if((first_byte & 0xF0) == 0xE0) return 3; // 1110xxxx | |
if((first_byte & 0xF8) == 0xF0) return 4; // 11110xxx | |
if((first_byte & 0xFC) == 0xF8) return 5; // 111110xx | |
if((first_byte & 0xFE) == 0xFC) return 6; // 1111110x | |
} | |
static int utf8_read_multibyte(FILE *stream, int first, uint8_t *bytes) { | |
if( first == EOF ) return 0; // don't read EOF into the buffer | |
int i, len = utf8_seq( first ); | |
bytes[0] = first; | |
for( i=1; i<len; i++ ) { // read subsequent character bytes | |
int c = getc(stream); | |
if( c == EOF || (c & 0xC0) != 0x80 ) return 0; | |
bytes[i] = c; | |
} | |
return len; // return number of bytes read into buffer | |
} | |
static const char *operators = ":~!@$%^&*-+=|\\<>?/"; | |
static const char *delim = " \t\r\n()[]{};,"; | |
enum { TOK_END, TOK_LIT, TOK_NUMBER, TOK_STRING, TOK_SYMBOL, TOK_OPERATOR, TOK_MALFORM, TOK_UNKNOWN }; | |
static int utf8_next_symbol( State* st, FILE* stream, uint8_t* buf ) { | |
int tok = TOK_UNKNOWN, ch = st->ch, wp = 0; | |
int string_delim; | |
again: | |
// printf("ch: %c \n", ch); | |
switch(ch) { | |
case ' ': case '\t': case '\r': // eat whitespace | |
ch = getc( stream ); | |
goto again; | |
case '\n': | |
st->line++; | |
ch = getc( stream ); | |
goto again; | |
case '#': | |
do { | |
ch = getc( stream ); | |
} while( ch != '\n' && ch != EOF && ch != '\0' ); | |
goto again; | |
case EOF: case '\0': // end of file | |
return TOK_END; | |
case '(': case ')': case '[': case ']': case '{': case '}': //case ',': case ';': | |
tok = ch; | |
ch = getc(stream); | |
break; | |
case '\"': case '\'': | |
tok = TOK_STRING; | |
string_delim = ch; | |
ch = getc( stream ); // skip the first quote | |
do { | |
int len = utf8_read_multibyte( stream, ch, &buf[wp]); | |
if( len == 0 ) return TOK_MALFORM; | |
if( ch == '\n' ) st->line++; | |
ch = getc( stream ); | |
wp += len; | |
} while( ch != string_delim ); | |
ch = getc( stream ); // skip trailing quote | |
break; | |
default: | |
if( isdigit(ch) ) { | |
tok = TOK_NUMBER; | |
do { | |
buf[wp++] = ch; | |
ch = getc( stream ); | |
} while( isdigit(ch) ); | |
} else if( strchr(operators, ch) ) { | |
tok = TOK_OPERATOR; | |
do { | |
buf[wp++] = ch; | |
ch = getc( stream ); | |
} while( strchr(operators, ch) ); | |
} else { | |
tok = TOK_SYMBOL; | |
do { | |
int len = utf8_read_multibyte( stream, ch, &buf[wp]); | |
if( len == 0 ) return TOK_MALFORM; | |
ch = getc( stream ); | |
wp += len; | |
} while( !strchr(delim, ch) && !strchr(operators, ch) ); | |
} | |
} | |
buf[wp] = '\0'; | |
st->ch = ch; // record last character read, for next iteration | |
return tok; | |
} | |
Object *define_slot( State* st, Vtable* vt, char *name, Object* obj ){ | |
Object *s_new = string_intern(st, name); | |
vtable_add_slot(vt, s_new, obj); | |
return s_new; | |
} | |
Object *define_operator( State* st, Vtable* vt, char *name, Primitive method, int preced, int right ){ | |
Object *s_new = define_slot(st, vt, name, (Object*)closure_new( st, method, 0, NULL )); | |
((String *)s_new)->preced = preced; | |
((String *)s_new)->right = right; | |
return s_new; | |
} | |
Object *define_primitive( State* st, Vtable* vt, char *name, Primitive method ) { | |
return define_slot(st, vt, name, (Object*)closure_new( st, method, 0, NULL )); | |
} | |
void object_list_slots( State* st, Closure* msg, Object *self ) { | |
Vtable *vt = self->_vt[-1]; | |
Object *symbol; | |
printf("%s slots:\n", vt->name ); | |
int i; | |
for( i = 0; i < vt->tally; ++i ) { | |
symbol = vt->bindings[i].key; | |
printf(" %s\n", ((String *)symbol)->chars ); | |
} | |
} | |
void object_ret( State* st, Closure* msg, Object *self ) { | |
st->ret = 1; | |
} | |
void object_pop( State* st, Closure* msg, Object *self ) { | |
stack_pop( st ); | |
} | |
void object_dup( State* st, Closure* msg, Object *self ) { | |
stack_push( st, stack_top(st) ); | |
} | |
void object_def( State* st, Closure* msg, Object *self ) { | |
Object *literal = stack_pop(st); | |
String *name = (String*) stack_pop(st); | |
Object *rec = stack_top(st); | |
define_slot(st, rec->_vt[-1], name->chars, literal); | |
} | |
void number_add(State* st, Closure* msg, Object *self) { | |
Number *a = (Number*) stack_pop(st); | |
Number *b = (Number*) stack_pop(st); | |
Object *sum = number_new( st, a->num + b->num ); | |
stack_push( st, sum ); | |
} | |
void number_times(State* st, Closure* msg, Object *self) { | |
Number *a = (Number*) stack_pop(st); | |
Closure *cls = (Closure*) stack_pop(st); | |
while( a->num-- ) execute(st, cls, stack_top(st)); | |
} | |
void number_print(State* st, Closure* msg, Object *self) { | |
printf("[%d] %d\n", st->sp, ((Number *) self)->num); | |
} | |
void string_print(State* st, Closure* msg, Object *self) { | |
printf("[%d] %s\n", st->sp, ((String *) self)->chars); | |
} | |
void closure_print(State* st, Closure* msg, Object *self) { | |
printf("[%d] <closure %p>\n", st->sp, self); | |
} | |
void closure_apply(State* st, Closure* msg, Object *self) { | |
Closure *cls = (Closure*) stack_pop(st); | |
execute(st, cls, stack_top(st)); | |
} | |
void boolean_if_true(State* st, Closure* msg, Object *self) { | |
Object *boolean = stack_pop(st); | |
Closure *cls = (Closure*) stack_pop(st); | |
if(boolean == st->s_true)execute(st, cls, stack_top(st)); | |
} | |
void boolean_if_false(State* st, Closure* msg, Object *self) { | |
Object *boolean = stack_pop(st); | |
Closure *cls = (Closure*) stack_pop(st); | |
if(boolean == st->s_false)execute(st, cls, stack_top(st)); | |
} | |
void boolean_print(State* st, Closure* msg, Object *self) { | |
printf("[%d] <%s>\n", st->sp, self == st->s_true ? "true" : "false"); | |
} | |
// * Executes the closure given as param. | |
void execute( State* st, Closure* cls, Object *self ) { | |
int ip = 0; | |
Instr *cur = cls->code; | |
while( cur->key && !st->ret ) { | |
// printf("[%d] exec: %p %s\n", ip, cur, ((String *)cur)->chars); | |
if( cur->kind == TOK_LIT ) stack_push(st, cur->key); | |
else send(st, stack_top(st), cur->key); | |
ip += 1; | |
cur = cls->code + ip; | |
} | |
st->ret = 0; // clear return flag | |
} | |
int parse_block(State* st, Instr *code, int top_level ) { | |
char symbol[BUFFER_SIZE]; | |
int count = 0; | |
Instr sc, oper_stack[32]; // operator stack | |
int sl = 0; // stack length | |
int tok = utf8_next_symbol(st, st->infile, symbol); | |
while( tok != TOK_END && tok != '}' ) { | |
if( tok == TOK_NUMBER ){ | |
code[count++] = (Instr){TOK_LIT, 0, 0, 0, number_new(st, atoi(symbol))}; | |
} else if( tok == TOK_STRING ) { | |
code[count++] = (Instr){TOK_LIT, 0, 0, 0, string_new( st, symbol )}; | |
} else if( tok == '{') { | |
Instr block_code[CODE_SIZE]; | |
int block_count = parse_block(st, block_code, 0); | |
if( block_count < 0 ) die("Fatal: unterminated block\n"); | |
Closure *cls = closure_new( st, execute, block_count, block_code ); | |
code[count++] = (Instr){TOK_LIT, 0, 0, 0, (Object*) cls}; | |
} else if( tok == TOK_OPERATOR || tok == TOK_SYMBOL ){ | |
String *c = (String*)string_intern(st, symbol); | |
while( sl > 0 ) { | |
sc = oper_stack[sl - 1]; | |
int assoc = (!c->right && (c->preced <= sc.preced)) || (c->preced < sc.preced); | |
if( !assoc ) break; | |
printf("%d: %s -> %s\n", count, symbol, ((String*)sc.key)->chars); | |
code[count++] = sc; | |
sl--; // Pop op2 off the stack | |
} | |
oper_stack[sl++] = (Instr){tok, 2, c->preced, c->right, (Object*)c }; // push op1 onto the stack. | |
// } else if(tok == TOK_SYMBOL ) { | |
// oper_stack[sl++] = (Instr){K_CALL, 0, 0, 0, string_intern(st, symbol)}; // push op1 onto the stack. | |
} else if(tok == '(') { | |
} else { | |
die("[%d] not a valid symbol: %s\n", tok, symbol); | |
} | |
tok = utf8_next_symbol(st, st->infile, symbol); | |
} | |
// When there are no more tokens to read and there are still operator tokens in the stack: | |
while( sl > 0 ) { | |
sc = oper_stack[sl - 1]; | |
// if( sc == '(' || sc == ')' ) { | |
// printf("Error: parentheses mismatched\n"); | |
// return false; | |
// } | |
code[count++] = sc; | |
sl--; | |
} | |
if( tok == TOK_END && !top_level ) return -1; | |
return count; | |
} | |
// interpret: eval top level block | |
void interpret( State* st ) { | |
Instr block_code[CODE_SIZE]; | |
int count = parse_block(st, block_code, 1); | |
if( count > 0 ){ | |
Closure *cls = closure_new( st, execute, count, block_code ); | |
execute(st, cls, stack_top(st)); | |
} | |
} | |
void define_primitives( State* st ) { | |
define_primitive(st, st->object_vt, "dup", object_dup); | |
define_primitive(st, st->object_vt, "pop", object_pop); | |
define_primitive(st, st->object_vt, "ret", object_ret); // this one should be an operator | |
define_primitive(st, st->object_vt, "def", object_def); | |
define_slot(st, st->object_vt, "true", st->s_true); | |
define_slot(st, st->object_vt, "false", st->s_false); | |
define_primitive(st, st->object_vt, "list_slots", object_list_slots); | |
define_primitive(st, st->number_vt, "print", number_print); | |
define_operator(st, st->number_vt, "+", number_add, 1, 0); | |
define_primitive(st, st->number_vt, "times", number_times); | |
define_primitive(st, st->string_vt, "print", string_print); | |
define_primitive(st, st->closure_vt, "apply", closure_apply); | |
define_primitive(st, st->closure_vt, "print", closure_print); | |
define_primitive(st, st->boolean_vt, "if_true", boolean_if_true); | |
define_primitive(st, st->boolean_vt, "if_false", boolean_if_false); | |
// define_primitive(st, st->boolean_vt, "while_true", boolean_while_true); | |
define_primitive(st, st->boolean_vt, "print", boolean_print); | |
} | |
// Main function! | |
int main(int argc, char **argv) { | |
if( argc != 2 ) { | |
printf("Usage: lys filename.lys\n"); | |
return 0; | |
} | |
State* st = state_new(); | |
st->infile = fopen( argv[1],"r" ); | |
if( !st->infile ) { | |
perror("open"); | |
exit(1); | |
} | |
st->ch = getc(st->infile); | |
define_primitives( st ); | |
interpret( st ); | |
fclose(st->infile); | |
return 0; | |
} | |
/* | |
int priority(c) { | |
switch(c){ | |
case '+': return 1; | |
case '*': return 2; | |
case '^': return 3; | |
default: return 0; | |
} | |
} | |
int RightAssoc(c) { | |
if( c == '^') return 1; | |
else return 0; | |
} | |
void parseExpr(prec) { | |
p = parsePrimary(); | |
while(priority(input) >= prec){ | |
let oper = get(input); | |
let oprec = Priority(oper); | |
p := mknode(p, oper, parseE(if RightAssoc(oper) then oprec else oprec+1)) | |
} | |
} | |
int parsePrimary() { | |
switch( tok ){ | |
case '(': | |
parseExpr(1); | |
if( tok != ')') die("paren mismatched\n"); | |
return p | |
case '{': | |
case TOK_NUMBER: | |
case TOK_STRING: | |
default: | |
die("unknown\n"); | |
} | |
} | |
*/ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python2.5 | |
# | |
# Copyright (c) 2008 Samuel A. Falvo II | |
# All rights reserved. | |
# | |
# Redistribution and use in source and binary forms, with or without | |
# modification, are permitted provided that the following conditions are | |
# met: | |
# | |
# * Redistributions of source code must retain the above copyright | |
# notice, this list of conditions and the following disclaimer. | |
# | |
# * Redistributions in binary form must reproduce the above copyright | |
# notice, this list of conditions and the following disclaimer in the | |
# documentation and/or other materials provided with the | |
# distribution. | |
# | |
# * Neither Samuel A. Falvo II, Falvotech, nor the names of its | |
# contributors may be used to endorse or promote products derived | |
# from this software without specific prior written permission. | |
# | |
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | |
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | |
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
"""A super-simple, quick-n-dirty Io-syntax lexer and parser. | |
This parser/lexer combination is used to parse an Io-syntax source string | |
into a hierarchy of Node objects. An entire tree is returned for the | |
string. | |
NOTE | |
This parser is not really intended for mission-critical software, since | |
it was written only as an exploratory piece (e.g., how difficult is it to | |
parse Io-syntax sources?). If you're interested in using it in real- | |
world software, you do so at your own risk. :) | |
It currently lacks support for comments, but otherwise seems complete. | |
""" | |
__author__ = "Samuel A. Falvo II <[email protected]>" | |
__version__ = "1" | |
__license__ = "Simplified BSD" | |
__date__ = "2008 Feb 23" | |
import string | |
class Node(object): | |
"""A node in the abstract syntax tree. This 'class' is really used as a | |
structure; it has no significant methods of its own, and is intended to | |
be augmented by subsequent compiler passes, if required. | |
Fields: | |
name = the name of a lexical unit. Identifiers start with a letter or | |
underscore. Numbers start with a digit. Strings start with the | |
double-quote character ("). Punctuation symbols represent them- | |
selves. In some cases, this field may be an empty string. | |
arguments = A list of _programs_, where a program is itself a list of | |
Node objects. Hence, to get the first symbol of the first | |
argument, reference arguments[0][0], not just arguments[0]. | |
""" | |
def __init__(self, name): | |
self.name = name | |
self.arguments = [] | |
def __eq__(self, anotherNode): | |
"""Only for the benefit of the unit tests.""" | |
return self.name == anotherNode.name | |
def isspace(aCharacter): | |
return aCharacter in string.whitespace | |
def isalpha(aCharacter): | |
return (aCharacter in string.letters) or (aCharacter == '_') | |
def isIdentifier(aCharacter): | |
return isalpha(aCharacter) or isdigit(aCharacter) or (aCharacter == '?') | |
def isdigit(aCharacter): | |
return aCharacter in string.digits | |
endOfInput = -1 | |
class Lexer(object): | |
"""Lexes an input string for parsable symbols. | |
Implements a single token look-ahead. | |
Fields: | |
current = the current, or pending, symbol in the source text to be | |
consumed by a parser. Will be set to endOfInput when there | |
is no more input data to lex. Strings will include their | |
leading quotes. | |
next = the next after current symbol in the input stream. | |
""" | |
def __init__(self, string): | |
self.input = string | |
self.current = None | |
self.next = None | |
self.pump() | |
self.pump() | |
def pump(self): | |
"""Consumes a single lexical token. What was in next | |
now appears in current, and a new value is loaded into | |
next. | |
""" | |
self.current = self.next | |
self.lex() | |
def pendingCharacter(self): | |
return self.input[0] | |
def advanceCharacter(self): | |
self.input = self.input[1:] | |
def parseIdentifier(self): | |
largestIndex = len(self.input) | |
i = 0 | |
while (i < largestIndex) and isIdentifier(self.input[i]): | |
i = i+1 | |
self.next = self.input[0:i] | |
self.input = self.input[i:] | |
def parseNumber(self): | |
largestIndex = len(self.input) | |
i = 0 | |
while (i < largestIndex) and isdigit(self.input[i]): | |
i = i + 1 | |
self.next = self.input[0:i] | |
self.input = self.input[i:] | |
def parseQuotedString(self): | |
largestIndex = len(self.input) | |
i = 1 # index 0 is known to be a quote. | |
while (i < largestIndex) and (self.input[i] != '"'): | |
i = i + 1 | |
self.next = self.input[0:i+1] | |
self.input = self.input[i+1:] | |
def lex(self): | |
if self.input == "": | |
self.next = endOfInput | |
elif self.pendingCharacter() == '\n': | |
self.next = ";" | |
self.advanceCharacter() | |
elif isspace(self.pendingCharacter()): | |
self.advanceCharacter() | |
self.lex() | |
elif isalpha(self.pendingCharacter()): | |
self.parseIdentifier() | |
elif isdigit(self.pendingCharacter()): | |
self.parseNumber() | |
elif self.pendingCharacter() == '"': | |
self.parseQuotedString() | |
else: | |
self.next = self.pendingCharacter() | |
self.advanceCharacter() | |
def parseArguments(theLexer): | |
args = [] | |
arg = [] | |
while theLexer.current != endOfInput: | |
if theLexer.current == ')': | |
if arg: | |
args.append(arg) | |
break | |
elif theLexer.current == ',': | |
args.append(arg) | |
arg = [] | |
theLexer.pump() | |
else: | |
arg = parseExpression(theLexer) | |
return args | |
def parseExpression(theLexer): | |
tree = [] | |
while theLexer.current != endOfInput: | |
if theLexer.current == ",": | |
break | |
elif theLexer.current == ")": | |
break | |
elif theLexer.current == "(": | |
theLexer.pump() | |
args = parseArguments(theLexer) | |
if theLexer.current == ")": | |
if not tree: | |
tree = [Node("")] | |
if tree[-1].arguments: | |
tree.append(Node("")) | |
tree[-1].arguments = args | |
theLexer.pump() | |
else: | |
print "Syntax Error: ')' expected" | |
else: | |
tree.append(Node(theLexer.current)) | |
theLexer.pump() | |
return tree | |
def parse(aString): | |
l = Lexer(aString) | |
return parseExpression(l) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// code from https://github.com/proglangclass/vm | |
// copied in one file for easy inspection | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <assert.h> | |
// runtime | |
enum { tObject, tNumber, tString }; | |
typedef struct { | |
char type; | |
union { | |
char *string; | |
int number; | |
} value; | |
int refcount; | |
} Object; | |
static Object *TrueObject; | |
static Object *FalseObject; | |
static Object *NilObject; | |
Object *Object_new() { | |
return calloc(1, sizeof(Object)); | |
} | |
/////////////// GC /////////////// | |
Object *retain(Object *self) { | |
self->refcount++; | |
return self; | |
} | |
void release(Object *self) { | |
self->refcount--; | |
if (self->refcount <= 0) | |
free(self); | |
} | |
/////////////////////////////////// | |
char Object_is_true(Object *self) { | |
// false and nil are == false, everything else is true. | |
if (self == FalseObject || self == NilObject) return 0; | |
return 1; | |
} | |
Object *Number_new(int value) { | |
Object *object = Object_new(); | |
object->type = tNumber; | |
object->value.number = value; | |
return object; | |
} | |
int Number_value(Object *self) { | |
assert(self->type == tNumber); | |
return self->value.number; | |
} | |
Object *String_new(char *value) { | |
Object *object = Object_new(); | |
object->type = tString; | |
object->value.string = value; | |
return object; | |
} | |
Object *call(Object *receiver, char *message, Object *argv[], int argc) { | |
// HACK hardcoded methods. In a real runtime, you'd have proper classes and methods. | |
// See runtime.rb in our interpreter for an example. | |
if (strcmp(message, "+") == 0) { | |
// Number#+ | |
return Number_new(receiver->value.number + argv[0]->value.number); | |
} else if (strcmp(message, "print") == 0) { | |
// Object#print | |
switch (argv[0]->type) { | |
case tNumber: | |
printf("%d\n", argv[0]->value.number); | |
return argv[0]; | |
case tString: | |
printf("%s\n", argv[0]->value.string); | |
return argv[0]; | |
} | |
} | |
return 0; | |
} | |
void init_runtime() { | |
TrueObject = retain(Object_new()); | |
FalseObject = retain(Object_new()); | |
NilObject = retain(Object_new()); | |
} | |
void destroy_runtime() { | |
release(TrueObject); | |
release(FalseObject); | |
release(NilObject); | |
} | |
// opcode | |
enum { | |
// ------- Stack ------- | |
// Opcode Operands before after | |
/* 00 */ CALL, // index, argc [rcv, arg...] [returned] | |
/* 01 */ PUSH_NUMBER, // index [] [number] | |
/* 02 */ PUSH_STRING, // index [] [string] | |
/* 03 */ PUSH_SELF, // [] [self] | |
/* 04 */ PUSH_NIL, // [] [nil] | |
/* 05 */ PUSH_BOOL, // 1=t, 0=f [] [true or false] | |
/* 06 */ GET_LOCAL, // index [] [value] | |
/* 07 */ SET_LOCAL, // index [value] [] | |
/* 08 */ JUMP_UNLESS, // offset [test] [] | |
/* 09 */ JUMP, // offset [] [] | |
/* 10 */ ADD, // [a, b] [result] | |
/* 11 */ RETURN // [] [] | |
}; | |
typedef unsigned char byte; // 1 byte | |
// Helpers to play with the stack | |
#define LOCALS_MAX 10 | |
#define STACK_MAX 10 | |
#define STACK_PUSH(I) do { \ | |
assert(sp-stack < STACK_MAX); \ | |
*(++sp) = (I); \ | |
retain(*sp); \ | |
} while(0) | |
#define STACK_POP() (*sp--) | |
void run(void *literals[], byte instructions[]) { | |
byte *ip = instructions; // instruction pointer | |
Object *stack[STACK_MAX]; // THE famous stack | |
Object **sp = stack; // stack pointer, keeps track of current position | |
Object *locals[LOCALS_MAX] = {}; // where we store our local variables | |
// Setup the runtime | |
Object *self = Object_new(); | |
retain(self); | |
// Start processing instructions | |
while (1) { | |
switch (*ip) { | |
case PUSH_NUMBER: { | |
ip++; // advance to the operand (literal index) | |
STACK_PUSH(Number_new((int)literals[*ip])); | |
break; | |
} | |
case PUSH_STRING: { | |
ip++; // advance to the operand (literal index) | |
STACK_PUSH(String_new((char *)literals[*ip])); | |
break; | |
} | |
case PUSH_SELF: { | |
STACK_PUSH(self); | |
break; | |
} | |
case PUSH_NIL: { | |
STACK_PUSH(NilObject); | |
break; | |
} | |
case PUSH_BOOL: { | |
ip++; // advance to operand (0 = false, 1 = true) | |
if (*ip == 0) { | |
STACK_PUSH(FalseObject); | |
} else { | |
STACK_PUSH(TrueObject); | |
} | |
break; | |
} | |
case CALL: { | |
ip++; // advance to operand (method name index in literals) | |
char *method = literals[*ip]; | |
ip++; // advance to operand (# of args) | |
int argc = *ip; | |
Object *argv[10]; | |
int i; | |
for (i = argc - 1; i >= 0; i--) argv[i] = STACK_POP(); | |
Object *receiver = STACK_POP(); | |
Object *result = call(receiver, method, argv, argc); | |
STACK_PUSH(result); | |
// Release all the objects | |
for (i = argc - 1; i >= 0; i--) release(argv[i]); | |
release(receiver); | |
break; | |
} | |
case RETURN: { | |
goto cleanup; | |
break; | |
} | |
case GET_LOCAL: { | |
ip++; // advance operand (local index) | |
STACK_PUSH(locals[*ip]); | |
break; | |
} | |
case SET_LOCAL: { | |
ip++; // local index | |
locals[*ip] = STACK_POP(); | |
break; | |
} | |
case ADD: { | |
Object *a = STACK_POP(); | |
Object *b = STACK_POP(); | |
STACK_PUSH(Number_new(Number_value(a) + Number_value(b))); | |
release(a); | |
release(b); | |
break; | |
} | |
case JUMP_UNLESS: { | |
ip++; // operand (offset, # of bytes to jump forward) | |
byte offset = *ip; | |
Object *condition = STACK_POP(); | |
if (!Object_is_true(condition)) ip += offset; | |
release(condition); | |
break; | |
} | |
case JUMP: { | |
ip++; // operand (offset, # of bytes to jump forward) | |
byte offset = *ip; | |
ip += offset; | |
break; | |
} | |
} | |
ip++; | |
} | |
cleanup: | |
release(self); | |
int i; | |
// Release all the local variables | |
for (i = 0; i < LOCALS_MAX; i++) if (locals[i]) release(locals[i]); | |
// Release everything left on the stack | |
while (sp > stack) release(STACK_POP()); | |
} | |
// bytecode | |
/* | |
print("the answer is:") | |
a = 30 + 2 | |
if false | |
print(a) | |
else | |
print("no") | |
end | |
*/ | |
#define LITERALS { \ | |
(void *) "the answer is:", \ | |
(void *) "print", \ | |
(void *) 30, \ | |
(void *) 2, \ | |
(void *) "no", \ | |
} | |
#define INSTRUCTIONS { 3, 2, 0, 0, 1, 1, 1, 2, 1, 3, 10, 7, 0, 5, 0, 8, 8, 3, 6, 0, 0, 1, 1, 9, 6, 3, 2, 4, 0, 1, 1, 11 } | |
int main (int argc, char const *argv[]) { | |
// Get compiled literals table and instructions from bytecode.h | |
void *literals[] = LITERALS; | |
byte instructions[] = INSTRUCTIONS; | |
init_runtime(); | |
// Run that thing! | |
run(literals, instructions); | |
destroy_runtime(); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
Implements the Straw Man (SM) language. SM is intended to explore | |
a minimal scripting language. This module is a prototype and has undergone | |
heavy tweaking, and has many known bugs. However, | |
the test cases at the end do all work more or less as promised. | |
To get a feel for SM, run the modules and examine the output of each test | |
case. | |
''' | |
from copy import copy | |
import re | |
# define a token in the straw-main language. | |
tokenRE = re.compile(r'".*?"|\(|\)|\}|\{|;|[^(){}\s]+') | |
# used to detect proper symbols; everything else is assumed to be a literal. | |
symbolRE = re.compile(r'[a-z][a-z0-9_]*') | |
class Node(object): | |
'An empty symbol node.' | |
def __init__(self, symbol): | |
self.sym = symbol | |
self.args = [] | |
self.val = symbol | |
def __str__(self): | |
if not self.args: | |
return str(self.val) | |
else: | |
arg_strings = map(str, self.args) | |
return "%s(%s)" % (self.val, " ".join(arg_strings)) | |
class DoNode(Node): | |
'An empty "do" symbol node, which uses the {} syntactic sugar.' | |
def __init__(self): | |
self.sym = 'do' | |
self.args = [] | |
def __str__(self): | |
return "{%s}" % "; ".join(map(str, self.args)) | |
class ValueNode(Node): | |
'A value node bound to a particular value at creation.' | |
def __init__(self, value): | |
self.sym = 'value' | |
self.val = value | |
self.args = [] | |
def val(self): | |
return self.val | |
class Stack(list): | |
' a thin wrapper around list() to make it more stack-like.' | |
def peek(self): | |
' returns the top of the stack. ' | |
return self[-1] | |
def push(self, top): | |
' adds an element to the top of the stack. ' | |
self.append(top) | |
def parse(string): | |
'returns an AST for the SM grammer.' | |
main = Node('do') | |
# I love this trick; when working with any | |
# kind of tree, we can often avoid treating | |
# the root as a special case by simply | |
# starting with a dummy node above the root. | |
context = Stack() | |
context.push(main) | |
ast = None | |
for match in tokenRE.finditer(string): | |
token = match.group(0) | |
if token == ";": | |
if context.peek().sym != 'do': | |
raise SyntaxError, "misplaced semicolon!" | |
elif token == '(': | |
if context.peek() == ast: | |
raise SyntaxError, "'(' does not follow symbol!" | |
context.push(ast) | |
elif token == ')': | |
context.pop() | |
elif token == '{': | |
ast = DoNode() | |
context.peek().args.append(ast) | |
context.push(ast) | |
elif token == '}': | |
if context.peek().sym != 'do': | |
raise SyntaxError, "'}' terminating non-do block!" | |
context.pop() | |
elif symbolRE.match(token): | |
ast = Node(token) | |
context.peek().args.append(ast) | |
else: # must be a literal | |
ast = ValueNode(eval(token)) | |
context.peek().args.append(ast) | |
return main | |
### eval functions for each primitive ### | |
def indexSafe(f): | |
'decorator to handle a common exception' | |
def newF(*x, **y): | |
try: | |
return f(*x, **y) | |
except: | |
return ValueNode('') | |
return newF | |
@indexSafe | |
def evalIf(ast, env): | |
if evaluate(ast.args[0], env): | |
return evaluate(ast.args[1], env) | |
else: | |
return evaluate(ast.args[2], env) | |
@indexSafe | |
def evalSet(ast, env): | |
name = ast.args[0].sym | |
value = evaluate(ast.args[1], env) | |
env[name] = value | |
return value | |
@indexSafe | |
def evalGt(ast, env): | |
left = int(evaluate(ast.args[0], env).val) | |
right = int(evaluate(ast.args[1], env).val) | |
return 'true' if left > right else '' | |
@indexSafe | |
def evalPrint(ast,env): | |
ret = '' | |
for arg in ast.args: | |
ret = evaluate(arg, env) | |
print ret, | |
return ret | |
@indexSafe | |
def evalDo(ast,env): | |
ret = '' | |
for arg in ast.args: | |
if arg.sym == 'return': | |
if len(arg.args) == 0: return '' | |
return evaluate(arg.args[0], env) | |
else: | |
ret = evaluate(arg, env) | |
return ret | |
@indexSafe | |
def evalAdd(ast,env): | |
left = int(evaluate(ast.args[0], env).val) | |
right = int(evaluate(ast.args[1], env).val) | |
return ValueNode(str(left + right)) | |
@indexSafe | |
def evalEqual(ast,env): | |
left = evaluate(ast.args[0], env) | |
right = evaluate(ast.args[1], env) | |
return True if left == right else False | |
@indexSafe | |
def evalQuote(ast, env): | |
return copy(ast.args[0]) | |
@indexSafe | |
def evalEval(ast, env): | |
return evaluate(evaluate(ast.args[0], env), env) | |
@indexSafe | |
def evalString(ast, env): | |
return ValueNode(str(ast)) | |
@indexSafe | |
def evalParse(ast, env): | |
string = str(evaluate(ast.args[0], env).val) | |
return parse(string) | |
@indexSafe | |
def evalLambda(ast, env): | |
parameters = ast.args[0].args # not evaluated! | |
expression = ast.args[1] | |
l = Node('function') | |
p = Node('parameters') | |
p.args = parameters | |
l.args = [p, expression ] | |
return l | |
def evalData(ast, env): | |
return ast | |
@indexSafe | |
def evalProduct(ast, env): | |
left = int(evaluate(ast.args[0],env).val) | |
right = int(evaluate(ast.args[1],env).val) | |
return ValueNode(str(left * right)) | |
@indexSafe | |
def evalVal(ast, env): | |
return ValueNode(evaluate(ast.args[0], env).val) | |
@indexSafe | |
def evalFor(ast, env): | |
'for(init,condition,increment,body)' | |
evaluate(ast.args[0], env) | |
ret = ValueNode('') | |
while evaluate(ast.args[1], env): | |
ret = evaluate(ast.args[3], env) | |
evaluate(ast.args[2], env) | |
return ret | |
@indexSafe | |
def evalForEach(ast, env): | |
symbol = ast.args[0].sym | |
collection = evaluate(ast.args[1],env) | |
body = ast.args[2] | |
ret = ValueNode('') | |
for arg in collection.args: | |
env[symbol] = arg | |
ret = evaluate(body,env) | |
return ret | |
keywords = {'do':evalDo, | |
'if':evalIf, | |
'set':evalSet, | |
'gt':evalGt, | |
'print':evalPrint, | |
'add':evalAdd, | |
'eq':evalEqual, | |
'quote':evalQuote, | |
'q':evalQuote, | |
'eval':evalEval, | |
'string':evalString, | |
'parse':evalParse, | |
'data':evalData, | |
'd':evalData, | |
'lambda':evalLambda, | |
'product':evalProduct, | |
'val':evalVal, | |
'for':evalFor, | |
'foreach':evalForEach, | |
} | |
def evaluate(ast, env={}): | |
' evaluates an Node given an environment.' | |
if ast.sym in keywords: | |
return keywords[ast.sym](ast, env) | |
elif ast.sym in env: | |
value = env[ast.sym] | |
if hasattr(value,'sym') and value.sym == 'function': | |
return functionCall(value, ast, env) | |
else: | |
return value | |
else: | |
return ast | |
def functionCall(function, call, env): | |
arguments = [ evaluate(arg, env) for arg in call.args ] | |
parameters = [ param.sym for param in function.args[0].args] | |
code = function.args[1] | |
localEnv = dict(zip(parameters, arguments)) | |
localEnv.update(env) | |
return evaluate(code, localEnv) | |
def run(codeString,env={}): | |
'parses and evaluates an SM expression.' | |
return evaluate(parse(codeString),env) | |
tests = [] | |
tests.append( ''' | |
set(x 1) | |
for(set(i 0) gt(8 i) set(i add(i 1)) { | |
print("2 to the " i " = " x) | |
set(x add(x x)) | |
}) | |
''') | |
tests.append( ''' | |
set(x 17); | |
set(y 8); | |
print( "x=" x "y=" y); | |
print( "changing x..."); | |
set(x 9); | |
print( "x=" x "y=" y); | |
if( gt(x y) | |
{ print("x is greater than y."); set(z x); } | |
{ print("x is not greater than y."); set(z y); } | |
); | |
print( "greatest of x and y:" z ); | |
print( "total of x and y:" add( x y) ); | |
''') | |
tests.append( ''' | |
set(string "Love"); | |
if ( eq(string "Love") | |
{ set(string "Hate");} | |
) | |
print("string = " string) | |
return(z); | |
print( "not reached!" ); | |
''') | |
tests.append( ''' | |
set(code quote(print ("hello world!")) ) | |
print ("code = '" code "'") | |
print ("when I evaluate the code, I should see 'hello world' as a side effect:") | |
eval(code)''') | |
tests.append( ''' | |
set(x 1) | |
set(y 2) | |
set( sum quote(add(x y)) ) | |
print ( "sum = " sum ) | |
print ( "add(x y) = " add(x y)) | |
print ( "eval(sum) = " eval(sum) ) | |
print ( "setting x to 10..." ) | |
set(x 10) | |
print ( "eval(sum) = " eval(sum) ) | |
''') | |
tests.append( ''' | |
set(code_string "print(hello world!)") | |
set(code parse(code_string)) | |
print (code_string) | |
print (code) | |
eval(code) | |
''') | |
tests.append( ''' | |
set(f lambda ( quote(x) add(x 1) )) | |
print( f(2)) | |
set(metric lambda ( quote(x y) add(product(x x) product(y y)) ) ) | |
print( metric(5 10) ) | |
print( metric(3 4) ) | |
''') | |
tests.append( ''' | |
for( set(i 0) gt(10 i) set(i add(i 1)) { | |
print("i=" i) | |
}) | |
''') | |
tests.append( ''' | |
set(list data(1 2 3)) | |
print(list) | |
foreach(element list { | |
print(element) | |
}) | |
''') | |
tests.append( ''' | |
set(tree data("Friends" ("John" "Jill") "Pets" ("Spot" "Tiger") )) | |
print(tree) | |
foreach(branch tree { | |
print(val(branch) ":") | |
foreach(leaf branch { | |
print(" " leaf) | |
}) | |
}) | |
''') | |
for t in tests: | |
print "code:",t | |
print "output:" | |
run(t,{}) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment