Skip to content

Instantly share code, notes, and snippets.

@andersonsp
Last active August 29, 2015 14:01
Show Gist options
  • Save andersonsp/7c0d6643de0dbc76632f to your computer and use it in GitHub Desktop.
Save andersonsp/7c0d6643de0dbc76632f to your computer and use it in GitHub Desktop.
// lys.c forth version by AndersonSP
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdarg.h>
//----------------------------------------------------------------
// Object defs based on http://piumarta.com/software/id-objmodel/
//
typedef struct _State State;
typedef struct _Vtable Vtable;
typedef struct _Object Object;
typedef struct _Closure Closure;
typedef struct _Number Number;
typedef struct _String String;
typedef void (*Primitive)(State* st, Closure *w, Object *self);
#define BUFFER_SIZE 512
#define CODE_SIZE 1024
#define STACK_SIZE 512
void execute( State* st, Closure* msg, Object *self );
Vtable *vtable_delegated(Vtable *self, char* name);
Object *object_new(Vtable *self, int payloadSize);
Object *vtable_add_slot(Vtable *self, Object *key, Object *value);
void stack_push( State* st, Object* p );
Object* stack_pop( State* st );
Object* stack_top( State* st );
struct _State{
Vtable *vtable_vt, *object_vt, *closure_vt; // core object prototypes
Vtable *number_vt, *string_vt, *boolean_vt; // ...
Vtable *dict; // symbol dictionary
Object *lobby, *s_true, *s_false; // default objects, lobby should be the first element of the stack at all moments
Object* stack[STACK_SIZE];
int ret, sp; // return flag, top of stack pointer
int ch, line; // for parsing
FILE* infile;
};
typedef struct {
Object *key, *value;
} Assoc;
typedef struct {
uint8_t kind, arity, preced, right;
Object *key;
} Instr;
struct _Vtable {
Vtable *_vt[0], *parent;
char name[32];
int size, tally;
Assoc *bindings;
};
struct _Object {
Vtable *_vt[0];
};
struct _Closure {
Vtable *_vt[0];
Primitive method;
int length;
Instr code[1];
};
struct _String {
Vtable *_vt[0];
int length, preced, right; // for operators
char chars[1];
};
struct _Number {
int num;
};
void die( const char* str, ... ) {
va_list a;
va_start(a, str);
vfprintf(stderr, str, a );
exit(1);
}
static inline Object *vtable_lookup(Vtable *self, Object *key) {
Vtable* vt = self;
Assoc* assoc;
while( vt ){
for( assoc = vt->bindings; assoc->key; assoc++ ) {
if( key == assoc->key ) return assoc->value;
}
vt = vt->parent;
}
die("Fatal:\n %s:%p has no slot '%s'\n", self->name, self, ((String *)key)->chars );
return 0; //unreached
}
//TODO: this could use some caching
inline void send(State* st, Object *rcv, Object* msg) {
Object *slot = vtable_lookup(rcv->_vt[-1], msg);
if( slot->_vt[-1] == st->closure_vt ) ((Closure*)slot)->method( st, (Closure*)slot, rcv );
else stack_push( st, slot );
}
inline void *alloc(size_t size) {
Vtable **ppvt= (Vtable **)calloc(1, sizeof(Vtable *) + size);
return (void *)(ppvt + 1);
}
inline void dealloc( Object* self) {
if(self) free( &self->_vt[-1] );
}
Object *number_new(State* st, int num) {
Number *clone = (Number *)object_new(st->number_vt, sizeof(Number));
clone->num = num;
return (Object *)clone;
}
Object *string_new(State* st, char *str) {
int len = strlen(str);
String *string = (String *)alloc(sizeof(String) + len + 1);
string->_vt[-1] = st->string_vt;
string->length = len;
strcpy(string->chars, str);
return (Object *)string;
}
Closure *closure_new(State* st, Primitive method, int num_instr, Instr* instr_list) {
Closure *closure = (Closure *)alloc(sizeof(Closure) + sizeof(Instr) * num_instr);
closure->_vt[-1] = st->closure_vt;
closure->method = method;
if( instr_list ){
closure->length = num_instr;
memcpy( closure->code, instr_list, num_instr*sizeof(Instr));
}
return closure;
}
Vtable *vtable_delegated(Vtable *self, char* name) {
Vtable *child= (Vtable *)alloc( sizeof(Vtable) );
child->_vt[-1] = self ? self->_vt[-1] : child;
child->size = 16;
child->tally = 0;
child->bindings = (Assoc *)calloc(child->size, sizeof(Assoc));
child->parent = self;
strcpy(child->name, name);
return child;
}
Object *object_new(Vtable *self, int payloadSize ) {
Object *object = (Object *)alloc(payloadSize);
object->_vt[-1] = self;
return object;
}
Object* vtable_add_slot(Vtable *self, Object *key, Object *value) {
Assoc* assoc;
for( assoc = self->bindings; assoc->key; assoc++ ) {
if( key == assoc->key ) {
dealloc(assoc->value);
return assoc->value = value;
}
}
if( self->tally == self->size-1 ) {
self->size *= 2;
self->bindings = (Assoc *)realloc(self->bindings, sizeof(Assoc) * self->size);
}
assoc = &self->bindings[self->tally++];
assoc->key = key;
return assoc->value = value;
}
Object *string_intern(State* st, char *string){
Object *symbol;
int i;
for( i = 0; i < st->dict->tally; ++i ) {
symbol = st->dict->bindings[i].key;
if( !strcmp(string, ((String *)symbol)->chars) ) return symbol;
}
symbol = string_new( st, string );
vtable_add_slot( st->dict, symbol, 0 );
return symbol;
}
State* state_new() {
State *st = calloc(sizeof(State), 1);
st->vtable_vt = vtable_delegated(0, "Vtable");
st->vtable_vt->_vt[-1] = st->vtable_vt;
st->object_vt = vtable_delegated(0, "Object");
st->object_vt->_vt[-1] = st->vtable_vt;
st->vtable_vt->parent = st->object_vt;
st->dict = vtable_delegated(0, "SymbolTable");
st->dict->_vt[-1] = 0;
st->closure_vt = vtable_delegated(st->object_vt, "Closure");
st->number_vt = vtable_delegated(st->object_vt, "Number");
st->string_vt = vtable_delegated(st->object_vt, "String");
st->boolean_vt = vtable_delegated(st->object_vt, "Boolean");
st->lobby = object_new(st->object_vt, 0);
st->s_true = object_new(st->boolean_vt, 0);
st->s_false = object_new(st->boolean_vt, 0);
stack_push( st, st->lobby );
return st;
}
void stack_push( State* st, Object* p ) {
st->stack[++st->sp] = p;
}
Object* stack_pop( State* st ) {
if(st->sp > 1) return st->stack[st->sp--];
die( "Fatal: Trying to pop the lobby from the stack\n" );
}
Object* stack_top( State* st ) {
return st->stack[st->sp];
}
// * Read a word from any utf8 stream and return it.
static inline utf8_seq( uint8_t first_byte ){
if((first_byte & 0x80) == 0x00) return 1; // 0xxxxxxx
if((first_byte & 0xC0) == 0x80) return 0; // 10xxxxxx - invalid
if((first_byte & 0xE0) == 0xC0) return 2; // 110xxxxx
if((first_byte & 0xF0) == 0xE0) return 3; // 1110xxxx
if((first_byte & 0xF8) == 0xF0) return 4; // 11110xxx
if((first_byte & 0xFC) == 0xF8) return 5; // 111110xx
if((first_byte & 0xFE) == 0xFC) return 6; // 1111110x
}
static int utf8_read_multibyte(FILE *stream, int first, uint8_t *bytes) {
if( first == EOF ) return 0; // don't read EOF into the buffer
int i, len = utf8_seq( first );
bytes[0] = first;
for( i=1; i<len; i++ ) { // read subsequent character bytes
int c = getc(stream);
if( c == EOF || (c & 0xC0) != 0x80 ) return 0;
bytes[i] = c;
}
return len; // return number of bytes read into buffer
}
static const char *operators = ":~!@$%^&*-+=|\\<>?/";
static const char *delim = " \t\r\n()[]{};,";
enum { TOK_END, TOK_LIT, TOK_NUMBER, TOK_STRING, TOK_SYMBOL, TOK_OPERATOR, TOK_MALFORM, TOK_UNKNOWN };
static int utf8_next_symbol( State* st, FILE* stream, uint8_t* buf ) {
int tok = TOK_UNKNOWN, ch = st->ch, wp = 0;
int string_delim;
again:
// printf("ch: %c \n", ch);
switch(ch) {
case ' ': case '\t': case '\r': // eat whitespace
ch = getc( stream );
goto again;
case '\n':
st->line++;
ch = getc( stream );
goto again;
case '#':
do {
ch = getc( stream );
} while( ch != '\n' && ch != EOF && ch != '\0' );
goto again;
case EOF: case '\0': // end of file
return TOK_END;
case '(': case ')': case '[': case ']': case '{': case '}': //case ',': case ';':
tok = ch;
ch = getc(stream);
break;
case '\"': case '\'':
tok = TOK_STRING;
string_delim = ch;
ch = getc( stream ); // skip the first quote
do {
int len = utf8_read_multibyte( stream, ch, &buf[wp]);
if( len == 0 ) return TOK_MALFORM;
if( ch == '\n' ) st->line++;
ch = getc( stream );
wp += len;
} while( ch != string_delim );
ch = getc( stream ); // skip trailing quote
break;
default:
if( isdigit(ch) ) {
tok = TOK_NUMBER;
do {
buf[wp++] = ch;
ch = getc( stream );
} while( isdigit(ch) );
} else if( strchr(operators, ch) ) {
tok = TOK_OPERATOR;
do {
buf[wp++] = ch;
ch = getc( stream );
} while( strchr(operators, ch) );
} else {
tok = TOK_SYMBOL;
do {
int len = utf8_read_multibyte( stream, ch, &buf[wp]);
if( len == 0 ) return TOK_MALFORM;
ch = getc( stream );
wp += len;
} while( !strchr(delim, ch) && !strchr(operators, ch) );
}
}
buf[wp] = '\0';
st->ch = ch; // record last character read, for next iteration
return tok;
}
Object *define_slot( State* st, Vtable* vt, char *name, Object* obj ){
Object *s_new = string_intern(st, name);
vtable_add_slot(vt, s_new, obj);
return s_new;
}
Object *define_operator( State* st, Vtable* vt, char *name, Primitive method, int preced, int right ){
Object *s_new = define_slot(st, vt, name, (Object*)closure_new( st, method, 0, NULL ));
((String *)s_new)->preced = preced;
((String *)s_new)->right = right;
return s_new;
}
Object *define_primitive( State* st, Vtable* vt, char *name, Primitive method ) {
return define_slot(st, vt, name, (Object*)closure_new( st, method, 0, NULL ));
}
void object_list_slots( State* st, Closure* msg, Object *self ) {
Vtable *vt = self->_vt[-1];
Object *symbol;
printf("%s slots:\n", vt->name );
int i;
for( i = 0; i < vt->tally; ++i ) {
symbol = vt->bindings[i].key;
printf(" %s\n", ((String *)symbol)->chars );
}
}
void object_ret( State* st, Closure* msg, Object *self ) {
st->ret = 1;
}
void object_pop( State* st, Closure* msg, Object *self ) {
stack_pop( st );
}
void object_dup( State* st, Closure* msg, Object *self ) {
stack_push( st, stack_top(st) );
}
void object_def( State* st, Closure* msg, Object *self ) {
Object *literal = stack_pop(st);
String *name = (String*) stack_pop(st);
Object *rec = stack_top(st);
define_slot(st, rec->_vt[-1], name->chars, literal);
}
void number_add(State* st, Closure* msg, Object *self) {
Number *a = (Number*) stack_pop(st);
Number *b = (Number*) stack_pop(st);
Object *sum = number_new( st, a->num + b->num );
stack_push( st, sum );
}
void number_times(State* st, Closure* msg, Object *self) {
Number *a = (Number*) stack_pop(st);
Closure *cls = (Closure*) stack_pop(st);
while( a->num-- ) execute(st, cls, stack_top(st));
}
void number_print(State* st, Closure* msg, Object *self) {
printf("[%d] %d\n", st->sp, ((Number *) self)->num);
}
void string_print(State* st, Closure* msg, Object *self) {
printf("[%d] %s\n", st->sp, ((String *) self)->chars);
}
void closure_print(State* st, Closure* msg, Object *self) {
printf("[%d] <closure %p>\n", st->sp, self);
}
void closure_apply(State* st, Closure* msg, Object *self) {
Closure *cls = (Closure*) stack_pop(st);
execute(st, cls, stack_top(st));
}
void boolean_if_true(State* st, Closure* msg, Object *self) {
Object *boolean = stack_pop(st);
Closure *cls = (Closure*) stack_pop(st);
if(boolean == st->s_true)execute(st, cls, stack_top(st));
}
void boolean_if_false(State* st, Closure* msg, Object *self) {
Object *boolean = stack_pop(st);
Closure *cls = (Closure*) stack_pop(st);
if(boolean == st->s_false)execute(st, cls, stack_top(st));
}
void boolean_print(State* st, Closure* msg, Object *self) {
printf("[%d] <%s>\n", st->sp, self == st->s_true ? "true" : "false");
}
// * Executes the closure given as param.
void execute( State* st, Closure* cls, Object *self ) {
int ip = 0;
Instr *cur = cls->code;
while( cur->key && !st->ret ) {
// printf("[%d] exec: %p %s\n", ip, cur, ((String *)cur)->chars);
if( cur->kind == TOK_LIT ) stack_push(st, cur->key);
else send(st, stack_top(st), cur->key);
ip += 1;
cur = cls->code + ip;
}
st->ret = 0; // clear return flag
}
int parse_block(State* st, Instr *code, int top_level ) {
char symbol[BUFFER_SIZE];
int count = 0;
Instr sc, oper_stack[32]; // operator stack
int sl = 0; // stack length
int tok = utf8_next_symbol(st, st->infile, symbol);
while( tok != TOK_END && tok != '}' ) {
if( tok == TOK_NUMBER ){
code[count++] = (Instr){TOK_LIT, 0, 0, 0, number_new(st, atoi(symbol))};
} else if( tok == TOK_STRING ) {
code[count++] = (Instr){TOK_LIT, 0, 0, 0, string_new( st, symbol )};
} else if( tok == '{') {
Instr block_code[CODE_SIZE];
int block_count = parse_block(st, block_code, 0);
if( block_count < 0 ) die("Fatal: unterminated block\n");
Closure *cls = closure_new( st, execute, block_count, block_code );
code[count++] = (Instr){TOK_LIT, 0, 0, 0, (Object*) cls};
} else if( tok == TOK_OPERATOR || tok == TOK_SYMBOL ){
String *c = (String*)string_intern(st, symbol);
while( sl > 0 ) {
sc = oper_stack[sl - 1];
int assoc = (!c->right && (c->preced <= sc.preced)) || (c->preced < sc.preced);
if( !assoc ) break;
printf("%d: %s -> %s\n", count, symbol, ((String*)sc.key)->chars);
code[count++] = sc;
sl--; // Pop op2 off the stack
}
oper_stack[sl++] = (Instr){tok, 2, c->preced, c->right, (Object*)c }; // push op1 onto the stack.
// } else if(tok == TOK_SYMBOL ) {
// oper_stack[sl++] = (Instr){K_CALL, 0, 0, 0, string_intern(st, symbol)}; // push op1 onto the stack.
} else if(tok == '(') {
} else {
die("[%d] not a valid symbol: %s\n", tok, symbol);
}
tok = utf8_next_symbol(st, st->infile, symbol);
}
// When there are no more tokens to read and there are still operator tokens in the stack:
while( sl > 0 ) {
sc = oper_stack[sl - 1];
// if( sc == '(' || sc == ')' ) {
// printf("Error: parentheses mismatched\n");
// return false;
// }
code[count++] = sc;
sl--;
}
if( tok == TOK_END && !top_level ) return -1;
return count;
}
// interpret: eval top level block
void interpret( State* st ) {
Instr block_code[CODE_SIZE];
int count = parse_block(st, block_code, 1);
if( count > 0 ){
Closure *cls = closure_new( st, execute, count, block_code );
execute(st, cls, stack_top(st));
}
}
void define_primitives( State* st ) {
define_primitive(st, st->object_vt, "dup", object_dup);
define_primitive(st, st->object_vt, "pop", object_pop);
define_primitive(st, st->object_vt, "ret", object_ret); // this one should be an operator
define_primitive(st, st->object_vt, "def", object_def);
define_slot(st, st->object_vt, "true", st->s_true);
define_slot(st, st->object_vt, "false", st->s_false);
define_primitive(st, st->object_vt, "list_slots", object_list_slots);
define_primitive(st, st->number_vt, "print", number_print);
define_operator(st, st->number_vt, "+", number_add, 1, 0);
define_primitive(st, st->number_vt, "times", number_times);
define_primitive(st, st->string_vt, "print", string_print);
define_primitive(st, st->closure_vt, "apply", closure_apply);
define_primitive(st, st->closure_vt, "print", closure_print);
define_primitive(st, st->boolean_vt, "if_true", boolean_if_true);
define_primitive(st, st->boolean_vt, "if_false", boolean_if_false);
// define_primitive(st, st->boolean_vt, "while_true", boolean_while_true);
define_primitive(st, st->boolean_vt, "print", boolean_print);
}
// Main function!
int main(int argc, char **argv) {
if( argc != 2 ) {
printf("Usage: lys filename.lys\n");
return 0;
}
State* st = state_new();
st->infile = fopen( argv[1],"r" );
if( !st->infile ) {
perror("open");
exit(1);
}
st->ch = getc(st->infile);
define_primitives( st );
interpret( st );
fclose(st->infile);
return 0;
}
/*
int priority(c) {
switch(c){
case '+': return 1;
case '*': return 2;
case '^': return 3;
default: return 0;
}
}
int RightAssoc(c) {
if( c == '^') return 1;
else return 0;
}
void parseExpr(prec) {
p = parsePrimary();
while(priority(input) >= prec){
let oper = get(input);
let oprec = Priority(oper);
p := mknode(p, oper, parseE(if RightAssoc(oper) then oprec else oprec+1))
}
}
int parsePrimary() {
switch( tok ){
case '(':
parseExpr(1);
if( tok != ')') die("paren mismatched\n");
return p
case '{':
case TOK_NUMBER:
case TOK_STRING:
default:
die("unknown\n");
}
}
*/
#!/bin/python2.5
#
# Copyright (c) 2008 Samuel A. Falvo II
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the
# distribution.
#
# * Neither Samuel A. Falvo II, Falvotech, nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""A super-simple, quick-n-dirty Io-syntax lexer and parser.
This parser/lexer combination is used to parse an Io-syntax source string
into a hierarchy of Node objects. An entire tree is returned for the
string.
NOTE
This parser is not really intended for mission-critical software, since
it was written only as an exploratory piece (e.g., how difficult is it to
parse Io-syntax sources?). If you're interested in using it in real-
world software, you do so at your own risk. :)
It currently lacks support for comments, but otherwise seems complete.
"""
__author__ = "Samuel A. Falvo II <[email protected]>"
__version__ = "1"
__license__ = "Simplified BSD"
__date__ = "2008 Feb 23"
import string
class Node(object):
"""A node in the abstract syntax tree. This 'class' is really used as a
structure; it has no significant methods of its own, and is intended to
be augmented by subsequent compiler passes, if required.
Fields:
name = the name of a lexical unit. Identifiers start with a letter or
underscore. Numbers start with a digit. Strings start with the
double-quote character ("). Punctuation symbols represent them-
selves. In some cases, this field may be an empty string.
arguments = A list of _programs_, where a program is itself a list of
Node objects. Hence, to get the first symbol of the first
argument, reference arguments[0][0], not just arguments[0].
"""
def __init__(self, name):
self.name = name
self.arguments = []
def __eq__(self, anotherNode):
"""Only for the benefit of the unit tests."""
return self.name == anotherNode.name
def isspace(aCharacter):
return aCharacter in string.whitespace
def isalpha(aCharacter):
return (aCharacter in string.letters) or (aCharacter == '_')
def isIdentifier(aCharacter):
return isalpha(aCharacter) or isdigit(aCharacter) or (aCharacter == '?')
def isdigit(aCharacter):
return aCharacter in string.digits
endOfInput = -1
class Lexer(object):
"""Lexes an input string for parsable symbols.
Implements a single token look-ahead.
Fields:
current = the current, or pending, symbol in the source text to be
consumed by a parser. Will be set to endOfInput when there
is no more input data to lex. Strings will include their
leading quotes.
next = the next after current symbol in the input stream.
"""
def __init__(self, string):
self.input = string
self.current = None
self.next = None
self.pump()
self.pump()
def pump(self):
"""Consumes a single lexical token. What was in next
now appears in current, and a new value is loaded into
next.
"""
self.current = self.next
self.lex()
def pendingCharacter(self):
return self.input[0]
def advanceCharacter(self):
self.input = self.input[1:]
def parseIdentifier(self):
largestIndex = len(self.input)
i = 0
while (i < largestIndex) and isIdentifier(self.input[i]):
i = i+1
self.next = self.input[0:i]
self.input = self.input[i:]
def parseNumber(self):
largestIndex = len(self.input)
i = 0
while (i < largestIndex) and isdigit(self.input[i]):
i = i + 1
self.next = self.input[0:i]
self.input = self.input[i:]
def parseQuotedString(self):
largestIndex = len(self.input)
i = 1 # index 0 is known to be a quote.
while (i < largestIndex) and (self.input[i] != '"'):
i = i + 1
self.next = self.input[0:i+1]
self.input = self.input[i+1:]
def lex(self):
if self.input == "":
self.next = endOfInput
elif self.pendingCharacter() == '\n':
self.next = ";"
self.advanceCharacter()
elif isspace(self.pendingCharacter()):
self.advanceCharacter()
self.lex()
elif isalpha(self.pendingCharacter()):
self.parseIdentifier()
elif isdigit(self.pendingCharacter()):
self.parseNumber()
elif self.pendingCharacter() == '"':
self.parseQuotedString()
else:
self.next = self.pendingCharacter()
self.advanceCharacter()
def parseArguments(theLexer):
args = []
arg = []
while theLexer.current != endOfInput:
if theLexer.current == ')':
if arg:
args.append(arg)
break
elif theLexer.current == ',':
args.append(arg)
arg = []
theLexer.pump()
else:
arg = parseExpression(theLexer)
return args
def parseExpression(theLexer):
tree = []
while theLexer.current != endOfInput:
if theLexer.current == ",":
break
elif theLexer.current == ")":
break
elif theLexer.current == "(":
theLexer.pump()
args = parseArguments(theLexer)
if theLexer.current == ")":
if not tree:
tree = [Node("")]
if tree[-1].arguments:
tree.append(Node(""))
tree[-1].arguments = args
theLexer.pump()
else:
print "Syntax Error: ')' expected"
else:
tree.append(Node(theLexer.current))
theLexer.pump()
return tree
def parse(aString):
l = Lexer(aString)
return parseExpression(l)
// code from https://github.com/proglangclass/vm
// copied in one file for easy inspection
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
// runtime
enum { tObject, tNumber, tString };
typedef struct {
char type;
union {
char *string;
int number;
} value;
int refcount;
} Object;
static Object *TrueObject;
static Object *FalseObject;
static Object *NilObject;
Object *Object_new() {
return calloc(1, sizeof(Object));
}
/////////////// GC ///////////////
Object *retain(Object *self) {
self->refcount++;
return self;
}
void release(Object *self) {
self->refcount--;
if (self->refcount <= 0)
free(self);
}
///////////////////////////////////
char Object_is_true(Object *self) {
// false and nil are == false, everything else is true.
if (self == FalseObject || self == NilObject) return 0;
return 1;
}
Object *Number_new(int value) {
Object *object = Object_new();
object->type = tNumber;
object->value.number = value;
return object;
}
int Number_value(Object *self) {
assert(self->type == tNumber);
return self->value.number;
}
Object *String_new(char *value) {
Object *object = Object_new();
object->type = tString;
object->value.string = value;
return object;
}
Object *call(Object *receiver, char *message, Object *argv[], int argc) {
// HACK hardcoded methods. In a real runtime, you'd have proper classes and methods.
// See runtime.rb in our interpreter for an example.
if (strcmp(message, "+") == 0) {
// Number#+
return Number_new(receiver->value.number + argv[0]->value.number);
} else if (strcmp(message, "print") == 0) {
// Object#print
switch (argv[0]->type) {
case tNumber:
printf("%d\n", argv[0]->value.number);
return argv[0];
case tString:
printf("%s\n", argv[0]->value.string);
return argv[0];
}
}
return 0;
}
void init_runtime() {
TrueObject = retain(Object_new());
FalseObject = retain(Object_new());
NilObject = retain(Object_new());
}
void destroy_runtime() {
release(TrueObject);
release(FalseObject);
release(NilObject);
}
// opcode
enum {
// ------- Stack -------
// Opcode Operands before after
/* 00 */ CALL, // index, argc [rcv, arg...] [returned]
/* 01 */ PUSH_NUMBER, // index [] [number]
/* 02 */ PUSH_STRING, // index [] [string]
/* 03 */ PUSH_SELF, // [] [self]
/* 04 */ PUSH_NIL, // [] [nil]
/* 05 */ PUSH_BOOL, // 1=t, 0=f [] [true or false]
/* 06 */ GET_LOCAL, // index [] [value]
/* 07 */ SET_LOCAL, // index [value] []
/* 08 */ JUMP_UNLESS, // offset [test] []
/* 09 */ JUMP, // offset [] []
/* 10 */ ADD, // [a, b] [result]
/* 11 */ RETURN // [] []
};
typedef unsigned char byte; // 1 byte
// Helpers to play with the stack
#define LOCALS_MAX 10
#define STACK_MAX 10
#define STACK_PUSH(I) do { \
assert(sp-stack < STACK_MAX); \
*(++sp) = (I); \
retain(*sp); \
} while(0)
#define STACK_POP() (*sp--)
void run(void *literals[], byte instructions[]) {
byte *ip = instructions; // instruction pointer
Object *stack[STACK_MAX]; // THE famous stack
Object **sp = stack; // stack pointer, keeps track of current position
Object *locals[LOCALS_MAX] = {}; // where we store our local variables
// Setup the runtime
Object *self = Object_new();
retain(self);
// Start processing instructions
while (1) {
switch (*ip) {
case PUSH_NUMBER: {
ip++; // advance to the operand (literal index)
STACK_PUSH(Number_new((int)literals[*ip]));
break;
}
case PUSH_STRING: {
ip++; // advance to the operand (literal index)
STACK_PUSH(String_new((char *)literals[*ip]));
break;
}
case PUSH_SELF: {
STACK_PUSH(self);
break;
}
case PUSH_NIL: {
STACK_PUSH(NilObject);
break;
}
case PUSH_BOOL: {
ip++; // advance to operand (0 = false, 1 = true)
if (*ip == 0) {
STACK_PUSH(FalseObject);
} else {
STACK_PUSH(TrueObject);
}
break;
}
case CALL: {
ip++; // advance to operand (method name index in literals)
char *method = literals[*ip];
ip++; // advance to operand (# of args)
int argc = *ip;
Object *argv[10];
int i;
for (i = argc - 1; i >= 0; i--) argv[i] = STACK_POP();
Object *receiver = STACK_POP();
Object *result = call(receiver, method, argv, argc);
STACK_PUSH(result);
// Release all the objects
for (i = argc - 1; i >= 0; i--) release(argv[i]);
release(receiver);
break;
}
case RETURN: {
goto cleanup;
break;
}
case GET_LOCAL: {
ip++; // advance operand (local index)
STACK_PUSH(locals[*ip]);
break;
}
case SET_LOCAL: {
ip++; // local index
locals[*ip] = STACK_POP();
break;
}
case ADD: {
Object *a = STACK_POP();
Object *b = STACK_POP();
STACK_PUSH(Number_new(Number_value(a) + Number_value(b)));
release(a);
release(b);
break;
}
case JUMP_UNLESS: {
ip++; // operand (offset, # of bytes to jump forward)
byte offset = *ip;
Object *condition = STACK_POP();
if (!Object_is_true(condition)) ip += offset;
release(condition);
break;
}
case JUMP: {
ip++; // operand (offset, # of bytes to jump forward)
byte offset = *ip;
ip += offset;
break;
}
}
ip++;
}
cleanup:
release(self);
int i;
// Release all the local variables
for (i = 0; i < LOCALS_MAX; i++) if (locals[i]) release(locals[i]);
// Release everything left on the stack
while (sp > stack) release(STACK_POP());
}
// bytecode
/*
print("the answer is:")
a = 30 + 2
if false
print(a)
else
print("no")
end
*/
#define LITERALS { \
(void *) "the answer is:", \
(void *) "print", \
(void *) 30, \
(void *) 2, \
(void *) "no", \
}
#define INSTRUCTIONS { 3, 2, 0, 0, 1, 1, 1, 2, 1, 3, 10, 7, 0, 5, 0, 8, 8, 3, 6, 0, 0, 1, 1, 9, 6, 3, 2, 4, 0, 1, 1, 11 }
int main (int argc, char const *argv[]) {
// Get compiled literals table and instructions from bytecode.h
void *literals[] = LITERALS;
byte instructions[] = INSTRUCTIONS;
init_runtime();
// Run that thing!
run(literals, instructions);
destroy_runtime();
return 0;
}
'''
Implements the Straw Man (SM) language. SM is intended to explore
a minimal scripting language. This module is a prototype and has undergone
heavy tweaking, and has many known bugs. However,
the test cases at the end do all work more or less as promised.
To get a feel for SM, run the modules and examine the output of each test
case.
'''
from copy import copy
import re
# define a token in the straw-main language.
tokenRE = re.compile(r'".*?"|\(|\)|\}|\{|;|[^(){}\s]+')
# used to detect proper symbols; everything else is assumed to be a literal.
symbolRE = re.compile(r'[a-z][a-z0-9_]*')
class Node(object):
'An empty symbol node.'
def __init__(self, symbol):
self.sym = symbol
self.args = []
self.val = symbol
def __str__(self):
if not self.args:
return str(self.val)
else:
arg_strings = map(str, self.args)
return "%s(%s)" % (self.val, " ".join(arg_strings))
class DoNode(Node):
'An empty "do" symbol node, which uses the {} syntactic sugar.'
def __init__(self):
self.sym = 'do'
self.args = []
def __str__(self):
return "{%s}" % "; ".join(map(str, self.args))
class ValueNode(Node):
'A value node bound to a particular value at creation.'
def __init__(self, value):
self.sym = 'value'
self.val = value
self.args = []
def val(self):
return self.val
class Stack(list):
' a thin wrapper around list() to make it more stack-like.'
def peek(self):
' returns the top of the stack. '
return self[-1]
def push(self, top):
' adds an element to the top of the stack. '
self.append(top)
def parse(string):
'returns an AST for the SM grammer.'
main = Node('do')
# I love this trick; when working with any
# kind of tree, we can often avoid treating
# the root as a special case by simply
# starting with a dummy node above the root.
context = Stack()
context.push(main)
ast = None
for match in tokenRE.finditer(string):
token = match.group(0)
if token == ";":
if context.peek().sym != 'do':
raise SyntaxError, "misplaced semicolon!"
elif token == '(':
if context.peek() == ast:
raise SyntaxError, "'(' does not follow symbol!"
context.push(ast)
elif token == ')':
context.pop()
elif token == '{':
ast = DoNode()
context.peek().args.append(ast)
context.push(ast)
elif token == '}':
if context.peek().sym != 'do':
raise SyntaxError, "'}' terminating non-do block!"
context.pop()
elif symbolRE.match(token):
ast = Node(token)
context.peek().args.append(ast)
else: # must be a literal
ast = ValueNode(eval(token))
context.peek().args.append(ast)
return main
### eval functions for each primitive ###
def indexSafe(f):
'decorator to handle a common exception'
def newF(*x, **y):
try:
return f(*x, **y)
except:
return ValueNode('')
return newF
@indexSafe
def evalIf(ast, env):
if evaluate(ast.args[0], env):
return evaluate(ast.args[1], env)
else:
return evaluate(ast.args[2], env)
@indexSafe
def evalSet(ast, env):
name = ast.args[0].sym
value = evaluate(ast.args[1], env)
env[name] = value
return value
@indexSafe
def evalGt(ast, env):
left = int(evaluate(ast.args[0], env).val)
right = int(evaluate(ast.args[1], env).val)
return 'true' if left > right else ''
@indexSafe
def evalPrint(ast,env):
ret = ''
for arg in ast.args:
ret = evaluate(arg, env)
print ret,
print
return ret
@indexSafe
def evalDo(ast,env):
ret = ''
for arg in ast.args:
if arg.sym == 'return':
if len(arg.args) == 0: return ''
return evaluate(arg.args[0], env)
else:
ret = evaluate(arg, env)
return ret
@indexSafe
def evalAdd(ast,env):
left = int(evaluate(ast.args[0], env).val)
right = int(evaluate(ast.args[1], env).val)
return ValueNode(str(left + right))
@indexSafe
def evalEqual(ast,env):
left = evaluate(ast.args[0], env)
right = evaluate(ast.args[1], env)
return True if left == right else False
@indexSafe
def evalQuote(ast, env):
return copy(ast.args[0])
@indexSafe
def evalEval(ast, env):
return evaluate(evaluate(ast.args[0], env), env)
@indexSafe
def evalString(ast, env):
return ValueNode(str(ast))
@indexSafe
def evalParse(ast, env):
string = str(evaluate(ast.args[0], env).val)
return parse(string)
@indexSafe
def evalLambda(ast, env):
parameters = ast.args[0].args # not evaluated!
expression = ast.args[1]
l = Node('function')
p = Node('parameters')
p.args = parameters
l.args = [p, expression ]
return l
def evalData(ast, env):
return ast
@indexSafe
def evalProduct(ast, env):
left = int(evaluate(ast.args[0],env).val)
right = int(evaluate(ast.args[1],env).val)
return ValueNode(str(left * right))
@indexSafe
def evalVal(ast, env):
return ValueNode(evaluate(ast.args[0], env).val)
@indexSafe
def evalFor(ast, env):
'for(init,condition,increment,body)'
evaluate(ast.args[0], env)
ret = ValueNode('')
while evaluate(ast.args[1], env):
ret = evaluate(ast.args[3], env)
evaluate(ast.args[2], env)
return ret
@indexSafe
def evalForEach(ast, env):
symbol = ast.args[0].sym
collection = evaluate(ast.args[1],env)
body = ast.args[2]
ret = ValueNode('')
for arg in collection.args:
env[symbol] = arg
ret = evaluate(body,env)
return ret
keywords = {'do':evalDo,
'if':evalIf,
'set':evalSet,
'gt':evalGt,
'print':evalPrint,
'add':evalAdd,
'eq':evalEqual,
'quote':evalQuote,
'q':evalQuote,
'eval':evalEval,
'string':evalString,
'parse':evalParse,
'data':evalData,
'd':evalData,
'lambda':evalLambda,
'product':evalProduct,
'val':evalVal,
'for':evalFor,
'foreach':evalForEach,
}
def evaluate(ast, env={}):
' evaluates an Node given an environment.'
if ast.sym in keywords:
return keywords[ast.sym](ast, env)
elif ast.sym in env:
value = env[ast.sym]
if hasattr(value,'sym') and value.sym == 'function':
return functionCall(value, ast, env)
else:
return value
else:
return ast
def functionCall(function, call, env):
arguments = [ evaluate(arg, env) for arg in call.args ]
parameters = [ param.sym for param in function.args[0].args]
code = function.args[1]
localEnv = dict(zip(parameters, arguments))
localEnv.update(env)
return evaluate(code, localEnv)
def run(codeString,env={}):
'parses and evaluates an SM expression.'
return evaluate(parse(codeString),env)
tests = []
tests.append( '''
set(x 1)
for(set(i 0) gt(8 i) set(i add(i 1)) {
print("2 to the " i " = " x)
set(x add(x x))
})
''')
tests.append( '''
set(x 17);
set(y 8);
print( "x=" x "y=" y);
print( "changing x...");
set(x 9);
print( "x=" x "y=" y);
if( gt(x y)
{ print("x is greater than y."); set(z x); }
{ print("x is not greater than y."); set(z y); }
);
print( "greatest of x and y:" z );
print( "total of x and y:" add( x y) );
''')
tests.append( '''
set(string "Love");
if ( eq(string "Love")
{ set(string "Hate");}
)
print("string = " string)
return(z);
print( "not reached!" );
''')
tests.append( '''
set(code quote(print ("hello world!")) )
print ("code = '" code "'")
print ("when I evaluate the code, I should see 'hello world' as a side effect:")
eval(code)''')
tests.append( '''
set(x 1)
set(y 2)
set( sum quote(add(x y)) )
print ( "sum = " sum )
print ( "add(x y) = " add(x y))
print ( "eval(sum) = " eval(sum) )
print ( "setting x to 10..." )
set(x 10)
print ( "eval(sum) = " eval(sum) )
''')
tests.append( '''
set(code_string "print(hello world!)")
set(code parse(code_string))
print (code_string)
print (code)
eval(code)
''')
tests.append( '''
set(f lambda ( quote(x) add(x 1) ))
print( f(2))
set(metric lambda ( quote(x y) add(product(x x) product(y y)) ) )
print( metric(5 10) )
print( metric(3 4) )
''')
tests.append( '''
for( set(i 0) gt(10 i) set(i add(i 1)) {
print("i=" i)
})
''')
tests.append( '''
set(list data(1 2 3))
print(list)
foreach(element list {
print(element)
})
''')
tests.append( '''
set(tree data("Friends" ("John" "Jill") "Pets" ("Spot" "Tiger") ))
print(tree)
foreach(branch tree {
print(val(branch) ":")
foreach(leaf branch {
print(" " leaf)
})
})
''')
for t in tests:
print "code:",t
print "output:"
run(t,{})
print
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment