Created
December 7, 2010 04:59
-
-
Save jdp/731475 to your computer and use it in GitHub Desktop.
A toy tail-recursive concatenative language implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import types | |
import operator | |
class Runtime: | |
def __init__(self, env={}, stack=[]): | |
self.env = { | |
# Primitive words, not an impressive base but it works | |
'.': lambda s,r: sys.stdout.write(repr(s.pop()) + "\n"), | |
'+': lambda s,r: self._math(s, operator.add), | |
'-': lambda s,r: self._math(s, operator.sub), | |
'*': lambda s,r: self._math(s, operator.mul), | |
'/': lambda s,r: self._math(s, operator.div), | |
'mod': lambda s,r: self._math(s, operator.mod), | |
'>': lambda s,r: self._math(s, operator.gt), | |
'<': lambda s,r: self._math(s, operator.lt), | |
'=': lambda s,r: self._math(s, operator.eq), | |
'f': lambda s,r: s.append(False), | |
'not': lambda s,r: s.append(not s.pop()), | |
'drop': lambda s,r: s.pop(), | |
'dup': lambda s,r: s.append(s[-1]), | |
'swap': self._swap, | |
'over': lambda s,r: s.append(s[-2]), | |
'clear': self._clear, | |
'quote': lambda s,r: s.append(list(s.pop())), | |
'call': self._call, | |
'r>': lambda s,r: r.append(s.pop()), | |
'>r': lambda s,r: s.append(r.pop()), | |
'?': self._which, | |
'curry': self._curry, | |
# Non-primitive words -- in essence, the prelude | |
'dip': ['swap', 'r>', 'call', '>r'], | |
'if': ['?', 'call'], | |
'when': ['swap', ['call'], ['drop'], 'if'], | |
'keep': ['over', ['call'], 'dip'], | |
'loop': [['call'], 'keep', ['loop'], 'curry', 'when'] | |
} | |
self.env.update(env) | |
self.stack = stack | |
self.frames = [] # Fudge of Factor's callstack | |
self.retain = [] # The retain stack, for >r and r> | |
def _swap(self, stack, retain): | |
stack[-1], stack[-2] = stack[-2], stack[-1] | |
def _clear(self, stack, retain): | |
del stack[:] | |
def _which(self, stack, retain): | |
falseq = stack.pop() | |
trueq = stack.pop() | |
stack.append(trueq if stack.pop() else falseq) | |
def _curry(self, stack, retain): | |
quot = stack.pop()[:] | |
quot.insert(0, stack.pop()) | |
stack.append(quot) | |
def _call(self, stack, retain): | |
if self.ptr < len(self.nodes) - 1: | |
self.frames.append((self.nodes, self.ptr + 1)) | |
self.ptr, self.nodes = -1, self.stack.pop() | |
def _math(self, stack, func): | |
rhs = stack.pop() | |
lhs = stack.pop() | |
stack.append(func(lhs, rhs)) | |
def evaluate(self, nodes): | |
self.nodes = nodes | |
self.retain, self.frames, self.ptr = [], [], 0 | |
while True: | |
try: | |
node = self.nodes[self.ptr] | |
# Strings are words, so a lookup is attempted. | |
if isinstance(node, str): | |
# Check if the word is defined in the environment. | |
if self.env.has_key(node): | |
defn = self.env[node] | |
# Python lists in the environment are quotations. | |
# They are from the prelude, or they are user-defined words. | |
if isinstance(defn, list): | |
# A call frame is only saved if the word being called | |
# is not the last word in the quotation. | |
# Tail-call optimization achieved. :) | |
if self.ptr < len(self.nodes) - 1: | |
self.frames.append((self.nodes, self.ptr + 1)) | |
self.ptr, self.nodes = -1, defn | |
else: | |
defn(self.stack, self.retain) | |
# To keep with Factor syntax, the : word-name definition... ; | |
# design choice was made. It would be entirely possible to | |
# have definitions be postfix as well, and probably more | |
# consistent with the implementation. | |
elif ':' == node: | |
name = self.nodes[self.ptr + 1] | |
body = [] | |
self.ptr += 2 | |
# Build a quotation until ; word is found, and add it to | |
# the environment. | |
while self.nodes[self.ptr] != ';': | |
body.append(self.nodes[self.ptr]) | |
self.ptr += 1 | |
self.env[name] = body | |
else: | |
raise NameError, "word %s not defined" % node | |
else: | |
self.stack.append(node) | |
self.ptr += 1 | |
# End of quotation reached, so pop off the call frame and | |
# start from there. | |
except IndexError: | |
try: | |
(self.nodes, self.ptr) = self.frames.pop() | |
# No more call frames. Program is terminated. | |
except IndexError: | |
return self.stack | |
return self.stack | |
def read(s): | |
return parse(tokenize(s)) | |
def tokenize(s): | |
return s.split() | |
def parse(tokens, depth=0): | |
program = [] | |
if len(tokens) == 0: | |
return program | |
while len(tokens) > 0: | |
token = tokens.pop(0) | |
if '[' == token: | |
program.append(parse(tokens)) | |
elif ']' == token: | |
return program | |
else: | |
program.append(atom(token)) | |
return program | |
def atom(token): | |
"Numbers become numbers; every other token is a symbol." | |
try: return int(token) | |
except ValueError: | |
try: return float(token) | |
except ValueError: | |
return str(token) | |
def repl(): | |
runtime = Runtime() | |
while True: | |
result = runtime.evaluate(read(raw_input("> "))) | |
print "-- data stack --" | |
print "\n".join(map(lambda el: repr(el), result)) | |
if __name__ == '__main__': | |
repl() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is a very simple toy implementation of the Factor programming language, inspired by the (How to Write a (Lisp) Interpreter (in Python)) tutorial. It even cribs some of the code from it directly. It supports quotations, numbers, word definitions, and tail calls. If you're unfamiliar with concatenative languages, try the concatenative.org wiki, as it's an excellent jumping off point.
On the interpreter: most of the space is taken up by the primitive and word definitions, the meat of it is in the
evaluate
method ofRuntime
. Rather than explain it here, I tried to comment the inner workings of the interpreter in theevaluate
method. Like lis.py, it treats a Python data types as the counterpart data type in Factor, so Python lists are Factor quotations, strings are words, and numbers are numbers.The
tokenize
function just splits a string by whitespace, in this implementation there are no strings so it's safe to assume each lexeme is just a whitespace-delimited sequence of characters. For example, the input[ 1 2 + ] call
is split turned into['[', '1', '2', '+', ']', 'call']
.The
parse
function takes a list of tokens and builds one big quotation out of it. Quotations are represented as Python lists, so it's valid input for theevaluate
method. Going on the example from before, the tokens are parsed into a list looking like[[1, 2, '+'], 'call']
.The resulting list is finally passed into the evaluator. At the highest level, the evaluator just operates on a single quotation. When a string (a word) is encountered, the evaluator looks it up in the
env
variable and tries to apply the associated value. If it's a Python callable, it's called, and if it's a list, the current frame is pushed to the call stack and the evaluator starts working on the list associated with the word. Tail-calls are implemented by not pushing the current frame if the word is the last element in the quotation. Any other value encountered by the evaluator is pushed to the data stack.