Skip to content

Instantly share code, notes, and snippets.

@jdp
Created December 7, 2010 04:59
Show Gist options
  • Save jdp/731475 to your computer and use it in GitHub Desktop.
Save jdp/731475 to your computer and use it in GitHub Desktop.
A toy tail-recursive concatenative language implementation
#!/usr/bin/env python
import sys
import types
import operator
class Runtime:
def __init__(self, env={}, stack=[]):
self.env = {
# Primitive words, not an impressive base but it works
'.': lambda s,r: sys.stdout.write(repr(s.pop()) + "\n"),
'+': lambda s,r: self._math(s, operator.add),
'-': lambda s,r: self._math(s, operator.sub),
'*': lambda s,r: self._math(s, operator.mul),
'/': lambda s,r: self._math(s, operator.div),
'mod': lambda s,r: self._math(s, operator.mod),
'>': lambda s,r: self._math(s, operator.gt),
'<': lambda s,r: self._math(s, operator.lt),
'=': lambda s,r: self._math(s, operator.eq),
'f': lambda s,r: s.append(False),
'not': lambda s,r: s.append(not s.pop()),
'drop': lambda s,r: s.pop(),
'dup': lambda s,r: s.append(s[-1]),
'swap': self._swap,
'over': lambda s,r: s.append(s[-2]),
'clear': self._clear,
'quote': lambda s,r: s.append(list(s.pop())),
'call': self._call,
'r>': lambda s,r: r.append(s.pop()),
'>r': lambda s,r: s.append(r.pop()),
'?': self._which,
'curry': self._curry,
# Non-primitive words -- in essence, the prelude
'dip': ['swap', 'r>', 'call', '>r'],
'if': ['?', 'call'],
'when': ['swap', ['call'], ['drop'], 'if'],
'keep': ['over', ['call'], 'dip'],
'loop': [['call'], 'keep', ['loop'], 'curry', 'when']
}
self.env.update(env)
self.stack = stack
self.frames = [] # Fudge of Factor's callstack
self.retain = [] # The retain stack, for >r and r>
def _swap(self, stack, retain):
stack[-1], stack[-2] = stack[-2], stack[-1]
def _clear(self, stack, retain):
del stack[:]
def _which(self, stack, retain):
falseq = stack.pop()
trueq = stack.pop()
stack.append(trueq if stack.pop() else falseq)
def _curry(self, stack, retain):
quot = stack.pop()[:]
quot.insert(0, stack.pop())
stack.append(quot)
def _call(self, stack, retain):
if self.ptr < len(self.nodes) - 1:
self.frames.append((self.nodes, self.ptr + 1))
self.ptr, self.nodes = -1, self.stack.pop()
def _math(self, stack, func):
rhs = stack.pop()
lhs = stack.pop()
stack.append(func(lhs, rhs))
def evaluate(self, nodes):
self.nodes = nodes
self.retain, self.frames, self.ptr = [], [], 0
while True:
try:
node = self.nodes[self.ptr]
# Strings are words, so a lookup is attempted.
if isinstance(node, str):
# Check if the word is defined in the environment.
if self.env.has_key(node):
defn = self.env[node]
# Python lists in the environment are quotations.
# They are from the prelude, or they are user-defined words.
if isinstance(defn, list):
# A call frame is only saved if the word being called
# is not the last word in the quotation.
# Tail-call optimization achieved. :)
if self.ptr < len(self.nodes) - 1:
self.frames.append((self.nodes, self.ptr + 1))
self.ptr, self.nodes = -1, defn
else:
defn(self.stack, self.retain)
# To keep with Factor syntax, the : word-name definition... ;
# design choice was made. It would be entirely possible to
# have definitions be postfix as well, and probably more
# consistent with the implementation.
elif ':' == node:
name = self.nodes[self.ptr + 1]
body = []
self.ptr += 2
# Build a quotation until ; word is found, and add it to
# the environment.
while self.nodes[self.ptr] != ';':
body.append(self.nodes[self.ptr])
self.ptr += 1
self.env[name] = body
else:
raise NameError, "word %s not defined" % node
else:
self.stack.append(node)
self.ptr += 1
# End of quotation reached, so pop off the call frame and
# start from there.
except IndexError:
try:
(self.nodes, self.ptr) = self.frames.pop()
# No more call frames. Program is terminated.
except IndexError:
return self.stack
return self.stack
def read(s):
return parse(tokenize(s))
def tokenize(s):
return s.split()
def parse(tokens, depth=0):
program = []
if len(tokens) == 0:
return program
while len(tokens) > 0:
token = tokens.pop(0)
if '[' == token:
program.append(parse(tokens))
elif ']' == token:
return program
else:
program.append(atom(token))
return program
def atom(token):
"Numbers become numbers; every other token is a symbol."
try: return int(token)
except ValueError:
try: return float(token)
except ValueError:
return str(token)
def repl():
runtime = Runtime()
while True:
result = runtime.evaluate(read(raw_input("> ")))
print "-- data stack --"
print "\n".join(map(lambda el: repr(el), result))
if __name__ == '__main__':
repl()
@jdp
Copy link
Author

jdp commented Dec 8, 2010

This is a very simple toy implementation of the Factor programming language, inspired by the (How to Write a (Lisp) Interpreter (in Python)) tutorial. It even cribs some of the code from it directly. It supports quotations, numbers, word definitions, and tail calls. If you're unfamiliar with concatenative languages, try the concatenative.org wiki, as it's an excellent jumping off point.

On the interpreter: most of the space is taken up by the primitive and word definitions, the meat of it is in the evaluatemethod of Runtime. Rather than explain it here, I tried to comment the inner workings of the interpreter in the evaluate method. Like lis.py, it treats a Python data types as the counterpart data type in Factor, so Python lists are Factor quotations, strings are words, and numbers are numbers.

The tokenize function just splits a string by whitespace, in this implementation there are no strings so it's safe to assume each lexeme is just a whitespace-delimited sequence of characters. For example, the input [ 1 2 + ] call is split turned into ['[', '1', '2', '+', ']', 'call'].

The parse function takes a list of tokens and builds one big quotation out of it. Quotations are represented as Python lists, so it's valid input for the evaluate method. Going on the example from before, the tokens are parsed into a list looking like [[1, 2, '+'], 'call'].

The resulting list is finally passed into the evaluator. At the highest level, the evaluator just operates on a single quotation. When a string (a word) is encountered, the evaluator looks it up in the env variable and tries to apply the associated value. If it's a Python callable, it's called, and if it's a list, the current frame is pushed to the call stack and the evaluator starts working on the list associated with the word. Tail-calls are implemented by not pushing the current frame if the word is the last element in the quotation. Any other value encountered by the evaluator is pushed to the data stack.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment