Skip to content

Instantly share code, notes, and snippets.

@jvkersch
Created December 12, 2017 18:11
Show Gist options
  • Save jvkersch/3d693b51b8540288fb0c19f9ca9d0b59 to your computer and use it in GitHub Desktop.
Save jvkersch/3d693b51b8540288fb0c19f9ca9d0b59 to your computer and use it in GitHub Desktop.
scheme interpreter
#!/usr/bin/env python3
# TODO
# 2. support block structure
from collections import deque, namedtuple
import contextlib
from functools import reduce
import operator
import re
import sys
# Types
Token = namedtuple('Token', ['kind', 'value', 'line', 'column'])
Tree = namedtuple('Tree', ['value', 'children', 'primitive'])
# Token types
INTEGER = 'integer'
FLOAT = 'float'
NUMBER = 'number'
STRING = 'string'
OPEN_BRACKET = 'open_bracket'
CLOSE_BRACKET = 'close_bracket'
NEWLINE = 'newline'
SKIP = 'skip'
LITERAL = 'literal'
MISMATCH = 'mismatch'
# Token specification
NUMBERS = (INTEGER, FLOAT)
PRIMITIVE = NUMBERS + (STRING,)
TOKEN_SPEC = [
(OPEN_BRACKET, r'\('),
(CLOSE_BRACKET, r'\)'),
(NUMBER, r'\d+(\.\d*)?'),
(STRING, r'".*"'),
(NEWLINE, r'\n'),
(SKIP, r'[ \t]+'),
(LITERAL, r'[^\s\(\)"]+'),
(MISMATCH, r'.'),
]
TOKEN_RE = '|'.join('(?P<{}>{})'.format(*pair) for pair in TOKEN_SPEC)
TOKEN_RE = re.compile(TOKEN_RE)
# Exceptions
class InterpreterError(Exception):
pass
class ParseError(InterpreterError):
def __init__(self, msg, line=-1, column=-1):
if line != -1 and column != -1:
msg = f"{msg} (line {line}, column {column})"
super(InterpreterError, self).__init__(msg)
self.line = line
self.column = column
@classmethod
def from_token(cls, msg, token):
return cls(msg, line=token.line, column=token.column)
class SymbolError(InterpreterError):
def __init__(self, symname):
super(SymbolError, self).__init__(f"Unknown symbol: {symname}")
class SyntacticError(InterpreterError):
pass
class EvaluationError(InterpreterError):
def __init__(self, runtime_msg):
super(EvaluationError, self).__init__(f"Runtime error: {runtime_msg}")
# Functions built into the interpreter and user-defined symbols
class Function:
def __init__(self, parameters, body):
self.arg_names = self._validate_argnames(parameters)
self.body = body
def _validate_argnames(self, parameters):
argnames = []
for p in parameters:
name = p.value
if name is None or len(p.children) > 0:
raise SyntacticError("Function value cannot be compound")
if p.primitive:
raise SyntacticError(f"Cannot use primitive value {name} "
f"as a formal parameter")
argnames.append(name)
return argnames
def __str__(self):
return "function {} => <body>".format(', '.join(self.arg_names))
def __call__(self, args):
if len(args) != len(self.arg_names):
raise EvaluationError("Wrong number of arguments")
named_args = {
name: eval_tree(arg) for name, arg in zip(self.arg_names, args)
}
with on_stack(named_args):
result = eval_tree(self.body)
return result
def _define_symbol_builtin(args):
if len(args) != 2:
raise SyntacticError("Wrong number of arguments in define expression")
symbname, symbval = args
if symbname.value is not None:
# defining a variable
if symbname.primitive:
raise SyntacticError("Attempting to define a primitive value.")
USER_DEFINED[symbname.value] = val = eval_tree(symbval)
return f"{symbname.value} <- {val}"
else:
# defining a function
funcname, *funcargs = symbname.children
USER_DEFINED[funcname.value] = f = Function(funcargs, symbval)
return f"{funcname.value} <- {f}"
def _if_symbol_builtin(args):
if len(args) != 3:
raise SyntacticError("Malformed if expression")
cond = eval_tree(args[0])
return eval_tree(args[1] if cond else args[2])
def _make_builtin(binop):
def _builtin(args):
try:
return reduce(binop, [eval_tree(arg) for arg in args])
except Exception as e:
raise EvaluationError(str(e))
return _builtin
BUILTINS = {
'*': _make_builtin(operator.imul),
'+': _make_builtin(operator.iadd),
'-': _make_builtin(operator.isub),
'/': _make_builtin(operator.itruediv),
'define': _define_symbol_builtin,
'if': _if_symbol_builtin,
}
# Dictionary of user-defined symbols (primitve values or functions)
USER_DEFINED = {}
# Stack of dictionaries with function parameters. With each function call, the
# function parameters are pushed onto the stack.
ARGUMENTS_STACK = []
def lookup_symbol(symname):
symbol = None
for args in ARGUMENTS_STACK[::-1]:
symbol = args.get(symname)
if symbol is not None:
break
if symbol is None:
symbol = USER_DEFINED.get(symname)
if symbol is None:
symbol = BUILTINS.get(symname)
if symbol is None:
raise SymbolError(symname)
return symbol
@contextlib.contextmanager
def on_stack(args):
try:
ARGUMENTS_STACK.append(args)
yield
finally:
ARGUMENTS_STACK.pop()
# Tokenizing ##################################################################
def tokenize(code):
depth = 0
tokens = []
line_num = 1
line_start = 0
for mo in re.finditer(TOKEN_RE, code):
column = mo.start() - line_start
kind = mo.lastgroup
value = mo.group(kind)
if kind == NEWLINE:
line_num += 1
line_start = mo.end()
elif kind == SKIP:
pass
elif kind == MISMATCH:
raise RuntimeError(f'{line_num}:{column} {value!r} unexpected.')
else:
if kind == NUMBER:
if '.' in value:
kind = FLOAT
value = float(value)
else:
kind = INTEGER
value = int(value)
elif kind == STRING:
value = value[1:-1]
elif kind == OPEN_BRACKET:
depth += 1
elif kind == CLOSE_BRACKET:
depth -= 1
tokens.append(Token(kind, value, line_num, column))
return tokens, depth
# Building the AST ############################################################
def consume_next(t_queue):
try:
return t_queue.popleft()
except IndexError:
return None
def peek_next(t_queue):
try:
return t_queue[0]
except IndexError:
return None
def _make_tree(tokens_queue):
if len(tokens_queue) == 0:
return Tree(None, [], False)
token = consume_next(tokens_queue)
if token.kind in PRIMITIVE:
return Tree(token.value, [], primitive=True), tokens_queue
elif token.kind == LITERAL:
return Tree(token.value, [], primitive=False), tokens_queue
elif token.kind == OPEN_BRACKET:
children = []
while True:
next_token = peek_next(tokens_queue)
if next_token is None:
raise ParseError.from_token("Unexpected end of line", token)
if next_token.kind == CLOSE_BRACKET:
consume_next(tokens_queue)
break
child, _ = _make_tree(tokens_queue)
children.append(child)
return Tree(None, children, primitive=False), tokens_queue
else:
raise ParseError.from_token("Unexpected data", token)
def make_tree_from_tokens(tokens):
tokens_queue = deque(tokens)
tree, remaining = _make_tree(tokens_queue)
return tree, remaining
# Evaluating the AST ##########################################################
def eval_tree(tree):
if tree.value is not None:
if tree.primitive:
return tree.value
else:
return lookup_symbol(tree.value)
else:
assert len(tree.children) > 0 # FIXME what to do with ()?
operator = eval_tree(tree.children[0])
return operator(tree.children[1:])
# Read-eval-print loop ########################################################
REPL_CANCELED = "canceled"
REPL_BREAK_MULTILINE = "break-multiline"
REPL_OK = "multiline-ok"
def read_and_tokenize():
try:
line = input("--> ")
except EOFError:
print()
return REPL_CANCELED, []
tokens, depth = tokenize(line)
while depth != 0:
# Read additional lines
try:
line = input("... ")
except EOFError:
print()
return REPL_BREAK_MULTILINE, []
add_tokens, add_depth = tokenize(line)
tokens.extend(add_tokens)
depth += add_depth
return REPL_OK, tokens
def repl():
while True:
try:
status, tokens = read_and_tokenize()
if status == REPL_CANCELED:
break
elif status == REPL_BREAK_MULTILINE:
continue
while tokens:
tree, tokens = make_tree_from_tokens(tokens)
result = eval_tree(tree)
print(result)
except InterpreterError as e:
print(e, file=sys.stderr)
continue
if __name__ == '__main__':
repl()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment