Created
December 12, 2017 18:11
-
-
Save jvkersch/3d693b51b8540288fb0c19f9ca9d0b59 to your computer and use it in GitHub Desktop.
scheme interpreter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# TODO | |
# 2. support block structure | |
from collections import deque, namedtuple | |
import contextlib | |
from functools import reduce | |
import operator | |
import re | |
import sys | |
# Types | |
Token = namedtuple('Token', ['kind', 'value', 'line', 'column']) | |
Tree = namedtuple('Tree', ['value', 'children', 'primitive']) | |
# Token types | |
INTEGER = 'integer' | |
FLOAT = 'float' | |
NUMBER = 'number' | |
STRING = 'string' | |
OPEN_BRACKET = 'open_bracket' | |
CLOSE_BRACKET = 'close_bracket' | |
NEWLINE = 'newline' | |
SKIP = 'skip' | |
LITERAL = 'literal' | |
MISMATCH = 'mismatch' | |
# Token specification | |
NUMBERS = (INTEGER, FLOAT) | |
PRIMITIVE = NUMBERS + (STRING,) | |
TOKEN_SPEC = [ | |
(OPEN_BRACKET, r'\('), | |
(CLOSE_BRACKET, r'\)'), | |
(NUMBER, r'\d+(\.\d*)?'), | |
(STRING, r'".*"'), | |
(NEWLINE, r'\n'), | |
(SKIP, r'[ \t]+'), | |
(LITERAL, r'[^\s\(\)"]+'), | |
(MISMATCH, r'.'), | |
] | |
TOKEN_RE = '|'.join('(?P<{}>{})'.format(*pair) for pair in TOKEN_SPEC) | |
TOKEN_RE = re.compile(TOKEN_RE) | |
# Exceptions | |
class InterpreterError(Exception): | |
pass | |
class ParseError(InterpreterError): | |
def __init__(self, msg, line=-1, column=-1): | |
if line != -1 and column != -1: | |
msg = f"{msg} (line {line}, column {column})" | |
super(InterpreterError, self).__init__(msg) | |
self.line = line | |
self.column = column | |
@classmethod | |
def from_token(cls, msg, token): | |
return cls(msg, line=token.line, column=token.column) | |
class SymbolError(InterpreterError): | |
def __init__(self, symname): | |
super(SymbolError, self).__init__(f"Unknown symbol: {symname}") | |
class SyntacticError(InterpreterError): | |
pass | |
class EvaluationError(InterpreterError): | |
def __init__(self, runtime_msg): | |
super(EvaluationError, self).__init__(f"Runtime error: {runtime_msg}") | |
# Functions built into the interpreter and user-defined symbols | |
class Function: | |
def __init__(self, parameters, body): | |
self.arg_names = self._validate_argnames(parameters) | |
self.body = body | |
def _validate_argnames(self, parameters): | |
argnames = [] | |
for p in parameters: | |
name = p.value | |
if name is None or len(p.children) > 0: | |
raise SyntacticError("Function value cannot be compound") | |
if p.primitive: | |
raise SyntacticError(f"Cannot use primitive value {name} " | |
f"as a formal parameter") | |
argnames.append(name) | |
return argnames | |
def __str__(self): | |
return "function {} => <body>".format(', '.join(self.arg_names)) | |
def __call__(self, args): | |
if len(args) != len(self.arg_names): | |
raise EvaluationError("Wrong number of arguments") | |
named_args = { | |
name: eval_tree(arg) for name, arg in zip(self.arg_names, args) | |
} | |
with on_stack(named_args): | |
result = eval_tree(self.body) | |
return result | |
def _define_symbol_builtin(args): | |
if len(args) != 2: | |
raise SyntacticError("Wrong number of arguments in define expression") | |
symbname, symbval = args | |
if symbname.value is not None: | |
# defining a variable | |
if symbname.primitive: | |
raise SyntacticError("Attempting to define a primitive value.") | |
USER_DEFINED[symbname.value] = val = eval_tree(symbval) | |
return f"{symbname.value} <- {val}" | |
else: | |
# defining a function | |
funcname, *funcargs = symbname.children | |
USER_DEFINED[funcname.value] = f = Function(funcargs, symbval) | |
return f"{funcname.value} <- {f}" | |
def _if_symbol_builtin(args): | |
if len(args) != 3: | |
raise SyntacticError("Malformed if expression") | |
cond = eval_tree(args[0]) | |
return eval_tree(args[1] if cond else args[2]) | |
def _make_builtin(binop): | |
def _builtin(args): | |
try: | |
return reduce(binop, [eval_tree(arg) for arg in args]) | |
except Exception as e: | |
raise EvaluationError(str(e)) | |
return _builtin | |
BUILTINS = { | |
'*': _make_builtin(operator.imul), | |
'+': _make_builtin(operator.iadd), | |
'-': _make_builtin(operator.isub), | |
'/': _make_builtin(operator.itruediv), | |
'define': _define_symbol_builtin, | |
'if': _if_symbol_builtin, | |
} | |
# Dictionary of user-defined symbols (primitve values or functions) | |
USER_DEFINED = {} | |
# Stack of dictionaries with function parameters. With each function call, the | |
# function parameters are pushed onto the stack. | |
ARGUMENTS_STACK = [] | |
def lookup_symbol(symname): | |
symbol = None | |
for args in ARGUMENTS_STACK[::-1]: | |
symbol = args.get(symname) | |
if symbol is not None: | |
break | |
if symbol is None: | |
symbol = USER_DEFINED.get(symname) | |
if symbol is None: | |
symbol = BUILTINS.get(symname) | |
if symbol is None: | |
raise SymbolError(symname) | |
return symbol | |
@contextlib.contextmanager | |
def on_stack(args): | |
try: | |
ARGUMENTS_STACK.append(args) | |
yield | |
finally: | |
ARGUMENTS_STACK.pop() | |
# Tokenizing ################################################################## | |
def tokenize(code): | |
depth = 0 | |
tokens = [] | |
line_num = 1 | |
line_start = 0 | |
for mo in re.finditer(TOKEN_RE, code): | |
column = mo.start() - line_start | |
kind = mo.lastgroup | |
value = mo.group(kind) | |
if kind == NEWLINE: | |
line_num += 1 | |
line_start = mo.end() | |
elif kind == SKIP: | |
pass | |
elif kind == MISMATCH: | |
raise RuntimeError(f'{line_num}:{column} {value!r} unexpected.') | |
else: | |
if kind == NUMBER: | |
if '.' in value: | |
kind = FLOAT | |
value = float(value) | |
else: | |
kind = INTEGER | |
value = int(value) | |
elif kind == STRING: | |
value = value[1:-1] | |
elif kind == OPEN_BRACKET: | |
depth += 1 | |
elif kind == CLOSE_BRACKET: | |
depth -= 1 | |
tokens.append(Token(kind, value, line_num, column)) | |
return tokens, depth | |
# Building the AST ############################################################ | |
def consume_next(t_queue): | |
try: | |
return t_queue.popleft() | |
except IndexError: | |
return None | |
def peek_next(t_queue): | |
try: | |
return t_queue[0] | |
except IndexError: | |
return None | |
def _make_tree(tokens_queue): | |
if len(tokens_queue) == 0: | |
return Tree(None, [], False) | |
token = consume_next(tokens_queue) | |
if token.kind in PRIMITIVE: | |
return Tree(token.value, [], primitive=True), tokens_queue | |
elif token.kind == LITERAL: | |
return Tree(token.value, [], primitive=False), tokens_queue | |
elif token.kind == OPEN_BRACKET: | |
children = [] | |
while True: | |
next_token = peek_next(tokens_queue) | |
if next_token is None: | |
raise ParseError.from_token("Unexpected end of line", token) | |
if next_token.kind == CLOSE_BRACKET: | |
consume_next(tokens_queue) | |
break | |
child, _ = _make_tree(tokens_queue) | |
children.append(child) | |
return Tree(None, children, primitive=False), tokens_queue | |
else: | |
raise ParseError.from_token("Unexpected data", token) | |
def make_tree_from_tokens(tokens): | |
tokens_queue = deque(tokens) | |
tree, remaining = _make_tree(tokens_queue) | |
return tree, remaining | |
# Evaluating the AST ########################################################## | |
def eval_tree(tree): | |
if tree.value is not None: | |
if tree.primitive: | |
return tree.value | |
else: | |
return lookup_symbol(tree.value) | |
else: | |
assert len(tree.children) > 0 # FIXME what to do with ()? | |
operator = eval_tree(tree.children[0]) | |
return operator(tree.children[1:]) | |
# Read-eval-print loop ######################################################## | |
REPL_CANCELED = "canceled" | |
REPL_BREAK_MULTILINE = "break-multiline" | |
REPL_OK = "multiline-ok" | |
def read_and_tokenize(): | |
try: | |
line = input("--> ") | |
except EOFError: | |
print() | |
return REPL_CANCELED, [] | |
tokens, depth = tokenize(line) | |
while depth != 0: | |
# Read additional lines | |
try: | |
line = input("... ") | |
except EOFError: | |
print() | |
return REPL_BREAK_MULTILINE, [] | |
add_tokens, add_depth = tokenize(line) | |
tokens.extend(add_tokens) | |
depth += add_depth | |
return REPL_OK, tokens | |
def repl(): | |
while True: | |
try: | |
status, tokens = read_and_tokenize() | |
if status == REPL_CANCELED: | |
break | |
elif status == REPL_BREAK_MULTILINE: | |
continue | |
while tokens: | |
tree, tokens = make_tree_from_tokens(tokens) | |
result = eval_tree(tree) | |
print(result) | |
except InterpreterError as e: | |
print(e, file=sys.stderr) | |
continue | |
if __name__ == '__main__': | |
repl() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment