Created
November 3, 2020 15:12
-
-
Save lajarre/3c9cd7dda902ef439eaf055b88c58d12 to your computer and use it in GitHub Desktop.
Lisp parser and interpreter in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datatypes import Atom, List, Expression | |
class Interpreter: | |
class InterpreterError(Exception): | |
pass | |
PROC_MAPPING = {"+": lambda *args: sum(args)} | |
ast: Expression | |
def __init__(self, ast: Expression): | |
self.ast = ast | |
def run(self): | |
return self._interpret_node(self.ast) | |
def _interpret_node(self, node: Expression): | |
if self._is_List(node): | |
if len(node) == 0: | |
return None # XXX? | |
return self._apply_procedure(*node) | |
if self._is_Atom(node): | |
return node | |
def _apply_procedure(self, *nodes: List[Expression]): | |
proc = nodes[0] | |
if self._is_Str(proc): | |
return self.PROC_MAPPING[proc](*nodes[1:]) | |
if self._is_List(proc): | |
return self.PROC_MAPPING[self._interpret_node(proc)](*nodes[1:]) | |
raise self.InterpreterError("") | |
def _is_List(self, node): | |
return isinstance(node, list) | |
def _is_Str(self, node): | |
return isinstance(node, str) | |
def _is_Int(self, node): | |
return isinstance(node, int) | |
def _is_Float(self, node): | |
return isinstance(node, float) | |
def _is_Atom(self, node): | |
return self._is_Str(node) or self._is_Int(node) or self._is_Float(node) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datatypes import Atom, List, Expression | |
class InputStream: | |
class ParserException(Exception): | |
pass | |
pos = -1 | |
line = 1 | |
col = 0 | |
program = None | |
def __init__(self, program): | |
self.program = program | |
def next(self): | |
self.pos += 1 | |
char = self.program[self.pos] | |
if char == "\n": | |
self.line += 1 | |
self.col = 0 | |
else: | |
self.col += 1 | |
return char | |
def peek(self): | |
return self.program[self.pos + 1] | |
def eof(self): | |
try: | |
self.peek() | |
except IndexError: | |
return True | |
return False | |
def croak(self, message): | |
raise self.ParserException(f"{message} - At {self.line}:{self.col}") | |
class TokenStream: | |
WHITESPACE = [" ", "\t", "\n"] | |
SPECIAL_CHARS = ["(", ")"] | |
next_token = None | |
input_stream: InputStream | |
def __init__(self, input_stream): | |
self.input_stream = input_stream | |
self.next_token = self.get_next_token() | |
def next(self): | |
current_token = self.next_token | |
self.next_token = self.get_next_token() | |
return current_token | |
def peek(self): | |
return self.next_token | |
def eof(self): | |
return self.peek() == "" | |
def croak(self, message): | |
self.input_stream.croak( | |
f"{message} - Next token: {'EOF' if self.eof() else self.peek()}" | |
) | |
def get_next_token(self): | |
while ( | |
not self.input_stream.eof() and self.input_stream.peek() in self.WHITESPACE | |
): | |
self.input_stream.next() | |
if ( | |
not self.input_stream.eof() | |
and self.input_stream.peek() in self.SPECIAL_CHARS | |
): | |
return self.input_stream.next() | |
chars = "" | |
while ( | |
not self.input_stream.eof() | |
and self.input_stream.peek() not in self.WHITESPACE | |
and self.input_stream.peek() not in self.SPECIAL_CHARS | |
): | |
chars += self.input_stream.next() | |
return chars | |
# Build this as a stream? | |
class Parser: | |
LEFT_PARENS = "(" | |
RIGHT_PARENS = ")" | |
token_stream: TokenStream | |
def __init__(self, token_stream): | |
self.token_stream = token_stream | |
def parse_program(self) -> Expression: | |
node = self._parse() | |
if not self.token_stream.eof(): | |
self._raise_unexpected_token( | |
"EOF not reached at end of parse tree: check your parentheses." | |
) | |
return node | |
def _raise_unexpected_token(self, message): | |
self.token_stream.croak(message or "Unexpected token") | |
def _parse(self) -> Expression: | |
node: Expression = ( | |
self._parse_parens() | |
if self.token_stream.peek() == self.LEFT_PARENS | |
else self._parse_atom() | |
) | |
return node | |
def _parse_parens(self) -> List: | |
nodes: List = [] | |
parens_matched: bool = False | |
self.token_stream.next() | |
while not self.token_stream.eof(): | |
if self.token_stream.peek() == self.RIGHT_PARENS: | |
parens_matched = True | |
break | |
nodes.append(self._parse()) | |
self.token_stream.next() | |
if not parens_matched: | |
self._raise_unexpected_token( | |
"Right parenthesis not found: check your parentheses" | |
) | |
return nodes | |
def _parse_atom(self) -> Atom: | |
token = self.token_stream.next() | |
try: | |
return int(token) | |
except ValueError: | |
pass | |
try: | |
return float(token) | |
except ValueError: | |
pass | |
return token |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pytest | |
from parser import Parser, TokenStream, InputStream | |
from interpreter import Interpreter | |
def test_interpret_sum(): | |
program = "(+ 2 33)" | |
parser = Parser(TokenStream(InputStream(program))) | |
ast = parser.parse_program() | |
interpreter = Interpreter(ast) | |
assert interpreter.run() == 35 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pytest | |
from parser import Parser, TokenStream, InputStream | |
def test_1line_program_parses_well(): | |
program1line = """(first (list 1 (+ 2 33) 9))""" | |
parser = Parser(TokenStream(InputStream(program1line))) | |
assert parser.parse_program() == ["first", ["list", 1, ["+", 2, 33], 9]] | |
def test_multiline_program_parses_well(): | |
programmultiline = """(begin | |
(define r 10) | |
(* pi (* r r)) | |
)""" | |
parser = Parser(TokenStream(InputStream(programmultiline))) | |
assert parser.parse_program() == [ | |
"begin", | |
["define", "r", 10], | |
["*", "pi", ["*", "r", "r"]], | |
] | |
def test_program_with_raparens_missing_fails(): | |
programwitherror_rparensmissing = "(first(second 1)" | |
parser = Parser(TokenStream(InputStream(programwitherror_rparensmissing))) | |
with pytest.raises(InputStream.ParserException): | |
parser.parse_program() | |
def test_program_with_laparens_missing_fails(): | |
programwitherror_lparensmissing = "(first(second 1)" | |
parser = Parser(TokenStream(InputStream(programwitherror_lparensmissing))) | |
with pytest.raises(InputStream.ParserException): | |
parser.parse_program() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment