Skip to content

Instantly share code, notes, and snippets.

@lajarre
Created November 3, 2020 15:12
Show Gist options
  • Save lajarre/3c9cd7dda902ef439eaf055b88c58d12 to your computer and use it in GitHub Desktop.
Save lajarre/3c9cd7dda902ef439eaf055b88c58d12 to your computer and use it in GitHub Desktop.
Lisp parser and interpreter in Python
from datatypes import Atom, List, Expression
class Interpreter:
class InterpreterError(Exception):
pass
PROC_MAPPING = {"+": lambda *args: sum(args)}
ast: Expression
def __init__(self, ast: Expression):
self.ast = ast
def run(self):
return self._interpret_node(self.ast)
def _interpret_node(self, node: Expression):
if self._is_List(node):
if len(node) == 0:
return None # XXX?
return self._apply_procedure(*node)
if self._is_Atom(node):
return node
def _apply_procedure(self, *nodes: List[Expression]):
proc = nodes[0]
if self._is_Str(proc):
return self.PROC_MAPPING[proc](*nodes[1:])
if self._is_List(proc):
return self.PROC_MAPPING[self._interpret_node(proc)](*nodes[1:])
raise self.InterpreterError("")
def _is_List(self, node):
return isinstance(node, list)
def _is_Str(self, node):
return isinstance(node, str)
def _is_Int(self, node):
return isinstance(node, int)
def _is_Float(self, node):
return isinstance(node, float)
def _is_Atom(self, node):
return self._is_Str(node) or self._is_Int(node) or self._is_Float(node)
from datatypes import Atom, List, Expression
class InputStream:
class ParserException(Exception):
pass
pos = -1
line = 1
col = 0
program = None
def __init__(self, program):
self.program = program
def next(self):
self.pos += 1
char = self.program[self.pos]
if char == "\n":
self.line += 1
self.col = 0
else:
self.col += 1
return char
def peek(self):
return self.program[self.pos + 1]
def eof(self):
try:
self.peek()
except IndexError:
return True
return False
def croak(self, message):
raise self.ParserException(f"{message} - At {self.line}:{self.col}")
class TokenStream:
WHITESPACE = [" ", "\t", "\n"]
SPECIAL_CHARS = ["(", ")"]
next_token = None
input_stream: InputStream
def __init__(self, input_stream):
self.input_stream = input_stream
self.next_token = self.get_next_token()
def next(self):
current_token = self.next_token
self.next_token = self.get_next_token()
return current_token
def peek(self):
return self.next_token
def eof(self):
return self.peek() == ""
def croak(self, message):
self.input_stream.croak(
f"{message} - Next token: {'EOF' if self.eof() else self.peek()}"
)
def get_next_token(self):
while (
not self.input_stream.eof() and self.input_stream.peek() in self.WHITESPACE
):
self.input_stream.next()
if (
not self.input_stream.eof()
and self.input_stream.peek() in self.SPECIAL_CHARS
):
return self.input_stream.next()
chars = ""
while (
not self.input_stream.eof()
and self.input_stream.peek() not in self.WHITESPACE
and self.input_stream.peek() not in self.SPECIAL_CHARS
):
chars += self.input_stream.next()
return chars
# Build this as a stream?
class Parser:
LEFT_PARENS = "("
RIGHT_PARENS = ")"
token_stream: TokenStream
def __init__(self, token_stream):
self.token_stream = token_stream
def parse_program(self) -> Expression:
node = self._parse()
if not self.token_stream.eof():
self._raise_unexpected_token(
"EOF not reached at end of parse tree: check your parentheses."
)
return node
def _raise_unexpected_token(self, message):
self.token_stream.croak(message or "Unexpected token")
def _parse(self) -> Expression:
node: Expression = (
self._parse_parens()
if self.token_stream.peek() == self.LEFT_PARENS
else self._parse_atom()
)
return node
def _parse_parens(self) -> List:
nodes: List = []
parens_matched: bool = False
self.token_stream.next()
while not self.token_stream.eof():
if self.token_stream.peek() == self.RIGHT_PARENS:
parens_matched = True
break
nodes.append(self._parse())
self.token_stream.next()
if not parens_matched:
self._raise_unexpected_token(
"Right parenthesis not found: check your parentheses"
)
return nodes
def _parse_atom(self) -> Atom:
token = self.token_stream.next()
try:
return int(token)
except ValueError:
pass
try:
return float(token)
except ValueError:
pass
return token
import pytest
from parser import Parser, TokenStream, InputStream
from interpreter import Interpreter
def test_interpret_sum():
program = "(+ 2 33)"
parser = Parser(TokenStream(InputStream(program)))
ast = parser.parse_program()
interpreter = Interpreter(ast)
assert interpreter.run() == 35
import pytest
from parser import Parser, TokenStream, InputStream
def test_1line_program_parses_well():
program1line = """(first (list 1 (+ 2 33) 9))"""
parser = Parser(TokenStream(InputStream(program1line)))
assert parser.parse_program() == ["first", ["list", 1, ["+", 2, 33], 9]]
def test_multiline_program_parses_well():
programmultiline = """(begin
(define r 10)
(* pi (* r r))
)"""
parser = Parser(TokenStream(InputStream(programmultiline)))
assert parser.parse_program() == [
"begin",
["define", "r", 10],
["*", "pi", ["*", "r", "r"]],
]
def test_program_with_raparens_missing_fails():
programwitherror_rparensmissing = "(first(second 1)"
parser = Parser(TokenStream(InputStream(programwitherror_rparensmissing)))
with pytest.raises(InputStream.ParserException):
parser.parse_program()
def test_program_with_laparens_missing_fails():
programwitherror_lparensmissing = "(first(second 1)"
parser = Parser(TokenStream(InputStream(programwitherror_lparensmissing)))
with pytest.raises(InputStream.ParserException):
parser.parse_program()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment