Skip to content

Instantly share code, notes, and snippets.

@SegFaultAX
Created March 15, 2016 00:39
Show Gist options
  • Save SegFaultAX/14f59b5d76247f45cf4d to your computer and use it in GitHub Desktop.
Save SegFaultAX/14f59b5d76247f45cf4d to your computer and use it in GitHub Desktop.
Simple lisp-like reader
#!/usr/bin/env python
# Much of this code is either directly copied from or heavily inspired by the
# following example lisp interpreter. The essential structure of this parser is
# mostly an exact replica, and for that I thank the original author for his
# inspirational work.
# Author: Peter Norvig
# Source: http://norvig.com/lispy2.html (http://norvig.com/lispy.py)
import re
__author__ = "Michael-Keith Bernard"
# Public API of the module: only the reader entry point and the printer are
# exported by a star-import; the other module-level names are left out.
__all__ = ["read", "to_str"]
class Symbol(str):
    """A Lisp symbol.

    A plain ``str`` subclass with no extra behaviour.  The distinct type is
    what lets the printer tell an identifier (printed bare) apart from a
    string literal (printed quoted and escaped).
    """
# Sentinel object marking the end of the token stream.  Consumers compare
# against it by identity (``is EOF``), not by string value.
EOF = Symbol("#<END-OF-FILE>")
# One tokenizing step over a single line of input:
#   (?:\s|,)*            skip leading whitespace and commas
#   group 1 (the token), first alternative that matches:
#       [()]                     a single parenthesis
#       "(?:[\\].|[^\\"])*"      a double-quoted string with backslash escapes
#       ;.*                      a comment running to the end of the line
#       [^\s(";,)]*              a run of any other characters (may be empty)
#   group 2 (.*)         the remainder of the line, to be tokenized next
TOKENIZER = r"""(?:\s|,)*([()]|"(?:[\\].|[^\\"])*"|;.*|[^\s(";,)]*)(.*)"""
def sym(s, table):
    """Return the interned Symbol for the name ``s``.

    ``table`` is a mutable mapping from names to Symbol objects.  The first
    request for a name stores a new Symbol in it; later requests for the same
    name return that same stored object.

    Args:
        s: The symbol name.
        table: Mapping used as the symbol table; mutated in place.

    Returns:
        The Symbol stored in ``table`` under ``s``.
    """
    # setdefault both inserts on a miss and returns the stored value, so no
    # second lookup is needed.
    return table.setdefault(s, Symbol(s))
def _unescape_string(text):
    """Resolve backslash escapes in ``text`` on both Python 2 and Python 3."""
    try:
        decode = text.decode
    except AttributeError:
        # Python 3: ``str`` has no ``decode`` and the "string_escape" codec is
        # gone.  Go through bytes instead: characters outside Latin-1 are
        # turned into ``\uXXXX`` escapes, which ``unicode_escape`` then turns
        # back into the original characters along with the user's escapes.
        return text.encode("latin-1", "backslashreplace").decode("unicode_escape")
    return decode("string_escape")


def atom(token, table):
    """Convert a single non-parenthesis token into a Python value.

    Conversions are attempted in this order:

    * ``#t`` / ``true`` / ``True``            -> ``True``
    * ``#f`` / ``false`` / ``False``          -> ``False``
    * ``nil`` / ``null`` / ``none`` / ``None`` -> ``None``
    * a token starting with ``"``             -> the string between the
      quotes, with backslash escapes resolved
    * anything ``int()`` accepts              -> ``int``
    * anything ``float()`` accepts            -> ``float``
    * anything ``complex()`` accepts after the first ``i`` is replaced by
      ``j``                                   -> ``complex``
    * everything else                         -> a Symbol interned in ``table``

    Note that the numeric constructors are Python's own, so tokens such as
    ``inf`` or a lone ``i`` are read as numbers rather than symbols.

    Args:
        token: The token text.
        table: Symbol table passed through to ``sym``.

    Returns:
        The Python value the token denotes.
    """
    if token in ("#t", "true", "True"):
        return True
    if token in ("#f", "false", "False"):
        return False
    if token in ("nil", "null", "none", "None"):
        return None
    if token.startswith('"'):
        # Drop the surrounding quotes before resolving escapes.
        return _unescape_string(token[1:-1])

    for convert in (int, float):
        try:
            return convert(token)
        except ValueError:
            pass

    try:
        # Lisp-style imaginary suffix "i" -> Python's "j".
        return complex(token.replace("i", "j", 1))
    except ValueError:
        return sym(token, table)
def _escape_string(text):
    """Backslash-escape ``text`` for printing inside double quotes."""
    try:
        # Python 2 byte strings.
        escaped = text.encode("string_escape")
    except LookupError:
        # Python 3 has no "string_escape" codec; "unicode_escape" produces an
        # ASCII-only escaped form of the text instead.
        escaped = text.encode("unicode_escape").decode("ascii")
    # Neither codec escapes the double quote, which delimits our literals.
    return escaped.replace('"', r'\"')


def to_str(obj):
    """Render a Python value as reader syntax.

    * ``True`` / ``False`` / ``None`` -> ``true`` / ``false`` / ``null``
    * Symbol                          -> returned as-is (bare name)
    * ``str``                         -> double-quoted, backslash-escaped
    * ``list`` / ``tuple``            -> ``(e1 e2 ...)`` with each element
      rendered recursively
    * ``complex``                     -> Python's form with ``j`` replaced by
      ``i`` and without the surrounding parentheses
    * anything else                   -> ``str(obj)``

    Args:
        obj: The value to render.

    Returns:
        The textual form of ``obj``.
    """
    # Identity checks so that the integers 1 and 0 are not printed as booleans.
    if obj is True:
        return "true"
    if obj is False:
        return "false"
    if obj is None:
        return "null"
    # Symbol must be tested before str because Symbol is a str subclass.
    if isinstance(obj, Symbol):
        return obj
    if isinstance(obj, str):
        return '"{}"'.format(_escape_string(obj))
    if isinstance(obj, (list, tuple)):
        return "({})".format(" ".join(to_str(e) for e in obj))
    if isinstance(obj, complex):
        # str(1+2j) is "(1+2j)"; left in place, the parentheses would make
        # the printed value read back as a one-element list.
        return str(obj).strip("()").replace("j", "i")
    return str(obj)
def tokenize(s):
    """Yield the tokens of ``s`` one at a time.

    The input is processed line by line; each line is repeatedly matched
    against ``TOKENIZER``, which peels one token off the front and returns
    the rest of the line.  Comment tokens (starting with ``;``) and empty
    matches are not yielded.  Empty lines are skipped.  The generator simply
    finishes when the input is exhausted.

    Args:
        s: Source text to tokenize.

    Yields:
        Token strings: ``(``, ``)``, quoted string literals (quotes and
        escapes intact) and bare atoms.

    Raises:
        SyntaxError: If a line contains a string literal with no closing
            quote on the same line.
    """
    for line in s.splitlines():
        while line:
            token, rest = re.match(TOKENIZER, line).groups()
            if token == "" and rest != "":
                # The only character that yields an empty token while text
                # remains is a double quote that does not open a complete
                # string literal.  Re-matching would never make progress.
                raise SyntaxError("unterminated string literal")
            if token != "" and not token.startswith(";"):
                yield token
            line = rest
def read_next(tokens, table):
    """Read one complete expression from the token iterator.

    Args:
        tokens: Iterator of tokens.  Running out of tokens is treated the
            same as receiving the ``EOF`` sentinel.
        table: Symbol table passed through to ``atom``.

    Returns:
        ``EOF`` if there are no more tokens; otherwise the parsed value: a
        (possibly nested) list for a parenthesised form, or whatever ``atom``
        returns for a single token.

    Raises:
        SyntaxError: On a ``)`` with no matching ``(``, or when the tokens
            end inside an open list.
    """

    def read_form(token):
        # Identity check: only the sentinel object itself means end of input.
        if token is EOF:
            raise SyntaxError("unexpected EOF in list")
        if token == ")":
            raise SyntaxError("unexpected )")
        if token != "(":
            return atom(token, table)

        items = []
        while True:
            # The default turns iterator exhaustion into EOF, so an unclosed
            # list is reported instead of leaking StopIteration.
            token = next(tokens, EOF)
            if token == ")":
                return items
            items.append(read_form(token))

    first = next(tokens, EOF)
    if first is EOF:
        return EOF
    return read_form(first)
def read(expr, table=None):
    """Lazily parse every top-level expression in ``expr``.

    Args:
        expr: Source text.
        table: Optional symbol table (name -> Symbol) to intern symbols in.
            A fresh dict is used when omitted.

    Yields:
        Each parsed top-level expression, in source order.
    """
    if table is None:
        table = {}
    tokens = tokenize(expr)
    while True:
        try:
            parsed = read_next(tokens, table)
        except StopIteration:
            # The token stream ran dry.  Finish cleanly: a StopIteration
            # escaping a generator body is a RuntimeError under PEP 479.
            return
        if parsed is EOF:
            return
        yield parsed
if __name__ == "__main__":
    # Self-test: running the module as a script executes the unit tests.
    import unittest

    class TestReader(unittest.TestCase):
        """Table-driven tests for the tokenizer, the reader and the printer."""

        def setUp(self):
            self.table = {}

        def test_tokenizer(self):
            """Each source string splits into the expected token list."""
            exprs = {
                "a": ["a"],
                "a b c": ["a", "b", "c"],
                '"Hello, world!"': ['"Hello, world!"'],
                "123": ["123"],
                "4.56": ["4.56"],
                "(1 2 3)": ["(", "1", "2", "3", ")"],
                "(1, 2, 3)": ["(", "1", "2", "3", ")"],
                "(1 2 (3 4))": ["(", "1", "2", "(", "3", "4", ")", ")"],
            }
            # .items() rather than .iteritems(): the latter is Python 2 only.
            for expr, result in exprs.items():
                self.assertEqual(list(tokenize(expr)), result)

        def test_reader(self):
            """Each source string parses into the expected values."""
            exprs = {
                "a": [Symbol("a")],
                "a b c": [Symbol("a"), Symbol("b"), Symbol("c")],
                '"Hello, world!"': ['Hello, world!'],
                "123": [123],
                "4.56": [4.56],
                "(1 2 3)": [[1, 2, 3]],
                "(1, 2, 3)": [[1, 2, 3]],
                "(1 2 (3 4))": [[1, 2, [3, 4]]],
            }
            for expr, result in exprs.items():
                self.assertEqual(list(read(expr, self.table)), result)

        def test_to_str(self):
            """Printing the values reproduces the source string."""
            exprs = {
                "a": [Symbol("a")],
                "a b c": [Symbol("a"), Symbol("b"), Symbol("c")],
                '"Hello, world!"': ['Hello, world!'],
                "123": [123],
                "4.56": [4.56],
                "(1 2 3)": [[1, 2, 3]],
                "(1 2 (3 4))": [[1, 2, [3, 4]]],
            }
            for expr, result in exprs.items():
                self.assertEqual(" ".join(to_str(e) for e in result), expr)

    unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment