Created
March 15, 2016 00:39
-
-
Save SegFaultAX/14f59b5d76247f45cf4d to your computer and use it in GitHub Desktop.
Simple lisp-like reader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Much of this code is either directly copied from or heavily inspired by the
# following example Lisp interpreter. The essential structure of this parser is
# mostly an exact replica, and I thank the original author for his
# inspirational work.
# Author: Peter Norvig
# Source: http://norvig.com/lispy2.html (http://norvig.com/lispy.py)
import re | |
__author__ = "Michael-Keith Bernard" | |
__all__ = ["read", "to_str"] | |
class Symbol(str):
    """A Lisp symbol.

    A plain ``str`` subclass with no extra behaviour; the distinct type is
    what lets the rest of the module tell a symbol apart from a string
    literal with ``isinstance``.
    """


# End-of-input marker. The reader compares against it by identity
# (``is EOF``), so a token that merely spells the same text is not mistaken
# for it.
EOF = Symbol("#<END-OF-FILE>")
# Regex applied to the start of a line: group 1 is the next token (possibly
# empty), group 2 is whatever is left of the line after it.
TOKENIZER = (
    r"(?:\s|,)*"               # leading whitespace; commas count as whitespace
    r"([()]"                   # token: a single parenthesis,
    r'|"(?:[\\].|[^\\"])*"'    #   or a double-quoted string with backslash escapes,
    r"|;.*"                    #   or a comment running to the end of the line,
    r'|[^\s(";,)]*)'           #   or a bare run of non-delimiter characters
    r"(.*)"                    # remainder of the line
)
def sym(s, table):
    """Return the Symbol stored in *table* under the name *s*.

    If *s* is not yet a key of *table*, a new ``Symbol(s)`` is stored under
    it first, so repeated lookups of the same name through the same table
    return the same object.
    """
    return table.setdefault(s, Symbol(s))
def atom(token, table):
    """Convert a single non-parenthesis token into a Python value.

    Conversions are tried in this order:

    * ``#t`` / ``true`` / ``True``              -> ``True``
    * ``#f`` / ``false`` / ``False``            -> ``False``
    * ``nil`` / ``null`` / ``none`` / ``None``  -> ``None``
    * a token starting with ``"``               -> the string between the
      quotes, with backslash escapes decoded
    * anything ``int``, ``float`` or ``complex`` accepts -> that number
      (the first ``i`` is rewritten to ``j`` before trying ``complex``)
    * everything else -> a Symbol interned in *table* via ``sym``

    NOTE: the numeric fallbacks are deliberately permissive, so tokens such
    as ``inf`` and ``nan`` read as floats, and a bare ``i`` or ``j`` reads as
    ``1j`` rather than as a symbol.

    Raises ValueError if a string token contains a malformed escape such as
    a truncated ``\\x`` sequence.
    """
    if token in ("#t", "true", "True"):
        return True
    if token in ("#f", "false", "False"):
        return False
    if token in ("nil", "null", "none", "None"):
        return None

    # startswith() instead of token[0] so an empty token cannot raise
    # IndexError; it simply falls through to the symbol case.
    if token.startswith('"'):
        body = token[1:-1]
        try:
            # Python 2 byte strings.
            return body.decode("string_escape")
        except AttributeError:
            # Python 3 str has no decode(). Round-trip through latin-1 so
            # unicode_escape sees the original code points; characters
            # outside latin-1 become \u escapes that it decodes back.
            encoded = body.encode("latin-1", "backslashreplace")
            return encoded.decode("unicode_escape")

    for convert in (int, float):
        try:
            return convert(token)
        except ValueError:
            pass

    try:
        return complex(token.replace("i", "j", 1))
    except ValueError:
        return sym(token, table)
def to_str(obj):
    """Render a Python value in the textual form the reader parses.

    * ``True`` / ``False`` / ``None`` -> ``true`` / ``false`` / ``null``
    * list or tuple -> the elements rendered recursively, space separated,
      inside parentheses
    * complex -> ``a+bi`` notation; the parentheses Python puts around
      ``str(1+2j)`` are removed because the reader would otherwise parse the
      result as a one-element list
    * Symbol -> returned unchanged
    * str -> double-quoted, with backslash escaping applied and embedded
      double quotes escaped
    * anything else -> ``str(obj)``
    """
    # Identity checks come first: bool is an int subclass, so True and False
    # must not reach the numeric branch below.
    if obj is True:
        return "true"
    if obj is False:
        return "false"
    if obj is None:
        return "null"
    if isinstance(obj, (list, tuple)):
        return "({})".format(" ".join(to_str(e) for e in obj))
    if isinstance(obj, complex):
        return str(obj).strip("()").replace("j", "i")
    if isinstance(obj, (int, float)):
        return str(obj)
    # Symbol is a str subclass, so it has to be tested before plain str.
    if isinstance(obj, Symbol):
        return obj
    if isinstance(obj, str):
        try:
            escaped = obj.encode("string_escape")  # Python 2 byte strings
        except LookupError:
            # Python 3 has no "string_escape" codec; unicode_escape gives
            # the equivalent backslash escaping as ASCII bytes.
            escaped = obj.encode("unicode_escape").decode("ascii")
        return '"{}"'.format(escaped.replace('"', r'\"'))
    return str(obj)
def tokenize(s):
    """Yield the tokens of the source text *s*, one string at a time.

    The text is processed line by line with the TOKENIZER regex. Whitespace
    and commas separate tokens; a ``;`` starts a comment that runs to the end
    of its line and is dropped. Blank lines are skipped.

    The stream simply ends after the last token: no end-of-file sentinel is
    emitted, which is what the tokenizer tests in this module expect.

    Raises SyntaxError when a line contains a string literal whose closing
    quote is missing.
    """
    for line in s.splitlines():
        while line:
            token, rest = re.match(TOKENIZER, line).groups()
            if not token and rest:
                # An empty token with text still pending means the regex
                # stopped at a '"' that never closes; looping again would
                # make no progress.
                raise SyntaxError(
                    "unterminated string literal: {}".format(rest))
            line = rest
            if token and not token.startswith(";"):
                yield token
def read_next(tokens, table):
    """Read one complete datum from the token iterator *tokens*.

    Parameters:
        tokens: an iterator of token strings. Running out of tokens is
            treated the same as receiving the EOF marker.
        table: symbol table handed to ``atom`` for interning symbols.

    Returns:
        A nested list for a parenthesised form, the value produced by
        ``atom`` for any other token, or ``EOF`` when the input ends before
        a new datum starts.

    Raises:
        SyntaxError: on a ``)`` with no open list, or when the input ends
            while a list is still open.
    """
    # Lists that have been opened but not yet closed, innermost last. An
    # explicit stack keeps deeply nested input clear of the recursion limit.
    open_lists = []
    while True:
        # The default argument keeps StopIteration from leaking out of this
        # function when the token stream is exhausted.
        token = next(tokens, EOF)
        if token is EOF:
            if open_lists:
                raise SyntaxError("unexpected EOF in list")
            return EOF
        if token == "(":
            open_lists.append([])
            continue
        if token == ")":
            if not open_lists:
                raise SyntaxError("unexpected )")
            value = open_lists.pop()
        else:
            value = atom(token, table)
        if not open_lists:
            return value
        open_lists[-1].append(value)
def read(expr, table=None):
    """Lazily parse every top-level datum in the source text *expr*.

    Parameters:
        expr: the text to parse.
        table: optional symbol table (a dict mapping names to Symbols)
            shared across calls; a fresh one is created when omitted.

    Yields:
        Each parsed datum in order, as returned by ``read_next``.

    Because this is a generator, a SyntaxError raised by ``read_next``
    surfaces only when iteration reaches the offending datum.
    """
    if table is None:
        table = {}
    tokens = tokenize(expr)
    while True:
        try:
            datum = read_next(tokens, table)
        except StopIteration:
            # The token stream ran dry without an EOF marker. End cleanly
            # here: a StopIteration escaping a generator body becomes a
            # RuntimeError under PEP 479.
            return
        if datum is EOF:
            return
        yield datum
if __name__ == "__main__":
    import unittest

    class TestReader(unittest.TestCase):
        """Checks for the tokenizer, the reader and the printer.

        Each test maps a source text to its expected result. The loops use
        dict.items(), which exists on both Python 2 and 3, rather than the
        Python 2 only iteritems().
        """

        def setUp(self):
            self.table = {}

        def test_tokenizer(self):
            exprs = {
                "a": ["a"],
                "a b c": ["a", "b", "c"],
                '"Hello, world!"': ['"Hello, world!"'],
                "123": ["123"],
                "4.56": ["4.56"],
                "(1 2 3)": ["(", "1", "2", "3", ")"],
                "(1, 2, 3)": ["(", "1", "2", "3", ")"],
                "(1 2 (3 4))": ["(", "1", "2", "(", "3", "4", ")", ")"],
            }
            for expr, result in exprs.items():
                self.assertEqual(list(tokenize(expr)), result)

        def test_reader(self):
            exprs = {
                "a": [Symbol("a")],
                "a b c": [Symbol("a"), Symbol("b"), Symbol("c")],
                '"Hello, world!"': ['Hello, world!'],
                "123": [123],
                "4.56": [4.56],
                "(1 2 3)": [[1, 2, 3]],
                "(1, 2, 3)": [[1, 2, 3]],
                "(1 2 (3 4))": [[1, 2, [3, 4]]],
            }
            for expr, result in exprs.items():
                self.assertEqual(list(read(expr, self.table)), result)

        def test_to_str(self):
            exprs = {
                "a": [Symbol("a")],
                "a b c": [Symbol("a"), Symbol("b"), Symbol("c")],
                '"Hello, world!"': ['Hello, world!'],
                "123": [123],
                "4.56": [4.56],
                "(1 2 3)": [[1, 2, 3]],
                "(1 2 (3 4))": [[1, 2, [3, 4]]],
            }
            for expr, result in exprs.items():
                self.assertEqual(" ".join(to_str(e) for e in result), expr)

    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment