Skip to content

Instantly share code, notes, and snippets.

@eliasdorneles
Created June 9, 2016 19:23
Show Gist options
  • Save eliasdorneles/d04785be555046de66d0237d47aaa663 to your computer and use it in GitHub Desktop.
Save eliasdorneles/d04785be555046de66d0237d47aaa663 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Lisp parser (future interpreter)
"""
from __future__ import print_function
import sys
import re
from collections import namedtuple
PY2 = sys.version_info.major == 2
raw_input = raw_input if PY2 else input
class LispSyntaxError(ValueError):
"Raised for errors in Lisp syntax"
VALID_IDENT_CHARS = r'-a-zA-Z?!+*/%'
TOKENS = [
('IDENTIFIER',
'[' + VALID_IDENT_CHARS + '][' + VALID_IDENT_CHARS + '0-9]*'),
('LPAREN', r'[(]'),
('RPAREN', r'[)]'),
('NUMBER', r'\d+'),
]
def read_file(path):
with open(path) as f:
return f.read()
Token = namedtuple('Token', 'name value')
def _tokenize(s):
"""Generate a list of tokens, ignoring whitespaces
"""
cur_pos = 0
while cur_pos < len(s):
m = re.match(r'^\s+', s[cur_pos:])
if m:
cur_pos += m.span()[1]
continue
for tok_name, tok_re in TOKENS:
m = re.match(r'^' + tok_re, s[cur_pos:])
if m:
yield Token(tok_name, m.group())
cur_pos += m.span()[1]
break
else:
raise LispSyntaxError("Unexpected character: %s (position: %d)" % (s[cur_pos], cur_pos))
def lex(input):
return list(_tokenize(input))
def parse(input, single_expr=False, debug=False):
"""Parse minimalist Lisp grammar.
If single_expr=True, will expect a single expression (useful for REPL).
If debug=True, will print the tokens before parsing.
Based on recursive descent parser code at Wikipedia:
https://en.wikipedia.org/wiki/Recursive_descent_parser
"""
tokens = lex(input)
if debug:
print('Tokens:', tokens)
tokens = iter(tokens)
current_token_holder = [None]
def current():
"""Return current token"""
return current_token_holder[0]
def next_token():
"""Advance to next token"""
try:
current_token_holder[0] = next(tokens)
except:
current_token_holder[0] = None
def accept(tok_name):
"""If current token matches, return its value and advance to next"""
if current() is not None and current().name == tok_name:
value = current().value
next_token()
return value
def expect(tok_name):
"""Accept a token returning its value, or else raise an error
"""
got = current()
value = accept(tok_name)
if not value:
if got is not None:
got = got.value
raise LispSyntaxError("Error: Expected %s but got %s"
% (tok_name, got))
return value
def parse_expr():
"""Parse a single expression.
expr := NUMBER | IDENTIFIER | ( IDENTIFIER expr_list )
"""
value = accept('NUMBER')
if value:
return int(value)
value = accept('IDENTIFIER')
if value:
return value
value = accept('LPAREN')
if value:
ident = expect('IDENTIFIER')
expr_list = parse_expr_list()
expect('RPAREN')
return [ident] + expr_list
def parse_expr_list():
"""Parse an expression list.
expr_list := expr | expr EXPR_LIST
"""
result = []
value = parse_expr()
while value is not None:
result.append(value)
value = parse_expr()
return result
next_token()
if single_expr:
result = parse_expr()
else:
result = parse_expr_list()
if current() is not None:
raise LispSyntaxError('Unexpected token: {}'.format(current().value))
return result
def repl(debug=False):
"""Read-Eval-Print Loop
Currently, this is more like a Read-Parse-Print-Loop :)
"""
def readline():
try:
return raw_input('> ')
except EOFError:
return None
code = readline()
while code:
try:
print(parse(code, single_expr=True, debug=debug))
except LispSyntaxError as e:
print('%s' % e)
code = readline()
def parse_args():
import argparse
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('input', nargs='?',
help='Source input (if not given, will read stdin)')
parser.add_argument('--debug', action='store_true')
return parser.parse_args()
def main():
args = parse_args()
should_repl = not args.input and sys.stdin.isatty()
if should_repl:
repl(debug=args.debug)
else:
if args.input:
with open(args.input) as f:
code = f.read()
else:
code = sys.stdin.read()
try:
print(parse(code, debug=args.debug))
except LispSyntaxError as e:
print(e)
sys.exit(1)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment