eliasdorneles · June 9, 2016 19:23
diff --git a/my_lisp_parser.py b/my_lisp_parser.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """Lisp parser (future interpreter)
 """

 from __future__ import print_function
 import sys
 import re
 from collections import namedtuple


 PY2 = sys.version_info.major == 2

 raw_input = raw_input if PY2 else input


 class LispSyntaxError(ValueError):
    "Raised for errors in Lisp syntax"


 VALID_IDENT_CHARS = r'-a-zA-Z?!+*/%'

 TOKENS = [
    ('IDENTIFIER',
     '[' + VALID_IDENT_CHARS + '][' + VALID_IDENT_CHARS + '0-9]*'),
    ('LPAREN', r'[(]'),
    ('RPAREN', r'[)]'),
    ('NUMBER', r'\d+'),
 ]


 def read_file(path):
    with open(path) as f:
        return f.read()


 Token = namedtuple('Token', 'name value')


 def _tokenize(s):
    """Generate a list of tokens, ignoring whitespaces
    """
    cur_pos = 0
    while cur_pos < len(s):
        m = re.match(r'^\s+', s[cur_pos:])
        if m:
            cur_pos += m.span()[1]
            continue

        for tok_name, tok_re in TOKENS:
            m = re.match(r'^' + tok_re, s[cur_pos:])
            if m:
                yield Token(tok_name, m.group())
                cur_pos += m.span()[1]
                break
        else:
            raise LispSyntaxError("Unexpected character: %s (position: %d)" % (s[cur_pos], cur_pos))


 def lex(input):
    return list(_tokenize(input))


 def parse(input, single_expr=False, debug=False):
    """Parse minimalist Lisp grammar.
    If single_expr=True, will expect a single expression (useful for REPL).
    If debug=True, will print the tokens before parsing.

    Based on recursive descent parser code at Wikipedia:
    https://en.wikipedia.org/wiki/Recursive_descent_parser
    """
    tokens = lex(input)

    if debug:
        print('Tokens:', tokens)

    tokens = iter(tokens)
    current_token_holder = [None]

    def current():
        """Return current token"""
        return current_token_holder[0]

    def next_token():
        """Advance to next token"""
        try:
            current_token_holder[0] = next(tokens)
        except:
            current_token_holder[0] = None

    def accept(tok_name):
        """If current token matches, return its value and advance to next"""
        if current() is not None and current().name == tok_name:
            value = current().value
            next_token()
            return value

    def expect(tok_name):
        """Accept a token returning its value, or else raise an error
        """
        got = current()
        value = accept(tok_name)
        if not value:
            if got is not None:
                got = got.value
            raise LispSyntaxError("Error: Expected %s but got %s"
                                  % (tok_name, got))
        return value

    def parse_expr():
        """Parse a single expression.

        expr := NUMBER | IDENTIFIER | ( IDENTIFIER expr_list )
        """
        value = accept('NUMBER')
        if value:
            return int(value)

        value = accept('IDENTIFIER')
        if value:
            return value

        value = accept('LPAREN')
        if value:
            ident = expect('IDENTIFIER')
            expr_list = parse_expr_list()
            expect('RPAREN')
            return [ident] + expr_list

    def parse_expr_list():
        """Parse an expression list.

        expr_list := expr | expr EXPR_LIST
        """
        result = []
        value = parse_expr()
        while value is not None:
            result.append(value)
            value = parse_expr()
        return result

    next_token()
    if single_expr:
        result = parse_expr()
    else:
        result = parse_expr_list()

    if current() is not None:
        raise LispSyntaxError('Unexpected token: {}'.format(current().value))

    return result


 def repl(debug=False):
    """Read-Eval-Print Loop

    Currently, this is more like a Read-Parse-Print-Loop :)
    """
    def readline():
        try:
            return raw_input('> ')
        except EOFError:
            return None

    code = readline()
    while code:
        try:
            print(parse(code, single_expr=True, debug=debug))
        except LispSyntaxError as e:
            print('%s' % e)
        code = readline()


 def parse_args():
    import argparse
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input', nargs='?',
                        help='Source input (if not given, will read stdin)')
    parser.add_argument('--debug', action='store_true')

    return parser.parse_args()


 def main():
    args = parse_args()
    should_repl = not args.input and sys.stdin.isatty()
    if should_repl:
        repl(debug=args.debug)
    else:
        if args.input:
            with open(args.input) as f:
                code = f.read()
        else:
            code = sys.stdin.read()

        try:
            print(parse(code, debug=args.debug))
        except LispSyntaxError as e:
            print(e)
            sys.exit(1)


 if __name__ == '__main__':
    main()
	#!/usr/bin/env python
	# -- coding: utf-8 --
	"""Lisp parser (future interpreter)
	"""

	from __future__ import print_function
	import sys
	import re
	from collections import namedtuple


	PY2 = sys.version_info.major == 2

	raw_input = raw_input if PY2 else input


	class LispSyntaxError(ValueError):
	"Raised for errors in Lisp syntax"


	VALID_IDENT_CHARS = r'-a-zA-Z?!+*/%'

	TOKENS = [
	('IDENTIFIER',
	'[' + VALID_IDENT_CHARS + '][' + VALID_IDENT_CHARS + '0-9]*'),
	('LPAREN', r'[(]'),
	('RPAREN', r'[)]'),
	('NUMBER', r'\d+'),
	]


	def read_file(path):
	with open(path) as f:
	return f.read()


	Token = namedtuple('Token', 'name value')


	def _tokenize(s):
	"""Generate a list of tokens, ignoring whitespaces
	"""
	cur_pos = 0
	while cur_pos < len(s):
	m = re.match(r'^\s+', s[cur_pos:])
	if m:
	cur_pos += m.span()[1]
	continue

	for tok_name, tok_re in TOKENS:
	m = re.match(r'^' + tok_re, s[cur_pos:])
	if m:
	yield Token(tok_name, m.group())
	cur_pos += m.span()[1]
	break
	else:
	raise LispSyntaxError("Unexpected character: %s (position: %d)" % (s[cur_pos], cur_pos))


	def lex(input):
	return list(_tokenize(input))


	def parse(input, single_expr=False, debug=False):
	"""Parse minimalist Lisp grammar.
	If single_expr=True, will expect a single expression (useful for REPL).
	If debug=True, will print the tokens before parsing.

	Based on recursive descent parser code at Wikipedia:
	https://en.wikipedia.org/wiki/Recursive_descent_parser
	"""
	tokens = lex(input)

	if debug:
	print('Tokens:', tokens)

	tokens = iter(tokens)
	current_token_holder = [None]

	def current():
	"""Return current token"""
	return current_token_holder[0]

	def next_token():
	"""Advance to next token"""
	try:
	current_token_holder[0] = next(tokens)
	except:
	current_token_holder[0] = None

	def accept(tok_name):
	"""If current token matches, return its value and advance to next"""
	if current() is not None and current().name == tok_name:
	value = current().value
	next_token()
	return value

	def expect(tok_name):
	"""Accept a token returning its value, or else raise an error
	"""
	got = current()
	value = accept(tok_name)
	if not value:
	if got is not None:
	got = got.value
	raise LispSyntaxError("Error: Expected %s but got %s"
	% (tok_name, got))
	return value

	def parse_expr():
	"""Parse a single expression.

	expr := NUMBER \| IDENTIFIER \| ( IDENTIFIER expr_list )
	"""
	value = accept('NUMBER')
	if value:
	return int(value)

	value = accept('IDENTIFIER')
	if value:
	return value

	value = accept('LPAREN')
	if value:
	ident = expect('IDENTIFIER')
	expr_list = parse_expr_list()
	expect('RPAREN')
	return [ident] + expr_list

	def parse_expr_list():
	"""Parse an expression list.

	expr_list := expr \| expr EXPR_LIST
	"""
	result = []
	value = parse_expr()
	while value is not None:
	result.append(value)
	value = parse_expr()
	return result

	next_token()
	if single_expr:
	result = parse_expr()
	else:
	result = parse_expr_list()

	if current() is not None:
	raise LispSyntaxError('Unexpected token: {}'.format(current().value))

	return result


	def repl(debug=False):
	"""Read-Eval-Print Loop

	Currently, this is more like a Read-Parse-Print-Loop :)
	"""
	def readline():
	try:
	return raw_input('> ')
	except EOFError:
	return None

	code = readline()
	while code:
	try:
	print(parse(code, single_expr=True, debug=debug))
	except LispSyntaxError as e:
	print('%s' % e)
	code = readline()


	def parse_args():
	import argparse
	parser = argparse.ArgumentParser(description=__doc__)
	parser.add_argument('input', nargs='?',
	help='Source input (if not given, will read stdin)')
	parser.add_argument('--debug', action='store_true')

	return parser.parse_args()


	def main():
	args = parse_args()
	should_repl = not args.input and sys.stdin.isatty()
	if should_repl:
	repl(debug=args.debug)
	else:
	if args.input:
	with open(args.input) as f:
	code = f.read()
	else:
	code = sys.stdin.read()

	try:
	print(parse(code, debug=args.debug))
	except LispSyntaxError as e:
	print(e)
	sys.exit(1)


	if __name__ == '__main__':
	main()