Created
          June 9, 2016 19:23 
        
      - 
      
 - 
        
Save eliasdorneles/d04785be555046de66d0237d47aaa663 to your computer and use it in GitHub Desktop.  
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| """Lisp parser (future interpreter) | |
| """ | |
| from __future__ import print_function | |
| import sys | |
| import re | |
| from collections import namedtuple | |
| PY2 = sys.version_info.major == 2 | |
| raw_input = raw_input if PY2 else input | |
| class LispSyntaxError(ValueError): | |
| "Raised for errors in Lisp syntax" | |
| VALID_IDENT_CHARS = r'-a-zA-Z?!+*/%' | |
| TOKENS = [ | |
| ('IDENTIFIER', | |
| '[' + VALID_IDENT_CHARS + '][' + VALID_IDENT_CHARS + '0-9]*'), | |
| ('LPAREN', r'[(]'), | |
| ('RPAREN', r'[)]'), | |
| ('NUMBER', r'\d+'), | |
| ] | |
| def read_file(path): | |
| with open(path) as f: | |
| return f.read() | |
| Token = namedtuple('Token', 'name value') | |
| def _tokenize(s): | |
| """Generate a list of tokens, ignoring whitespaces | |
| """ | |
| cur_pos = 0 | |
| while cur_pos < len(s): | |
| m = re.match(r'^\s+', s[cur_pos:]) | |
| if m: | |
| cur_pos += m.span()[1] | |
| continue | |
| for tok_name, tok_re in TOKENS: | |
| m = re.match(r'^' + tok_re, s[cur_pos:]) | |
| if m: | |
| yield Token(tok_name, m.group()) | |
| cur_pos += m.span()[1] | |
| break | |
| else: | |
| raise LispSyntaxError("Unexpected character: %s (position: %d)" % (s[cur_pos], cur_pos)) | |
| def lex(input): | |
| return list(_tokenize(input)) | |
| def parse(input, single_expr=False, debug=False): | |
| """Parse minimalist Lisp grammar. | |
| If single_expr=True, will expect a single expression (useful for REPL). | |
| If debug=True, will print the tokens before parsing. | |
| Based on recursive descent parser code at Wikipedia: | |
| https://en.wikipedia.org/wiki/Recursive_descent_parser | |
| """ | |
| tokens = lex(input) | |
| if debug: | |
| print('Tokens:', tokens) | |
| tokens = iter(tokens) | |
| current_token_holder = [None] | |
| def current(): | |
| """Return current token""" | |
| return current_token_holder[0] | |
| def next_token(): | |
| """Advance to next token""" | |
| try: | |
| current_token_holder[0] = next(tokens) | |
| except: | |
| current_token_holder[0] = None | |
| def accept(tok_name): | |
| """If current token matches, return its value and advance to next""" | |
| if current() is not None and current().name == tok_name: | |
| value = current().value | |
| next_token() | |
| return value | |
| def expect(tok_name): | |
| """Accept a token returning its value, or else raise an error | |
| """ | |
| got = current() | |
| value = accept(tok_name) | |
| if not value: | |
| if got is not None: | |
| got = got.value | |
| raise LispSyntaxError("Error: Expected %s but got %s" | |
| % (tok_name, got)) | |
| return value | |
| def parse_expr(): | |
| """Parse a single expression. | |
| expr := NUMBER | IDENTIFIER | ( IDENTIFIER expr_list ) | |
| """ | |
| value = accept('NUMBER') | |
| if value: | |
| return int(value) | |
| value = accept('IDENTIFIER') | |
| if value: | |
| return value | |
| value = accept('LPAREN') | |
| if value: | |
| ident = expect('IDENTIFIER') | |
| expr_list = parse_expr_list() | |
| expect('RPAREN') | |
| return [ident] + expr_list | |
| def parse_expr_list(): | |
| """Parse an expression list. | |
| expr_list := expr | expr EXPR_LIST | |
| """ | |
| result = [] | |
| value = parse_expr() | |
| while value is not None: | |
| result.append(value) | |
| value = parse_expr() | |
| return result | |
| next_token() | |
| if single_expr: | |
| result = parse_expr() | |
| else: | |
| result = parse_expr_list() | |
| if current() is not None: | |
| raise LispSyntaxError('Unexpected token: {}'.format(current().value)) | |
| return result | |
| def repl(debug=False): | |
| """Read-Eval-Print Loop | |
| Currently, this is more like a Read-Parse-Print-Loop :) | |
| """ | |
| def readline(): | |
| try: | |
| return raw_input('> ') | |
| except EOFError: | |
| return None | |
| code = readline() | |
| while code: | |
| try: | |
| print(parse(code, single_expr=True, debug=debug)) | |
| except LispSyntaxError as e: | |
| print('%s' % e) | |
| code = readline() | |
| def parse_args(): | |
| import argparse | |
| parser = argparse.ArgumentParser(description=__doc__) | |
| parser.add_argument('input', nargs='?', | |
| help='Source input (if not given, will read stdin)') | |
| parser.add_argument('--debug', action='store_true') | |
| return parser.parse_args() | |
| def main(): | |
| args = parse_args() | |
| should_repl = not args.input and sys.stdin.isatty() | |
| if should_repl: | |
| repl(debug=args.debug) | |
| else: | |
| if args.input: | |
| with open(args.input) as f: | |
| code = f.read() | |
| else: | |
| code = sys.stdin.read() | |
| try: | |
| print(parse(code, debug=args.debug)) | |
| except LispSyntaxError as e: | |
| print(e) | |
| sys.exit(1) | |
| if __name__ == '__main__': | |
| main() | 
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment