Created
September 2, 2011 13:42
-
-
Save dahlia/1188614 to your computer and use it in GitHub Desktop.
Lisp Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""":mod:`lispparser` --- Lisp parser | |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
""" | |
__author__ = 'Hong Minhe <minhee' '@' 'dahlia.kr>' | |
__license__ = 'Public Domain' | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""":mod:`lispparser.exc` --- Parsing errors | |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
""" | |
import exceptions | |
class SyntaxError(exceptions.SyntaxError):
    """Base class for Lisp syntax errors that can point at a source position.

    The class stores nothing itself.  Its properties read ``self.code`` (the
    complete source text) and ``self.offset`` (a 0-based character index into
    that text), so both must be available on the instance.

    """

    @property
    def column(self):
        """0-based column of :attr:`offset` within its line."""
        # rfind() yields -1 when no newline precedes the offset, so the same
        # expression covers the first line (column == offset) and every later
        # line (characters between the previous newline and the offset).
        return self.offset - self.code.rfind('\n', 0, self.offset) - 1

    @property
    def line(self):
        """0-based line number of :attr:`offset` (newlines before it)."""
        return self.code.count('\n', 0, self.offset)

    @property
    def offset_indicator(self):
        """The offending source line followed by a caret line marking the
        error column.

        """
        # Split on '\n' only, exactly as `line` counts them.  splitlines()
        # also breaks on other separators and drops a trailing empty line,
        # which made the index below fail for an offset after a final newline.
        line = self.code.split('\n')[self.line].rstrip('\r')
        return '\n'.join([line, ' ' * self.column + '^'])

    def print_syntax_error(self):
        """Write a report of this error to standard error.

        The report consists of the exception class name and message, the
        1-based ``line:column`` position, and :attr:`offset_indicator`.

        """
        from sys import stderr
        line = self.line + 1
        col = self.column + 1
        stderr.write(type(self).__name__ + ': ' + str(self) + '\n')
        # NOTE(review): the "Lexing error" label is emitted whatever the
        # concrete error class is; kept unchanged for output compatibility.
        stderr.write('Lexing error({0}:{1}):'.format(line, col) + '\n')
        stderr.write('{0.offset_indicator}'.format(self) + '\n')
class LexingError(SyntaxError):
    """Syntax error carrying the source text and the position it refers to.

    :param code: the complete source text
    :param offset: character index into ``code`` the error refers to
    :param message: optional description, handed to the base exception

    """

    __slots__ = ('code', 'offset')

    def __init__(self, code, offset, message=None):
        super(LexingError, self).__init__(message)
        self.offset = offset
        self.code = code
class ParsingError(SyntaxError):
    """Syntax error tied to a specific token.

    The source text and position are not stored separately; they are read
    from the token on demand.

    :param token: the offending token; must expose ``code`` and ``offset``
    :param message: optional description, handed to the base exception

    """

    __slots__ = ('token',)

    def __init__(self, token, message=None):
        super(ParsingError, self).__init__(message)
        self.token = token

    @property
    def offset(self):
        """The ``offset`` attribute of the offending token."""
        return self.token.offset

    @property
    def code(self):
        """The ``code`` attribute of the offending token."""
        return self.token.code
class UnopenedParenthesisError(ParsingError):
    """Parsing error for a closing token that has no opening counterpart.

    :param token: the closing token
    :param offset: position recorded as :attr:`parsing_offset`; this is kept
                   apart from the ``offset`` property, which is still read
                   from ``token``
    :param message: optional description, handed to the base exception

    """

    def __init__(self, token, offset, message=None):
        super(UnopenedParenthesisError, self).__init__(token, message)
        self.parsing_offset = offset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""":mod:`lispparser.lexer` --- Lisp tokenizer | |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
""" | |
import re | |
import collections | |
import lispparser.exc | |
#: Matches a single token.  Each alternative is one named group, so the name
#: of the group that matched is the token type.
TOKEN_RE = re.compile(r'''
    (?P<open> [({] | \[ ) |
    (?P<close> [)}] | \] ) |
    (?P<number> \d+(?:\.\d+)? ) |
    (?P<string> " (?: [^\\"] | \\ . )* " | ' (?: [^\\'] | \\ . )* ' ) |
    (?P<symbol> [-_A-Za-z+*/%?!<>=][-_A-Za-z0-9+*/%?!<>=]* )
''', re.VERBOSE)

#: A lexed token: its type (a group name of :data:`TOKEN_RE`), the matched
#: text, the complete source text, and the offset of the match in that text.
Token = collections.namedtuple('Token', 'type token code offset')


def _check_gap(code, start, stop):
    """Raise ``LexingError`` unless ``code[start:stop]`` is only whitespace.

    The error offset is the index of the first non-whitespace character, so
    the report points at the offending text instead of the blanks before it.

    """
    gap = code[start:stop]
    rest = gap.lstrip()
    if rest:
        raise lispparser.exc.LexingError(
            code, stop - len(rest), 'unexpected token'
        )


def tokenize(code):
    """Split Lisp source text into :class:`Token` tuples.

    :param code: the source text to tokenize
    :returns: a generator of ``Token(type, token, code, offset)``
    :raises lispparser.exc.LexingError: when non-whitespace text that is not
        part of any token is found between tokens or after the last one;
        because this is a generator, the error surfaces during iteration

    """
    end = 0
    for match in TOKEN_RE.finditer(code):
        start = match.start()
        _check_gap(code, end, start)
        end = match.end()
        # Only the alternative that matched has participated, so lastgroup
        # names the token type directly.
        kind = match.lastgroup
        yield Token(kind, match.group(kind), code, start)
    # Text after the final token must be checked too; finditer() simply stops
    # at the last match and would otherwise let trailing garbage through.
    _check_gap(code, end, len(code))
if __name__ == '__main__':
    # Script mode: tokenize standard input and print one token per line, or
    # report the lexing error on standard error.
    import sys
    source = sys.stdin.read()
    try:
        found = list(tokenize(source))
    except lispparser.exc.LexingError as error:
        error.print_syntax_error()
    else:
        sys.stdout.writelines(
            '{0.type}({0.offset}): {0.token}'.format(tok) + '\n'
            for tok in found
        )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""":mod:`lispparser.parser` --- Lisp parser | |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
""" | |
import lispparser.lexer | |
import lispparser.exc | |
class Symbol(object):
    """A Lisp symbol, wrapping the symbol's name.

    Two symbols compare equal, and hash alike, when their names are equal,
    so parse results can be compared and symbols can be used as dictionary
    keys.

    :param symbol: the name of the symbol

    """

    __slots__ = 'symbol',

    def __init__(self, symbol):
        self.symbol = symbol

    def __eq__(self, other):
        if isinstance(other, Symbol):
            return self.symbol == other.symbol
        return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        # Mix in the class so a symbol does not hash like its bare name.
        return hash((Symbol, self.symbol))

    def __str__(self):
        return str(self.symbol)

    def __unicode__(self):
        return unicode(self.symbol)

    def __repr__(self):
        return 'Symbol({0!r})'.format(self.symbol)
def parse(tokens):
    """Parse Lisp source text, or an iterable of tokens, into Python values.

    Lists become Python lists, numbers become ``int`` or ``float`` (``float``
    when the literal contains a dot), symbols become :class:`Symbol` and
    string literals become Python strings.

    :param tokens: source text, which is tokenized first, or an iterable of
                   4-item ``(type, token, code, offset)`` tokens
    :returns: a generator yielding each top-level form as soon as it is
              complete
    :raises lispparser.exc.UnopenedParenthesisError: for a closing token that
        has no open list to close
    :raises lispparser.exc.ParsingError: for an unknown token type, or when
        the input ends while a list is still open

    Bracket kinds are not matched against each other: any closing token
    closes the most recently opened list.

    """
    import ast

    if isinstance(tokens, basestring):
        tokens = lispparser.lexer.tokenize(tokens)
    tokens = list(tokens)
    # One (opening token, elements so far) pair per list that is still open.
    # An explicit stack avoids re-slicing the token list for every nested
    # list and makes an unterminated list detectable at the end.
    open_lists = []
    for i, token in enumerate(tokens):
        kind, text, _code, _offset = token
        if kind == 'open':
            open_lists.append((token, []))
            continue
        if kind == 'close':
            if not open_lists:
                raise lispparser.exc.UnopenedParenthesisError(
                    token, i,
                    'expected opened list'
                )
            value = open_lists.pop()[1]
        elif kind == 'number':
            value = float(text) if '.' in text else int(text)
        elif kind == 'symbol':
            value = Symbol(text)
        elif kind == 'string':
            # NOTE(security): this used to be eval().  literal_eval() gives
            # the same value for a quoted literal but cannot execute code if
            # a caller hands in a hand-built 'string' token.
            value = ast.literal_eval(text)
        else:
            raise lispparser.exc.ParsingError(token, 'unexpected token type')
        if open_lists:
            open_lists[-1][1].append(value)
        else:
            yield value
    if open_lists:
        # Input ended inside a list; report the innermost unclosed opener
        # instead of silently yielding a truncated tree.
        raise lispparser.exc.ParsingError(
            open_lists[-1][0],
            'expected closing parenthesis'
        )
if __name__ == '__main__':
    # Script mode: parse standard input and pretty-print every top-level
    # form, or report the syntax error on standard error.
    import pprint
    import sys
    try:
        forms = list(parse(sys.stdin.read()))
    except lispparser.exc.SyntaxError as error:
        error.print_syntax_error()
    else:
        printer = pprint.PrettyPrinter()
        for parsed in forms:
            printer.pprint(parsed)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{define (factorial n) | |
{if [> n 1] | |
(* n (factorial (- n 1)) | |
1)}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment