Created
August 21, 2010 17:46
-
-
Save seungjin/542621 to your computer and use it in GitHub Desktop.
S-expression Python parser. Ref: http://www.unixuser.org/~euske/python/index.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
## | |
## sexpr.py - by Yusuke Shinyama | |
## | |
## * public domain * | |
## | |
from abstfilter import AbstractFeeder, AbstractFilter, AbstractConsumer | |
## SExprReader | |
## | |
class SExprReader(AbstractFilter):
    """Incremental S-expression reader.

    Text is pushed in with feed(), possibly in several chunks.  Every
    completed top-level item -- a bare symbol (str) or a list made of
    symbols and nested lists -- is passed on through feed_next().
    feed_next() is not defined in this class; presumably it is inherited
    from AbstractFilter and forwards to the next filter -- TODO confirm.

    Usage:
      reader = SExprReader(consumer)
      reader.feed("(this is (sexpr))")
      reader.close()
    """

    COMMENT_BEGIN = ";"
    COMMENT_END = "\n"
    SEPARATOR = " \t\n"
    PAREN_BEGIN = "("
    PAREN_END = ")"
    QUOTE = '"'
    ESCAPE = "\\"

    def __init__(self, next_filter,
                 comment_begin=COMMENT_BEGIN,
                 comment_end=COMMENT_END,
                 separator=SEPARATOR,
                 paren_begin=PAREN_BEGIN,
                 paren_end=PAREN_END,
                 quote=QUOTE,
                 escape=ESCAPE):
        """Set up the reader.

        next_filter is handed to AbstractFilter.__init__.  Each remaining
        argument is a string whose characters play the named role; the
        defaults are the class-level constants.
        """
        AbstractFilter.__init__(self, next_filter)
        self.comment_begin = comment_begin
        self.comment_end = comment_end
        self.separator = separator
        self.paren_begin = paren_begin
        self.paren_end = paren_end
        self.quote = quote
        self.escape = escape
        # Every character that is not simply appended to the current symbol.
        self.special = (comment_begin + separator + paren_begin +
                        paren_end + quote + escape)
        self.reset()

    # SExprReader ignores any error and tries to continue as long as
    # possible.  If you want to throw an exception instead, override
    # the following two methods (see StrictSExprReader).

    def illegal_close_paren(self, i):
        """Called when a close parenthesis has no matching open one.

        i is the index of the offending character within the chunk
        currently being processed by feed().
        """
        # Single-argument print(...) emits the same text on Python 2
        # and is also valid syntax on Python 3.
        print("Ignore a close parenthesis: %d" % i)

    def premature_eof(self, i, x):
        """Called by terminate() when lists are still open at end of input.

        i is the number of unclosed parentheses and x is the outermost
        partially built list.
        """
        print("Premature end of file: %d parens left, partial=%s" % (i, x))

    def reset(self):
        """Reset the internal state and return self (for chaining)."""
        self.incomment = False   # inside a comment.
        self.inquote = False     # inside a quoted string.
        self.inescape = False    # the previous character was an escape.
        self.sym = ''            # partially constructed symbol.
        # NOTICE: None != nil (an empty list)
        self.build = None        # list currently being built, if any.
        self.build_stack = []    # enclosing partial lists (None at the bottom).
        return self

    def _flush_symbol(self):
        """Emit the pending symbol, if any, and clear it.

        Inside a list the symbol is appended to that list; at top level it
        is passed to feed_next().  An empty pending symbol emits nothing,
        so an empty quoted string produces no item.
        """
        if self.sym:
            if self.build is None:
                self.feed_next(self.sym)
            else:
                self.build.append(self.sym)
        self.sym = ''

    def feed(self, tokens):
        """Consume the characters of tokens and return self.

        Parser state is kept on the instance, so input may be split
        across several feed() calls.
        """
        for (i, c) in enumerate(tokens):
            if self.incomment:
                # within a comment - skip until a comment_end character.
                self.incomment = (c not in self.comment_end)
            elif self.inescape or (c not in self.special):
                # ordinary (or escaped) character: extend the current symbol.
                self.sym += c
                self.inescape = False
            elif c in self.escape:
                # escape: the next character is taken literally.
                self.inescape = True
            elif self.inquote and (c not in self.quote):
                # special characters lose their meaning inside quotes.
                self.sym += c
            else:
                # special character (blank, parenthesis, quote or comment)
                # always closes the current symbol first.
                self._flush_symbol()
                if c in self.comment_begin:
                    self.incomment = True
                elif c in self.quote:
                    self.inquote = not self.inquote
                elif c in self.paren_begin:
                    # begin a new list, remembering the enclosing one.
                    self.build_stack.append(self.build)
                    empty = []
                    if self.build is not None:
                        # nested: the new list is an item of the current one.
                        self.build.append(empty)
                    self.build = empty
                elif c in self.paren_end:
                    if self.build is None:
                        # there must be a working list.
                        self.illegal_close_paren(i)
                    else:
                        if len(self.build_stack) == 1:
                            # a top-level list has just been completed.
                            self.feed_next(self.build)
                        self.build = self.build_stack.pop()
        return self

    def terminate(self):
        """Flush pending input at end of data and return self.

        A pending symbol is emitted.  If lists are still open, the state
        is cleared and premature_eof() is called with the number of
        unclosed parentheses and the outermost partial list.
        """
        self._flush_symbol()
        if self.build is not None:
            # error - still try to hand over the partial structure.
            # The depth must be read BEFORE the stack is cleared; reading
            # it afterwards always reported 0 parentheses left.
            depth = len(self.build_stack)
            if depth == 1:
                partial = self.build
            else:
                # build_stack[0] is the None pushed by the outermost open
                # parenthesis, so index 1 holds the outermost list.
                partial = self.build_stack[1]
            self.build = None
            self.build_stack = []
            self.premature_eof(depth, partial)
        return self

    def close(self):
        """Close the filter, then flush whatever is still pending."""
        # NOTE(review): terminate() may call feed_next() after
        # AbstractFilter.close() has already run; confirm against
        # AbstractFilter that this order is intended.
        AbstractFilter.close(self)
        self.terminate()
## StrictSExprReader | |
## | |
class SExprIllegalClosingParenError(ValueError):
    """Raised by StrictSExprReader for a close parenthesis with no open list."""
class SExprPrematureEOFError(ValueError):
    """Raised by StrictSExprReader when input ends while lists are still open."""
class StrictSExprReader(SExprReader):
    """SExprReader whose error hooks raise exceptions."""

    def illegal_close_paren(self, i):
        """Raise SExprIllegalClosingParenError carrying the index i."""
        error = SExprIllegalClosingParenError(i)
        raise error

    def premature_eof(self, i, x):
        """Raise SExprPrematureEOFError carrying i and the partial list x."""
        error = SExprPrematureEOFError(i, x)
        raise error
## str2sexpr | |
## | |
class _SExprStrConverter(AbstractConsumer):
    """Consumer that collects every item it is fed into a class-level list."""

    # Stored on the class, not the instance, so all instances share it.
    results = []

    def feed(self, s):
        """Append s to the shared results list."""
        collected = _SExprStrConverter.results
        collected.append(s)
# Module-level readers, one lenient and one strict, each feeding its own
# _SExprStrConverter instance.
_str_converter = SExprReader(next_filter=_SExprStrConverter())
_str_converter_strict = StrictSExprReader(next_filter=_SExprStrConverter())
def str2sexpr(s):
    """Parse the string s and return the list of top-level items found.

    Uses the module-level lenient reader, so malformed input is reported
    through that reader's error hooks rather than raised here.
    """
    # Start from a fresh result list; the converter stores it on the class.
    _SExprStrConverter.results = []
    parser = _str_converter.reset()
    parser = parser.feed(s)
    parser.terminate()
    return _SExprStrConverter.results
def str2sexpr_strict(s):
    """Parse the string s with the strict reader and return the items found.

    Uses the module-level StrictSExprReader instance, whose error hooks
    raise on ill-structured input.
    """
    # Start from a fresh result list; the converter stores it on the class.
    _SExprStrConverter.results = []
    parser = _str_converter_strict.reset()
    parser = parser.feed(s)
    parser.terminate()
    return _SExprStrConverter.results
## sexpr2str | |
## | |
def sexpr2str(e):
    """Render a parsed S-expression in Lisp-like notation.

    A list is rendered as its items, converted recursively, joined by
    single spaces and wrapped in parentheses.  Anything that is not a
    list is returned unchanged.
    """
    if isinstance(e, list):
        rendered = [sexpr2str(item) for item in e]
        return "(" + " ".join(rendered) + ")"
    return e
# test stuff | |
def test():
    """Smoke-test str2sexpr on nesting, quoting/escaping and truncated input."""
    # Comments are skipped and nested lists are preserved.
    nested = str2sexpr("(this ;comment\n is (a test (sentences) (des()) (yo)))")
    assert nested == \
        [["this", "is", ["a", "test", ["sentences"], ["des", []], ["yo"]]]]

    # Escaped and quoted special characters end up inside the symbols.
    escaped = str2sexpr('''(paren\\(\\)theses_in\\#symbol "space in \nsymbol"
this\\ way\\ also. "escape is \\"better than\\" quote")''')
    assert escaped == \
        [['paren()theses_in#symbol', 'space in \nsymbol', 'this way also.', 'escape is "better than" quote']]

    # Unbalanced input: only exercises the premature-EOF path; the
    # result is not checked.
    str2sexpr("(this (is (a (parial (sentence")
# main | |
# Run the self-test only when this file is executed as a script.
if __name__ == "__main__":
    test()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment