Skip to content

Instantly share code, notes, and snippets.

@seungjin
Created August 21, 2010 17:46
Show Gist options
  • Save seungjin/542621 to your computer and use it in GitHub Desktop.
Save seungjin/542621 to your computer and use it in GitHub Desktop.
S-expression parser in Python. Ref: http://www.unixuser.org/~euske/python/index.html
#!/usr/bin/env python
##
## sexpr.py - by Yusuke Shinyama
##
## * public domain *
##
from abstfilter import AbstractFeeder, AbstractFilter, AbstractConsumer
## SExprReader
##
class SExprReader(AbstractFilter):
    """Incremental, error-tolerant S-expression reader.

    Characters are consumed through feed().  Every completed top-level item
    (a bare symbol, or a list whose parentheses are all closed) is passed to
    self.feed_next().  Symbols are plain strings; lists are Python lists.

    Usage:
      reader = SExprReader(consumer)
      reader.feed("(this is (sexpr))")
      reader.close()
    """

    COMMENT_BEGIN = ";"
    COMMENT_END = "\n"
    SEPARATOR = " \t\n"
    PAREN_BEGIN = "("
    PAREN_END = ")"
    QUOTE = '"'
    ESCAPE = "\\"

    def __init__(self, next_filter,
                 comment_begin=COMMENT_BEGIN,
                 comment_end=COMMENT_END,
                 separator=SEPARATOR,
                 paren_begin=PAREN_BEGIN,
                 paren_end=PAREN_END,
                 quote=QUOTE,
                 escape=ESCAPE):
        """Create a reader that forwards parsed items to next_filter.

        Each syntax argument is a string; a character belongs to a syntax
        class when it occurs anywhere in the corresponding string.
        """
        AbstractFilter.__init__(self, next_filter)
        self.comment_begin = comment_begin
        self.comment_end = comment_end
        self.separator = separator
        self.paren_begin = paren_begin
        self.paren_end = paren_end
        self.quote = quote
        self.escape = escape
        # Every character that is not simply appended to the current symbol.
        self.special = (comment_begin + separator + paren_begin +
                        paren_end + quote + escape)
        self.reset()

    # SExprReader ignores any error and tries to continue as long as
    # possible.  If you want an exception instead, override the two
    # hooks below (see StrictSExprReader).

    def illegal_close_paren(self, i):
        """Hook: a redundant closing parenthesis was found.

        i is the index of the offending character within the string
        currently being fed.
        """
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print("Ignore a close parenthesis: %d" % i)

    def premature_eof(self, i, x):
        """Hook: input ended while lists were still open.

        i is the number of unclosed parentheses and x is the outermost
        partially built list.
        """
        print("Premature end of file: %d parens left, partial=%s" % (i, x))

    def reset(self):
        """Reset all internal parser state and return self."""
        self.incomment = False  # True while inside a comment.
        self.inquote = False    # True while inside a quoted string.
        self.inescape = False   # True right after an escape character.
        self.sym = ''           # Partially constructed symbol.
        # NOTICE: None != nil (an empty list)
        self.build = None       # Partially constructed list, if any.
        self.build_stack = []   # Chain of enclosing partial lists.
        return self

    def feed(self, tokens):
        """Parse the characters of tokens and return self.

        State is kept between calls, so input may arrive in pieces.
        """
        for i, c in enumerate(tokens):
            if self.incomment:
                # Within a comment - skip until a comment terminator.
                self.incomment = (c not in self.comment_end)
            elif self.inescape or (c not in self.special):
                # Ordinary (or escaped) character: extend the current symbol.
                self.sym += c
                self.inescape = False
            elif c in self.escape:
                # Escape: the next character is taken literally.
                self.inescape = True
            elif self.inquote and (c not in self.quote):
                # Special characters lose their meaning inside quotes.
                self.sym += c
            else:
                # Special character (blank, parenthesis, quote, or comment).
                if self.sym:
                    # Close the current symbol.  An empty symbol is never
                    # emitted, so an empty quoted string yields nothing.
                    if self.build is None:
                        self.feed_next(self.sym)
                    else:
                        self.build.append(self.sym)
                    self.sym = ''
                if c in self.comment_begin:
                    self.incomment = True
                elif c in self.quote:
                    self.inquote = not self.inquote
                elif c in self.paren_begin:
                    # Begin a new list; remember the enclosing one (or None
                    # when this is a top-level list).
                    self.build_stack.append(self.build)
                    empty = []
                    if self.build is not None:
                        # Nest the new list at the end of the current list.
                        self.build.append(empty)
                    self.build = empty
                elif c in self.paren_end:
                    # Terminate the current list.
                    if self.build is None:
                        # There must be a working list.
                        self.illegal_close_paren(i)
                    else:
                        if len(self.build_stack) == 1:
                            # Only the top-level marker is left on the
                            # stack, so the outermost list is complete.
                            self.feed_next(self.build)
                        self.build = self.build_stack.pop()
        return self

    def terminate(self):
        """Flush pending input at end of data and return self.

        A dangling symbol outside any list is passed to feed_next().  If a
        list is still open, premature_eof() is called with the number of
        unclosed parentheses and the outermost partial list.
        """
        if self.build is not None:
            # Error - still try to construct a partial structure.
            if self.sym:
                self.build.append(self.sym)
                self.sym = ''
            # One stack entry is pushed per open parenthesis, so the stack
            # length is the number of parentheses left unclosed.  It must be
            # read before the stack is cleared below.
            depth = len(self.build_stack)
            if depth == 1:
                x = self.build
            else:
                # build_stack[0] is the None pushed for the top-level list;
                # build_stack[1] is therefore the outermost list.
                x = self.build_stack[1]
            self.build = None
            self.build_stack = []
            self.premature_eof(depth, x)
        elif self.sym:
            # Flush the current working symbol.
            self.feed_next(self.sym)
            self.sym = ''
        return self

    def close(self):
        """Close the filter, then flush whatever input is still pending."""
        # NOTE(review): terminate() may call feed_next() after
        # AbstractFilter.close() has run; confirm against abstfilter that
        # feeding after close is acceptable, otherwise swap the two calls.
        AbstractFilter.close(self)
        self.terminate()
## StrictSExprReader
##
class SExprIllegalClosingParenError(ValueError):
    """Raised for a closing parenthesis that has no matching opener."""
class SExprPrematureEOFError(ValueError):
    """Raised when the input ends while a list is still open."""
class StrictSExprReader(SExprReader):
    """SExprReader variant that raises on ill-structured input.

    Both error hooks of SExprReader are overridden to raise an exception
    instead of reporting the problem and carrying on.
    """

    def illegal_close_paren(self, i):
        """Raise SExprIllegalClosingParenError carrying the index i."""
        error = SExprIllegalClosingParenError(i)
        raise error

    def premature_eof(self, i, x):
        """Raise SExprPrematureEOFError carrying i and the partial list x."""
        error = SExprPrematureEOFError(i, x)
        raise error
## str2sexpr
##
class _SExprStrConverter(AbstractConsumer):
    """Consumer that collects every item fed to it in a class-level list."""

    # Stored on the class, not on instances: every converter instance
    # appends to whatever list is currently bound here.
    results = []

    def feed(self, s):
        """Append the parsed item s to _SExprStrConverter.results."""
        collected = _SExprStrConverter.results
        collected.append(s)
# Module-level reader instances, created once at import time.  Each one
# forwards its parsed items to its own _SExprStrConverter consumer.
# Lenient reader: uses SExprReader's default error hooks.
_str_converter = SExprReader(_SExprStrConverter())
# Strict reader: StrictSExprReader raises on malformed input.
_str_converter_strict = StrictSExprReader(_SExprStrConverter())
def str2sexpr(s):
    """Parse the string s and return the list of its top-level items.

    Parsing goes through the shared lenient reader, so malformed input is
    reported by that reader's error hooks rather than raised.
    """
    # Start from a fresh result list; the consumer appends to this attribute.
    _SExprStrConverter.results = []
    reader = _str_converter.reset()
    reader = reader.feed(s)
    reader.terminate()
    return _SExprStrConverter.results
def str2sexpr_strict(s):
    """Parse the string s strictly and return its top-level items.

    Parsing goes through the shared StrictSExprReader, which raises
    SExprIllegalClosingParenError or SExprPrematureEOFError on
    ill-structured input.
    """
    # Start from a fresh result list; the consumer appends to this attribute.
    _SExprStrConverter.results = []
    reader = _str_converter_strict.reset()
    reader = reader.feed(s)
    reader.terminate()
    return _SExprStrConverter.results
## sexpr2str
##
def sexpr2str(e):
    """Render a parsed sexpr in Lisp-like notation.

    A list becomes its rendered elements joined by single spaces and
    wrapped in parentheses; anything that is not a list is returned as is.
    """
    if isinstance(e, list):
        rendered = [sexpr2str(item) for item in e]
        return "(" + " ".join(rendered) + ")"
    return e
# test stuff
def test():
    """Run a few smoke checks against str2sexpr."""
    # Comments are skipped; nested and empty lists are preserved.
    expected_nested = [["this", "is", ["a", "test", ["sentences"], ["des", []], ["yo"]]]]
    assert str2sexpr("(this ;comment\n is (a test (sentences) (des()) (yo)))") == expected_nested

    # Escaped special characters and quoted text both end up inside symbols.
    expected_escaped = [[
        'paren()theses_in#symbol',
        'space in \nsymbol',
        'this way also.',
        'escape is "better than" quote',
    ]]
    assert str2sexpr('''(paren\\(\\)theses_in\\#symbol "space in \nsymbol"
this\\ way\\ also. "escape is \\"better than\\" quote")''') == expected_escaped

    # Unbalanced input: only exercised for the lenient error path, the
    # result is not checked.
    str2sexpr("(this (is (a (parial (sentence")
# main: run the self-test when this file is executed as a script.
if __name__ == "__main__":
    test()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment