Last active
June 19, 2016 15:04
-
-
Save whosaysni/0112f51f1e915d2037861fc08ef0e249 to your computer and use it in GitHub Desktop.
PSP parser in pure python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
"""PPSP: Pure Python Server Pages
(c) 2016 Yasushi Masuda.
"""
from collections import OrderedDict | |
from re import compile, M, S | |
# State symbols
# TEXT: the parser is inside template text (anything outside a consumed tag).
# The other state used by the map below is None, which is the parser's
# default initial state.
TEXT = 'T'

# Pattern map for each state.
#
# Each entry is (state, [(regex source, handler method name), ...]).
# The parser compiles every regex with the DOTALL flag and tries the entries
# of the current state in the order listed here, calling the handler of the
# FIRST pattern that matches at the start of the remaining source.  Order is
# therefore significant: the specific tags (`<%= %>`, `<%@ include %>`,
# `<%-- --%>`) must precede the generic `<% %>` code tag, and the catch-all
# single-character pattern '.' must come after all of them.
#
# NOTE(review): the '^$' (end-of-input) entries follow a '.' entry that, under
# DOTALL, matches any non-empty input, and the parser's main loop only runs
# while input remains -- so these handlers look unreachable.  Kept as-is.
STATE_PATTERN_MAP = (
    (None, [
        ('.', 'none_process_char'),
        ('^$', 'none_process_eof'),
    ]),
    (TEXT, [
        ('<%=(?P<expr>.*?)%>', 'text_process_varref'),
        ('<%@ *include +file="(?P<filename>[^" ]+)" *%>', 'text_process_include'),
        ('<%--(?P<comment>.*?)--%>', 'text_process_comment'),
        ('<%(?P<code>.*?)%>', 'text_process_pycode'),
        ('.', 'text_process_char'),
        ('^$', 'text_process_eof'),
    ]),
)
class PspParserError(Exception):
    """Error raised while translating a PSP source.

    Behaves like a plain ``Exception`` for positional arguments.  In addition
    it accepts an optional ``args`` keyword holding an iterable of extra
    context values (e.g. a filename and line number); these are appended to
    the positional arguments, so
    ``PspParserError('msg', args=('f.psp', 3)).args`` is
    ``('msg', 'f.psp', 3)``.

    A bare ``Exception`` subclass rejects keyword arguments with a
    ``TypeError``, which would mask the real parsing error at any call site
    that passes ``args=...``.
    """

    def __init__(self, *details, **kwargs):
        # Keyword-only parameters are avoided so the module stays importable
        # on Python 2 as well as Python 3.
        extra = kwargs.pop('args', ())
        if kwargs:
            raise TypeError(
                'unexpected keyword argument(s): %s'
                % ', '.join(sorted(kwargs)))
        super(PspParserError, self).__init__(*(tuple(details) + tuple(extra)))
class PspParser(object): | |
"""A simple PSP implementation.""" | |
def __init__(self, source, filename=None, initial_state=None, indent_char='\t', | |
parent=None, step_torelance=2**24): | |
self.parent = parent | |
# traverse parent to avoid cyclic inclusion | |
inclusion_chain = [] | |
_parent = parent | |
while _parent: | |
inclusion_chain.append(_parent.filename) | |
_parent = _parent.parent | |
if filename in inclusion_chain: | |
raise PspParserError('Cyclic inclusion error.') | |
self.source = source | |
self.filename = filename | |
self.lineno = 0 | |
self.state = initial_state | |
self.indent_char = indent_char | |
self.indent_level = 0 | |
self.text = '' | |
self.pycode = '' | |
self.step_torelance = step_torelance | |
self.state_pattern_map = dict( | |
(state, | |
OrderedDict( | |
(compile(pattern, S), getattr(self, method_name)) | |
for pattern, method_name in pattern_map)) | |
for state, pattern_map in STATE_PATTERN_MAP) | |
def run(self): | |
"""Feed psp string and run""" | |
nc_step = 0 # number of non-consumption steps | |
while self.source: | |
nc_step += 1 | |
srclen = len(self.source) | |
if self.step_torelance and nc_step > self.step_torelance: | |
raise PspParserError('Possible infinite loop in parsing.') | |
pattern_map = self.state_pattern_map[self.state] | |
for pattern, method in pattern_map.items(): | |
found = pattern.match(self.source, 0) | |
if found: | |
method(found) | |
break | |
else: | |
raise PspParserError( | |
'Unexpected token.', | |
args=(self.filename, self.lineno)) | |
# reset counter if source is consumed | |
if srclen < len(self.source): | |
nc_step = 0 | |
self.flush_text() | |
return self.pycode | |
def consume(self, found): | |
"""Remove token from input""" | |
start, end = found.span() | |
consumed = self.source[:end] \ | |
.replace('\r\n', '\n') \ | |
.replace('\r', '\n') | |
self.lineno += consumed.count('\n') | |
self.source = self.source[end:] | |
def indented(self, string): | |
"""Return indented text""" | |
return self.indent_char * self.indent_level + string | |
def flush_text(self): | |
"""Flush buffered text""" | |
if self.text: | |
code = 'req.write("""%s""",0)\n' % self.text | |
self.pycode += self.indented(code) | |
self.text = '' | |
def none_process_char(self, found): | |
self.text = '' | |
self.state = TEXT | |
def none_process_eof(self, found): | |
self.text = '' | |
self.state = TEXT | |
def text_process_varref(self, found): | |
self.flush_text() | |
expr = found.groupdict().get('expr').strip() | |
s = 'req.write(str(%s))\n' % (expr) | |
self.pycode += s | |
self.consume(found) | |
def text_process_include(self, found): | |
self.flush_text() | |
filename = found.groupdict().get('filename') | |
try: | |
with open(filename, 'rb') as infile: | |
child_parser = PspParser( | |
infile.read(), filename, self.state, | |
self.indent_char, self, self.step_torelance) | |
self.pycode += child_parser.run() | |
self.state = child.state | |
except IOError as exc: | |
raise PspParserError( | |
'Unable to open included file.', | |
args=(filename,)) | |
self.consume(found) | |
def text_process_comment(self, found): | |
self.consume(found) | |
def text_process_pycode(self, found): | |
self.flush_text() | |
code = found.groupdict().get('code').strip() | |
code = code.replace('\r\n', '\n').replace('\r', '\n') | |
self.consume(found) | |
lines = code.splitlines() | |
if len(lines) > 1: | |
for line in lines: | |
self.pycode += self.indented(line) + '\n' | |
else: | |
self.pycode += self.indented(code) + '\n' | |
if code == '': | |
self.indent_level -= 1 | |
elif code.endswith(':'): | |
self.indent_level += 1 | |
def text_process_char(self, found): | |
char = self.source[:found.end()] | |
if char in ['"']: | |
char = '\\'+char | |
self.text += char | |
self.consume(found) | |
def text_process_eof(self, found): | |
self.flush_text() | |
# Command-line entry point: translate the PSP file named by the first
# argument (or an empty template when none is given) and print the
# generated Python source to stdout.
if __name__ == '__main__':
    import sys

    psp_str = ''
    psp_filename = None
    if len(sys.argv) > 1:
        psp_filename = sys.argv[1]
        # Close the file deterministically instead of leaking the handle.
        with open(psp_filename, 'rb') as infile:
            psp_str = infile.read()
        if not isinstance(psp_str, str):
            # Python 3: 'rb' yields bytes; the parser's patterns are str.
            psp_str = psp_str.decode('utf-8')
    # Passing the filename lets the parser recognise a template that
    # includes itself.
    p = PspParser(psp_str, psp_filename)
    # Single-argument parenthesised form prints the same thing on Python 2
    # and Python 3.
    print(p.run())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment