Skip to content

Instantly share code, notes, and snippets.

@whosaysni
Last active June 19, 2016 15:04
Show Gist options
  • Save whosaysni/0112f51f1e915d2037861fc08ef0e249 to your computer and use it in GitHub Desktop.
Save whosaysni/0112f51f1e915d2037861fc08ef0e249 to your computer and use it in GitHub Desktop.
PSP parser in pure python
# coding: utf-8
"""PPSP: Pure Python Server Pages
(c) 2016 Yasushi Masuda.
"""
from collections import OrderedDict
from re import compile, M, S
# Parser state symbols.  A parser that was given no initial state starts
# in the ``None`` state.
TEXT = 'T'

# Each list below holds (regex source, handler method name) pairs.  The
# order matters: the parser tries them top to bottom and uses the first one
# that matches at the head of the remaining input.
_INITIAL_PATTERNS = [
    ('.', 'none_process_char'),
    ('^$', 'none_process_eof'),
]

_TEXT_PATTERNS = [
    # <%= expr %> -- write the value of an expression
    ('<%=(?P<expr>.*?)%>', 'text_process_varref'),
    # <%@ include file="..." %> -- inline another template
    ('<%@ *include +file="(?P<filename>[^" ]+)" *%>', 'text_process_include'),
    # <%-- ... --%> -- template comment, dropped from the output
    ('<%--(?P<comment>.*?)--%>', 'text_process_comment'),
    # <% code %> -- raw Python code
    ('<%(?P<code>.*?)%>', 'text_process_pycode'),
    # anything else is literal text, taken one character at a time
    ('.', 'text_process_char'),
    ('^$', 'text_process_eof'),
]

# Pattern map for each state: a tuple of (state, pattern list) pairs.
STATE_PATTERN_MAP = (
    (None, _INITIAL_PATTERNS),
    (TEXT, _TEXT_PATTERNS),
)
class PspParserError(Exception):
    """Error raised while translating a PSP template.

    Positional arguments behave as for any ``Exception``.  In addition an
    optional ``args=`` keyword is accepted: it must be an iterable of extra
    context values (for example a file name and a line number) and its items
    are appended to the positional arguments.  A plain ``Exception`` rejects
    keyword arguments, so without this constructor a call such as
    ``PspParserError('msg', args=(filename,))`` fails with ``TypeError``
    instead of raising the intended error.
    """

    def __init__(self, *messages, **kwargs):
        context = tuple(kwargs.pop('args', ()))
        if kwargs:
            # Keep rejecting keywords we do not understand.
            raise TypeError(
                'unexpected keyword argument(s): %s'
                % ', '.join(sorted(kwargs)))
        super(PspParserError, self).__init__(*(messages + context))
class PspParser(object):
    """A simple PSP (Python Server Pages) to Python source translator.

    The parser repeatedly matches the head of the remaining input against
    the regular expressions registered for the current state in
    ``STATE_PATTERN_MAP`` and calls the handler method named next to the
    first pattern that matches.  Handlers append generated Python code to
    ``self.pycode`` and normally remove the matched token from
    ``self.source``.
    """

    def __init__(self, source, filename=None, initial_state=None,
                 indent_char='\t', parent=None, step_torelance=2**24):
        """Set up a parser for ``source``.

        :param source: PSP text to translate.
        :param filename: name of the file ``source`` came from, used for
            cyclic-inclusion detection and error reporting.
        :param initial_state: state to start in (``None`` or ``TEXT``).
        :param indent_char: string emitted once per indentation level.
        :param parent: the parser that is including this one, if any.
        :param step_torelance: maximum number of consecutive steps that do
            not consume input before parsing is aborted; a false value
            disables the check.
        :raises PspParserError: if ``filename`` is already being parsed by
            one of the ancestors (cyclic inclusion).
        """
        self.parent = parent
        # Walk up the chain of including parsers to avoid cyclic inclusion.
        inclusion_chain = []
        ancestor = parent
        while ancestor:
            inclusion_chain.append(ancestor.filename)
            ancestor = ancestor.parent
        if filename in inclusion_chain:
            raise PspParserError('Cyclic inclusion error.')
        self.source = source
        self.filename = filename
        self.lineno = 0
        self.state = initial_state
        self.indent_char = indent_char
        self.indent_level = 0
        self.text = ''      # literal text buffered until the next flush
        self.pycode = ''    # generated Python source
        self.step_torelance = step_torelance
        # state -> OrderedDict(compiled pattern -> bound handler); ordered
        # because the first matching pattern wins.
        self.state_pattern_map = dict(
            (state,
             OrderedDict(
                 (compile(pattern, S), getattr(self, method_name))
                 for pattern, method_name in pattern_map))
            for state, pattern_map in STATE_PATTERN_MAP)

    def run(self):
        """Translate the whole source and return the generated Python code.

        :raises PspParserError: if no pattern matches the input, or if more
            than ``step_torelance`` consecutive steps consume no input.
        """
        idle_steps = 0  # consecutive steps that did not consume input
        while self.source:
            idle_steps += 1
            if self.step_torelance and idle_steps > self.step_torelance:
                raise PspParserError('Possible infinite loop in parsing.')
            length_before = len(self.source)
            pattern_map = self.state_pattern_map[self.state]
            for pattern, handler in pattern_map.items():
                found = pattern.match(self.source, 0)
                if found:
                    handler(found)
                    break
            else:
                raise PspParserError(
                    'Unexpected token.', self.filename, self.lineno)
            # Consuming input makes the source shorter; only then is the
            # step known to have made progress and the counter is reset.
            if len(self.source) < length_before:
                idle_steps = 0
        self.flush_text()
        return self.pycode

    def consume(self, found):
        """Remove the matched token from the input and count its lines."""
        end = found.end()
        consumed = self.source[:end] \
            .replace('\r\n', '\n') \
            .replace('\r', '\n')
        self.lineno += consumed.count('\n')
        self.source = self.source[end:]

    def indented(self, string):
        """Return ``string`` prefixed with the current indentation."""
        return self.indent_char * self.indent_level + string

    def flush_text(self):
        """Emit buffered literal text as a ``req.write`` call, if any."""
        if self.text:
            code = 'req.write("""%s""",0)\n' % self.text
            self.pycode += self.indented(code)
        self.text = ''

    def none_process_char(self, found):
        """Leave the initial state; the character is left for TEXT."""
        self.text = ''
        self.state = TEXT

    def none_process_eof(self, found):
        """Leave the initial state on empty input."""
        self.text = ''
        self.state = TEXT

    def text_process_varref(self, found):
        """Handle ``<%= expr %>``: write ``str(expr)`` to the response."""
        self.flush_text()
        expr = found.group('expr').strip()
        # Indent like every other generated statement so the call stays
        # inside the block that is currently open.
        self.pycode += self.indented('req.write(str(%s))\n' % (expr))
        self.consume(found)

    def text_process_include(self, found):
        """Handle ``<%@ include file="..." %>`` by translating that file.

        The included file is parsed by a child parser that starts in this
        parser's state and at this parser's indentation level; both are
        taken back from the child when it finishes.

        :raises PspParserError: if the file cannot be read, or if the
            inclusion is cyclic.
        """
        self.flush_text()
        filename = found.group('filename')
        try:
            with open(filename, 'rb') as infile:
                included = infile.read()
        except IOError:
            raise PspParserError('Unable to open included file.', filename)
        if not isinstance(included, str):
            # Python 3 returns bytes for 'rb'; the patterns work on text.
            # NOTE(review): assumes templates are UTF-8 encoded -- confirm.
            included = included.decode('utf-8')
        child_parser = PspParser(
            included, filename, self.state,
            self.indent_char, self, self.step_torelance)
        child_parser.indent_level = self.indent_level
        self.pycode += child_parser.run()
        self.state = child_parser.state
        self.indent_level = child_parser.indent_level
        self.consume(found)

    def text_process_comment(self, found):
        """Handle ``<%-- ... --%>``: drop the comment from the input."""
        self.consume(found)

    def text_process_pycode(self, found):
        """Handle ``<% code %>``: copy the code into the output.

        An empty block (``<% %>``) closes one indentation level; a block
        whose code ends with ``:`` opens one.
        """
        self.flush_text()
        code = found.group('code').strip()
        code = code.replace('\r\n', '\n').replace('\r', '\n')
        self.consume(found)
        # An empty block still emits one (indentation-only) line.
        for line in code.splitlines() or ['']:
            self.pycode += self.indented(line) + '\n'
        if code == '':
            self.indent_level -= 1
        elif code.endswith(':'):
            self.indent_level += 1

    def text_process_char(self, found):
        """Buffer one character of literal text, escaped for the output."""
        char = self.source[:found.end()]
        # The text ends up inside a triple-quoted string literal, so both
        # double quotes and backslashes must be escaped to survive as-is.
        if char in ('"', '\\'):
            char = '\\' + char
        self.text += char
        self.consume(found)

    def text_process_eof(self, found):
        """Flush any buffered text at end of input."""
        self.flush_text()
if __name__ == '__main__':
    # Command-line entry point: translate the PSP file named by the first
    # argument (or an empty template when none is given) and print the
    # generated Python code.
    import sys
    psp_str = ''
    psp_filename = None
    if len(sys.argv) > 1:
        psp_filename = sys.argv[1]
        # Use a context manager so the file handle is always closed.
        with open(psp_filename, 'rb') as infile:
            psp_str = infile.read()
        if not isinstance(psp_str, str):
            # Python 3 returns bytes for 'rb'; the parser works on text.
            # NOTE(review): assumes templates are UTF-8 encoded -- confirm.
            psp_str = psp_str.decode('utf-8')
    p = PspParser(psp_str, psp_filename)
    # Call form of print is valid on both Python 2 and Python 3.
    print(p.run())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment