Skip to content

Instantly share code, notes, and snippets.

@dagoof
Created September 11, 2012 13:25
Show Gist options
  • Save dagoof/3698444 to your computer and use it in GitHub Desktop.
Save dagoof/3698444 to your computer and use it in GitHub Desktop.
Minimal Python PEG implementation
import functools, itertools, operator
import json
class attrdict(dict):
def __getattr__(self, key): return self.get(key)
def __setattr__(self, key, value): self[key] = value
def rankedresult(rank, result):
return attrdict(rank = rank, result = result)
result_value = operator.attrgetter
result_rank = result_value('rank')
result_result = result_value('result')
echo = lambda r: r
p = functools.partial
class Matcher(object):
def __init__(self, *values):
self.func = echo
self.values = values
def __call__(self, func):
self.func = func
return self
class Instruction(Matcher):
def match(self, _input):
def _match(instruction):
if _input.startswith(instruction):
return instruction
return ''
result = sorted(map(_match, self.values), key = len)[-1]
return rankedresult(len(result), self.func(result))
class Or(Matcher):
def match(self, _input):
def _match(expression):
return expression.match(_input)
return sorted(map(_match, self.values), key = result_rank)[-1]
class Sequence(Matcher):
def match(self, _input):
collected = [ ]
for expression in self.values:
collected.append( expression.match(
_input[sum(map(result_rank, collected)):]))
if not result_rank(collected[-1]):
return rankedresult(0, [ ])
return rankedresult(sum(map(result_rank, collected)),
self.func(map(result_result, collected)))
class Repeated(Matcher):
def __init__(self, value):
self.func = echo
self.value = value
def match(self, _input):
expression = self.value
collected = [ expression.match(_input) ]
while result_rank(collected[-1]):
collected.append(expression.match(
_input[sum(map(result_rank, collected)):]))
collected = collected[:-1]
return rankedresult(sum(map(result_rank, collected)),
self.func(map(result_result, collected)))
def crange(start, end):
return Instruction(*map(chr, range(ord(start), ord(end) + 1)))
if __name__ == '__main__':
# Instruction
foo_bar = Instruction('foo', 'bar')
foobar = Instruction('foobar')
print foo_bar.match('foobar')
# Or
foobars = Or(foo_bar, foobar)
print foobars.match('foocar')
print foobars.match('foobar')
print foobars.match('bar foo')
# Sequence
separators = Instruction(':', ';', '.', ',')
print Sequence(foobars, separators, foobars).match('foo:bar')
print Sequence(foobars, separators, foobars).match('foo,foobar')
print Sequence(foobars, separators, foobars).match('foob.foobar')
# Repeated
whitespace = Repeated(Instruction(' ', '\n', '\t'))
print whitespace.match(' \n \t ')
# cleaners
whitespace = Repeated(Instruction(' ', '\n', '\t'))(lambda r: '')
print whitespace.match(' \n \t ')
lower = crange('a', 'z')
upper = crange('A', 'Z')
digit = crange('0', '9')
word = Repeated(Or(lower, upper, digit))(''.join)
sentence = Repeated(Or(word, whitespace))(p(filter, bool))
print sentence.match('''
You havent seen the best yet Throw out your net if you want\
to get a piece of that brick throwin chick that will stick\
to your skin like a cold sweat''')
lp = Instruction('(')
rp = Instruction(')')
s_expression = Sequence(lp, sentence, rp)(lambda (l, body, r): body)
print s_expression.match('(filter odd 1 2 3 4 5)')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment