Created
September 11, 2012 13:25
-
-
Save dagoof/3698444 to your computer and use it in GitHub Desktop.
Minimal Python PEG implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools, itertools, operator | |
import json | |
class attrdict(dict): | |
def __getattr__(self, key): return self.get(key) | |
def __setattr__(self, key, value): self[key] = value | |
def rankedresult(rank, result): | |
return attrdict(rank = rank, result = result) | |
result_value = operator.attrgetter | |
result_rank = result_value('rank') | |
result_result = result_value('result') | |
echo = lambda r: r | |
p = functools.partial | |
class Matcher(object): | |
def __init__(self, *values): | |
self.func = echo | |
self.values = values | |
def __call__(self, func): | |
self.func = func | |
return self | |
class Instruction(Matcher): | |
def match(self, _input): | |
def _match(instruction): | |
if _input.startswith(instruction): | |
return instruction | |
return '' | |
result = sorted(map(_match, self.values), key = len)[-1] | |
return rankedresult(len(result), self.func(result)) | |
class Or(Matcher): | |
def match(self, _input): | |
def _match(expression): | |
return expression.match(_input) | |
return sorted(map(_match, self.values), key = result_rank)[-1] | |
class Sequence(Matcher): | |
def match(self, _input): | |
collected = [ ] | |
for expression in self.values: | |
collected.append( expression.match( | |
_input[sum(map(result_rank, collected)):])) | |
if not result_rank(collected[-1]): | |
return rankedresult(0, [ ]) | |
return rankedresult(sum(map(result_rank, collected)), | |
self.func(map(result_result, collected))) | |
class Repeated(Matcher): | |
def __init__(self, value): | |
self.func = echo | |
self.value = value | |
def match(self, _input): | |
expression = self.value | |
collected = [ expression.match(_input) ] | |
while result_rank(collected[-1]): | |
collected.append(expression.match( | |
_input[sum(map(result_rank, collected)):])) | |
collected = collected[:-1] | |
return rankedresult(sum(map(result_rank, collected)), | |
self.func(map(result_result, collected))) | |
def crange(start, end): | |
return Instruction(*map(chr, range(ord(start), ord(end) + 1))) | |
if __name__ == '__main__': | |
# Instruction | |
foo_bar = Instruction('foo', 'bar') | |
foobar = Instruction('foobar') | |
print foo_bar.match('foobar') | |
# Or | |
foobars = Or(foo_bar, foobar) | |
print foobars.match('foocar') | |
print foobars.match('foobar') | |
print foobars.match('bar foo') | |
# Sequence | |
separators = Instruction(':', ';', '.', ',') | |
print Sequence(foobars, separators, foobars).match('foo:bar') | |
print Sequence(foobars, separators, foobars).match('foo,foobar') | |
print Sequence(foobars, separators, foobars).match('foob.foobar') | |
# Repeated | |
whitespace = Repeated(Instruction(' ', '\n', '\t')) | |
print whitespace.match(' \n \t ') | |
# cleaners | |
whitespace = Repeated(Instruction(' ', '\n', '\t'))(lambda r: '') | |
print whitespace.match(' \n \t ') | |
lower = crange('a', 'z') | |
upper = crange('A', 'Z') | |
digit = crange('0', '9') | |
word = Repeated(Or(lower, upper, digit))(''.join) | |
sentence = Repeated(Or(word, whitespace))(p(filter, bool)) | |
print sentence.match(''' | |
You havent seen the best yet Throw out your net if you want\ | |
to get a piece of that brick throwin chick that will stick\ | |
to your skin like a cold sweat''') | |
lp = Instruction('(') | |
rp = Instruction(')') | |
s_expression = Sequence(lp, sentence, rp)(lambda (l, body, r): body) | |
print s_expression.match('(filter odd 1 2 3 4 5)') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment