Last active
July 25, 2024 23:55
-
-
Save kachayev/987a78d8f2705a733761 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Alexey Kachayev, 2014 | |
## Link to slides: | |
## http://goo.gl/n4ylC4 | |
## Basic: | |
## type Parser = String -> Tree | |
## Composition | |
## type Parser = String -> (Tree, String) | |
## Generalise | |
## type Parser a = String -> (a, String) | |
## Errors | |
## type Parser a = String -> Maybe (a, String) | |
## Better... | |
## type Parser a = String -> [(a, String)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# match any letter | |
def any(inp): | |
return [(inp[0], inp[1:])] if inp else [] | |
any("python2.7") | |
# => [('p', 'ython2.7')] | |
any("") | |
# = [] | |
# match only given letter | |
def match(p): | |
def _inner(inp): | |
return [(inp[0], inp[1:])] if (inp and inp[0] == p) else [] | |
return _inner | |
p1 = match("p") | |
p1("python2.7") | |
# => [('p', 'ython2.7')] | |
p1("scala2.11") | |
# => [] | |
# skip given letter | |
def skip(p): | |
def _inner(inp): | |
return [("", inp[1:])] if (inp and inp[0] == p) else [] | |
return _inner | |
p2 = skip("(") | |
# => [('', 'python2.7)')] | |
# always produce given elem | |
def unit(p): | |
return lambda inp: [(p, inp)] | |
p3 = unit("P") | |
p3("python2.7") | |
# => [('P', 'python2.7')] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import partial | |
from operator import eq | |
# sat :: (a -> Bool) -> Parser a | |
def sat(pred): | |
return lambda inp: filter(lambda (v,_): pred(v), any(inp)) | |
# match :: a -> Parser a | |
def match(p): | |
return sat(partial(eq, p)) | |
p5 = match("P") | |
p5("Python2.7") | |
# => [('P', 'ython2.7')] | |
# skip :: Char -> Parser Char | |
def skip(p): | |
return lambda inp: map(lambda (_, tail): ("", tail), match(p)(inp)) | |
p6 = skip("P") | |
p6("Python2.7") | |
# => [('', 'ython2.7')] | |
p6("python2.7") | |
# => [] | |
from operator import methodcaller | |
digit = sat(methodcaller("isdigit")) | |
digit("2.7python") | |
# => [('2', '.7python')] | |
digit("python2.7") | |
# => [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fn.iters import filter, first, rest | |
class parser(object): | |
def __init__(self, f): | |
self._f = f | |
def parse(self, inp): | |
return self._f(inp) | |
def parseAll(self, inp): | |
f = first(filter(lambda (v, tail): tail == "", self.parse(inp))) | |
return f[0] if f else None | |
def __call__(self, inp): | |
return self.parse(inp) | |
def __or__(self, r): | |
return parser(lambda inp: self.parse(inp) + r.parse(inp)) | |
def __and__(self, r): | |
def _inner(inp): | |
return [((v1, v2), tail2) for (v1,tail1) in self.parse(inp) | |
for (v2, tail2) in r.parse(tail1)] | |
return parser(_inner) | |
any = parser(lambda inp: [(inp[0], inp[1:])] if inp else []) | |
def unit(p): | |
return parser(lambda inp: [(p, inp)]) | |
def sat(pred): | |
return parser(lambda inp: filter(lambda (v,_): pred(v), any(inp))) | |
def match(p): | |
return sat(partial(eq, p)) | |
def skip(p): | |
return parser(lambda inp: map(lambda (_, tail): ("", tail), match(p).parse(inp))) | |
char = match | |
digit = sat(methodcaller("isdigit")) | |
p11 = digit & char(".") & digit | |
p11.parse("2.7") | |
# => [((('2', '.'), '7'), '')] | |
def many(prs): | |
return (prs & parser(lambda inp: many(prs).parse(inp))) | unit(()) | |
def string(s): | |
return reduce(and_, map(match, s)) | |
string("python").parse("python2.7") | |
# => [(((((('p', 'y'), 't'), 'h'), 'o'), 'n'), '2.7')] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fn import Stream | |
combine = lambda v1,v2: (v1, v2) | |
class parser(object): | |
def __init__(self, f): | |
self._f = f | |
def parse(self, inp): | |
if inp == "": return [] | |
return self._f(inp) | |
def parseAll(self, inp): | |
f = first(filter(lambda (v, tail): tail == "", self.parse(inp))) | |
return f[0] if f else None | |
@classmethod | |
def delay(cls, f, *args): | |
return cls(lambda inp: f(*args).parse(inp)) | |
def andThen(self, r): | |
def _parse(inp): | |
for (v1, tail1) in self.parse(inp): | |
for (v2, tail2) in r.parse(tail1): | |
yield (combine(v1, v2), tail2) | |
return parser(_parse) | |
def orElse(self, r): | |
return parser(lambda inp: Stream() << self.parse(inp) << r.parse(inp)) | |
def __call__(self, inp): | |
return self.parse(inp) | |
def __or__(self, r): | |
return self.orElse(r) | |
def __and__(self, r): | |
return self.andThen(r) | |
from operator import add | |
combine = add | |
# simple example | |
# expr -> "python" \d "." \d | |
version = digit & char(".") & digit | |
py_ver = string("python").andThen(version) | |
py_ver.parseAll("python2.7") | |
# => 'python2.7' | |
py_ver.parseAll("python3.2") | |
# => 'python3.2' | |
py_ver.parseAll("scala2.11") | |
# => None | |
# more | |
# parse [1,2,3,4,5] | |
# expr -> "[" \d ("," \d)* "]" | |
ints = digit.andThen(many(skip(",").andThen(digit))) | |
wrapped = skip("[") & ints & skip("]") | |
wrapped.parseAll("[1,2,3,4,5]") | |
# => '12345' | |
wrapped.parseAll("[1,2,3,4,") | |
# => None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
p :: Parser String | |
p = do char '[' | |
d <- digit | |
ds <- many (do {char ','; digit}) | |
char ']' | |
return (d:ds) | |
char '[' >>= \_ -> | |
digit >>= \d -> | |
digits >>= \ds -> | |
char ']' >>= _ -> | |
f d ds |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class parser(object): | |
def __init__(self, f): | |
self._f = f | |
def parse(self, inp): | |
return self._f(inp) | |
def parseAll(self, inp): | |
f = first(filter(lambda (v, tail): tail == "", self.parse(inp))) | |
return f[0] if f else None | |
@classmethod | |
def delay(cls, f, *args): | |
return cls(lambda inp: f(*args).parse(inp)) | |
# unit :: a -> Parser a | |
@classmethod | |
def unit(cls, a): | |
return cls(lambda inp: (yield (a, inp))) | |
# bind :: Parser a-> (a -> Parser b) -> Parser b | |
def bind(self, f): | |
def _parse(inp): | |
for (v, tail) in self.parse(inp): | |
for (v2, tail2) in f(v).parse(tail): | |
yield (v2, tail2) | |
return parser(_parse) | |
# lift :: (a -> b) -> (a -> Parser b) | |
@classmethod | |
def lift(cls, f): | |
return (lambda v: cls.unit(f(v))) | |
# lifted :: Parser a -> (a -> b) -> Parser b | |
def lifted(self, f): | |
return self.bind(parser.lift(f)) | |
def __rshift__(self, f): | |
return self.bind(f) | |
def andThen(self, r): | |
return self.bind(lambda v1: | |
r.bind(lambda v2: | |
parser.unit(combine(v1, v2)))) | |
def orElse(self, r): | |
return parser(lambda inp: Stream() << self.parse(inp) << r.parse(inp)) | |
def __call__(self, inp): | |
return self.parse(inp) | |
def __or__(self, r): | |
return self.orElse(r) | |
def __and__(self, r): | |
return self.andThen(r) | |
# use it | |
any = parser(lambda inp: [(inp[0], inp[1:])] if inp else []) | |
failure = parser(lambda _: []) | |
def sat(pred): | |
return any >> (lambda v: parser.unit(v) if pred(v) else failure) | |
def match(p): | |
return sat(partial(eq, p)) | |
def skip(p): | |
return match(p).lifted(lambda _: "") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
digits = many(char(",") >> (lambda _: | |
digit >> (lambda d: | |
parser.unit(d)))) | |
p8 = char("[") >> (lambda _: | |
digit >> (lambda d: | |
digits >> (lambda ds: | |
char("]") >> (lambda _: | |
parser.unit(d + ds))))) | |
p8.parseAll("[1,2,3,4,5]") | |
# => "12345" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ret = parser.unit | |
def do(f): | |
def _runner(*args, **kwargs): | |
gen = f(*args, **kwargs) | |
def _parse(inp): | |
s = gen.send(None) | |
linp = inp | |
while 1: | |
r = list(s.parse(linp)) | |
if len(r): | |
(a, tail) = first(r) | |
try: s = gen.send(a) | |
except StopIteration, e: | |
return [(a, tail)] | |
else: linp = tail | |
else: return [] | |
return parser(_parse) | |
return _runner | |
@do | |
def p(): | |
yield char("[") | |
d = yield digit | |
ds = yield many(skip(",").andThen(digit)) | |
yield char("]") | |
yield ret(d + ds) | |
p().parseAll("[1,2,3,4,5]") | |
# => "12345" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def choice(ps): return reduce(or_, map(match, ps)) | |
space = char(" ").orElse(string("\n")) | |
spaces = many(space) | |
letter = sat(methodcaller("isalpha")) | |
operator = choice("-+*/") | |
token = plus(digit | letter | operator) | |
@do | |
def lisp(): | |
yield char("(") | |
t = yield token.orElse(lisp()) | |
ts = yield body().orElse(ret([])) | |
yield char(")") | |
yield ret([t] + ts) | |
@do | |
def body(): | |
yield plus(space) | |
t = yield token.orElse(lisp()) | |
ts = yield body().orElse(ret([])) | |
yield ret([t] + ts) | |
lisp().parseAll("(define)") | |
# => ['define'] | |
lisp().parseAll("(define diameter (lambda (r) (* 2 r)))") | |
# => ['define' 'diameter' ['lambda' ['r'] ['*' '2' 'r']]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@do | |
def p(): | |
yield char("[") | |
d = yield digit | |
ds = yield many(skip(",").andThen(digit)) | |
yield char("]") | |
yield ret(d + ds) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You must have missed online here: