Skip to content

Instantly share code, notes, and snippets.

@kachayev
Last active July 25, 2024 23:55
Show Gist options
  • Save kachayev/987a78d8f2705a733761 to your computer and use it in GitHub Desktop.
Save kachayev/987a78d8f2705a733761 to your computer and use it in GitHub Desktop.
## Alexey Kachayev, 2014
## Link to slides:
## http://goo.gl/n4ylC4
## Basic:
## type Parser = String -> Tree
## Composition
## type Parser = String -> (Tree, String)
## Generalise
## type Parser a = String -> (a, String)
## Errors
## type Parser a = String -> Maybe (a, String)
## Better...
## type Parser a = String -> [(a, String)]
# match any letter
def any(inp):
return [(inp[0], inp[1:])] if inp else []
any("python2.7")
# => [('p', 'ython2.7')]
any("")
# = []
# match only given letter
def match(p):
def _inner(inp):
return [(inp[0], inp[1:])] if (inp and inp[0] == p) else []
return _inner
p1 = match("p")
p1("python2.7")
# => [('p', 'ython2.7')]
p1("scala2.11")
# => []
# skip given letter
def skip(p):
def _inner(inp):
return [("", inp[1:])] if (inp and inp[0] == p) else []
return _inner
p2 = skip("(")
# => [('', 'python2.7)')]
# always produce given elem
def unit(p):
return lambda inp: [(p, inp)]
p3 = unit("P")
p3("python2.7")
# => [('P', 'python2.7')]
from functools import partial
from operator import eq
# sat :: (a -> Bool) -> Parser a
def sat(pred):
return lambda inp: filter(lambda (v,_): pred(v), any(inp))
# match :: a -> Parser a
def match(p):
return sat(partial(eq, p))
p5 = match("P")
p5("Python2.7")
# => [('P', 'ython2.7')]
# skip :: Char -> Parser Char
def skip(p):
return lambda inp: map(lambda (_, tail): ("", tail), match(p)(inp))
p6 = skip("P")
p6("Python2.7")
# => [('', 'ython2.7')]
p6("python2.7")
# => []
from operator import methodcaller
digit = sat(methodcaller("isdigit"))
digit("2.7python")
# => [('2', '.7python')]
digit("python2.7")
# => []
from fn.iters import filter, first, rest
class parser(object):
def __init__(self, f):
self._f = f
def parse(self, inp):
return self._f(inp)
def parseAll(self, inp):
f = first(filter(lambda (v, tail): tail == "", self.parse(inp)))
return f[0] if f else None
def __call__(self, inp):
return self.parse(inp)
def __or__(self, r):
return parser(lambda inp: self.parse(inp) + r.parse(inp))
def __and__(self, r):
def _inner(inp):
return [((v1, v2), tail2) for (v1,tail1) in self.parse(inp)
for (v2, tail2) in r.parse(tail1)]
return parser(_inner)
any = parser(lambda inp: [(inp[0], inp[1:])] if inp else [])
def unit(p):
return parser(lambda inp: [(p, inp)])
def sat(pred):
return parser(lambda inp: filter(lambda (v,_): pred(v), any(inp)))
def match(p):
return sat(partial(eq, p))
def skip(p):
return parser(lambda inp: map(lambda (_, tail): ("", tail), match(p).parse(inp)))
char = match
digit = sat(methodcaller("isdigit"))
p11 = digit & char(".") & digit
p11.parse("2.7")
# => [((('2', '.'), '7'), '')]
def many(prs):
return (prs & parser(lambda inp: many(prs).parse(inp))) | unit(())
def string(s):
return reduce(and_, map(match, s))
string("python").parse("python2.7")
# => [(((((('p', 'y'), 't'), 'h'), 'o'), 'n'), '2.7')]
from fn import Stream
combine = lambda v1,v2: (v1, v2)
class parser(object):
def __init__(self, f):
self._f = f
def parse(self, inp):
if inp == "": return []
return self._f(inp)
def parseAll(self, inp):
f = first(filter(lambda (v, tail): tail == "", self.parse(inp)))
return f[0] if f else None
@classmethod
def delay(cls, f, *args):
return cls(lambda inp: f(*args).parse(inp))
def andThen(self, r):
def _parse(inp):
for (v1, tail1) in self.parse(inp):
for (v2, tail2) in r.parse(tail1):
yield (combine(v1, v2), tail2)
return parser(_parse)
def orElse(self, r):
return parser(lambda inp: Stream() << self.parse(inp) << r.parse(inp))
def __call__(self, inp):
return self.parse(inp)
def __or__(self, r):
return self.orElse(r)
def __and__(self, r):
return self.andThen(r)
from operator import add
combine = add
# simple example
# expr -> "python" \d "." \d
version = digit & char(".") & digit
py_ver = string("python").andThen(version)
py_ver.parseAll("python2.7")
# => 'python2.7'
py_ver.parseAll("python3.2")
# => 'python3.2'
py_ver.parseAll("scala2.11")
# => None
# more
# parse [1,2,3,4,5]
# expr -> "[" \d ("," \d)* "]"
ints = digit.andThen(many(skip(",").andThen(digit)))
wrapped = skip("[") & ints & skip("]")
wrapped.parseAll("[1,2,3,4,5]")
# => '12345'
wrapped.parseAll("[1,2,3,4,")
# => None
p :: Parser String
p = do char '['
d <- digit
ds <- many (do {char ','; digit})
char ']'
return (d:ds)
char '[' >>= \_ ->
digit >>= \d ->
digits >>= \ds ->
char ']' >>= _ ->
f d ds
class parser(object):
def __init__(self, f):
self._f = f
def parse(self, inp):
return self._f(inp)
def parseAll(self, inp):
f = first(filter(lambda (v, tail): tail == "", self.parse(inp)))
return f[0] if f else None
@classmethod
def delay(cls, f, *args):
return cls(lambda inp: f(*args).parse(inp))
# unit :: a -> Parser a
@classmethod
def unit(cls, a):
return cls(lambda inp: (yield (a, inp)))
# bind :: Parser a-> (a -> Parser b) -> Parser b
def bind(self, f):
def _parse(inp):
for (v, tail) in self.parse(inp):
for (v2, tail2) in f(v).parse(tail):
yield (v2, tail2)
return parser(_parse)
# lift :: (a -> b) -> (a -> Parser b)
@classmethod
def lift(cls, f):
return (lambda v: cls.unit(f(v)))
# lifted :: Parser a -> (a -> b) -> Parser b
def lifted(self, f):
return self.bind(parser.lift(f))
def __rshift__(self, f):
return self.bind(f)
def andThen(self, r):
return self.bind(lambda v1:
r.bind(lambda v2:
parser.unit(combine(v1, v2))))
def orElse(self, r):
return parser(lambda inp: Stream() << self.parse(inp) << r.parse(inp))
def __call__(self, inp):
return self.parse(inp)
def __or__(self, r):
return self.orElse(r)
def __and__(self, r):
return self.andThen(r)
# use it
any = parser(lambda inp: [(inp[0], inp[1:])] if inp else [])
failure = parser(lambda _: [])
def sat(pred):
return any >> (lambda v: parser.unit(v) if pred(v) else failure)
def match(p):
return sat(partial(eq, p))
def skip(p):
return match(p).lifted(lambda _: "")
digits = many(char(",") >> (lambda _:
digit >> (lambda d:
parser.unit(d))))
p8 = char("[") >> (lambda _:
digit >> (lambda d:
digits >> (lambda ds:
char("]") >> (lambda _:
parser.unit(d + ds)))))
p8.parseAll("[1,2,3,4,5]")
# => "12345"
ret = parser.unit
def do(f):
def _runner(*args, **kwargs):
gen = f(*args, **kwargs)
def _parse(inp):
s = gen.send(None)
linp = inp
while 1:
r = list(s.parse(linp))
if len(r):
(a, tail) = first(r)
try: s = gen.send(a)
except StopIteration, e:
return [(a, tail)]
else: linp = tail
else: return []
return parser(_parse)
return _runner
@do
def p():
yield char("[")
d = yield digit
ds = yield many(skip(",").andThen(digit))
yield char("]")
yield ret(d + ds)
p().parseAll("[1,2,3,4,5]")
# => "12345"
def choice(ps): return reduce(or_, map(match, ps))
space = char(" ").orElse(string("\n"))
spaces = many(space)
letter = sat(methodcaller("isalpha"))
operator = choice("-+*/")
token = plus(digit | letter | operator)
@do
def lisp():
yield char("(")
t = yield token.orElse(lisp())
ts = yield body().orElse(ret([]))
yield char(")")
yield ret([t] + ts)
@do
def body():
yield plus(space)
t = yield token.orElse(lisp())
ts = yield body().orElse(ret([]))
yield ret([t] + ts)
lisp().parseAll("(define)")
# => ['define']
lisp().parseAll("(define diameter (lambda (r) (* 2 r)))")
# => ['define' 'diameter' ['lambda' ['r'] ['*' '2' 'r']]]
@do
def p():
yield char("[")
d = yield digit
ds = yield many(skip(",").andThen(digit))
yield char("]")
yield ret(d + ds)
@tiye
Copy link

tiye commented Aug 31, 2014

You must have missed online here:

p2 = skip("(")
# => [('', 'python2.7)')]
p2("(python2.7)")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment