-
-
Save jdnier/7749326209d4cb241c766c0e0151ff9e to your computer and use it in GitHub Desktop.
Python parsing with derivatives.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# This file implements Brzozowski's parsing-with-derivatives (see: | |
# https://arxiv.org/abs/1604.04695) with fairly simple and | |
# straightforward Python code. This implementation is naive and | |
# subject to exponential memory growth in the worst case; in practice, | |
# average complexity is closer to linear. This version recalculates | |
# derivatives every time they are needed; memoization, the first and | |
# most obvious optimization, is not implemented. | |
# | |
# For details and a pedagogical example, please see: | |
# http://www.elfsternberg.com/2018/01/24/parsing-derivatives-naive-python-edition/ | |
class Derives(object): | |
def __call__(self, w, l = None): | |
if l == None: | |
l = self | |
if (w == ""): | |
return nullable(l) | |
return self.__call__(w[1:], self.derive(w[0], l)) | |
def derive(self, c, o = None): | |
if o == None: | |
o = self | |
if isinstance(o, Empty): return Empty() | |
if isinstance(o, Eps): return Empty() | |
if isinstance(o, Char): | |
if (o.c == c): | |
return Eps() | |
return Empty() | |
if isinstance(o, Rep): | |
return Cat(o.r.derive(c), o) | |
if isinstance(o, Alt): | |
return Alt(o.l.derive(c), o.r.derive(c)) | |
if isinstance(o, Cat): | |
left_derivative = Cat(o.l.derive(c), o.r) | |
if nullable(o.l): | |
return Alt(left_derivative, o.r.derive(c)) | |
return left_derivative | |
class Empty(Derives): | |
def __init__(self, *args, **kwargs): pass | |
def __str__(self): return "(Empty)" | |
class Eps(Derives): | |
def __init__(self, *args, **kwargs): pass | |
def __str__(self): return "(EPS)" | |
class Char(Derives): | |
def __init__(self, c, *args, **kwargs): | |
self.c = c | |
def __str__(self): return "(char '{}')".format(self.c) | |
class Cat(Derives): | |
def __init__(self, l, r, *args, **kwargs): | |
if len(args) > 0: | |
self.l = l | |
self.r = Cat(r, args[0], *args[1:]) | |
else: | |
self.l = l | |
self.r = r | |
def __str__(self): | |
return "(cat {} {})".format(self.l, self.r) | |
class Alt(Derives): | |
def __init__(self, l, r, *args, **kwargs): | |
if len(args) > 0: | |
self.l = l | |
self.r = Alt(r, args[0], *args[1:]) | |
else: | |
self.l = l | |
self.r = r | |
def __str__(self): | |
return "(alt {} {})".format(self.l, self.r) | |
class Rep(Derives): | |
def __init__(self, r, *args, **kwargs): | |
self.r = r | |
def __str__(self): | |
return "(rep {})".format(self.r) | |
def nullable(l): | |
if isinstance(l, Empty) or isinstance(l, Char): return False | |
if isinstance(l, Eps) or isinstance(l, Rep): return True | |
if isinstance(l, Alt): return nullable(l.l) or nullable(l.r) | |
if isinstance(l, Cat): return nullable(l.l) and nullable(l.r) | |
raise "Not a language." | |
digit = Alt(*[Char(i) for i in "0123456789"]) | |
floater = Cat(Alt(Eps(), Alt(Char("+"), Char("-"))), | |
Rep(digit), | |
Char("."), | |
digit, | |
Rep(digit)) | |
for i in [("-2.0", True), ("1", False), ("", False), ("+12.12", True), ("1.0", True)]: | |
print i[1], floater(i[0]) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment