Skip to content

Instantly share code, notes, and snippets.

@pervognsen
Last active February 2, 2018 11:52
Show Gist options
  • Save pervognsen/b95f6eb73ded6abb7a99e9c73ea16c7e to your computer and use it in GitHub Desktop.
Save pervognsen/b95f6eb73ded6abb7a99e9c73ea16c7e to your computer and use it in GitHub Desktop.
def record(name, fields=''):
fields = fields.split()
class cls:
def __init__(self, *args):
for key, value in zip(fields, args):
self.__dict__[key] = value
def __repr__(self):
return '%s(%s)' % (name, ', '.join(repr(self.__dict__[key]) for key in fields))
cls.__name__ = name
return cls
def memoize1(f):
memo = {}
def f_memo(x):
try: x_memo = memo[x]
except: x_memo = memo[x] = f(x)
return x_memo
f_memo.memo = memo
return f_memo
def memoize2(f):
memo = {}
def f_memo(x, y):
try: x_memo = memo[x]
except: x_memo = memo[x] = {}
try: xy_memo = x_memo[y]
except: xy_memo = x_memo[y] = f(x, y)
return xy_memo
f_memo.memo = memo
return f_memo
null = record('null')()
epsilon = record('epsilon')()
Alt = record('alt', 'e1 e2')
Seq = record('seq', 'e1 e2')
Star = record('star', 'e1')
@memoize2
def alt(e1, e2):
if id(e2) < id(e1):
return alt(e2, e1)
elif e1 is null:
return e2
elif e2 is null:
return e1
elif e1 is e2:
return e1
elif isinstance(e1, Alt):
return alt(e1.e1, alt(e1.e2, e2))
else:
return Alt(e1, e2)
@memoize2
def seq(e1, e2):
if e1 is null or e2 is null:
return null
elif e1 is epsilon:
return e2
elif e2 is epsilon:
return e1
elif isinstance(e1, Seq):
return seq(e1.e1, seq(e1.e2, e2))
else:
return Seq(e1, e2)
@memoize1
def star(e):
if e is null or e is epsilon:
return epsilon
elif isinstance(e, Star):
return e.e1
else:
return Star(e)
def opt(e):
return alt(epsilon, e)
def plus(e):
return seq(e, star(e))
@memoize1
def nullable(e):
if e is null or isinstance(e, str):
return False
elif e is epsilon or isinstance(e, Star):
return True
elif isinstance(e, Alt):
return nullable(e.e1) or nullable(e.e2)
elif isinstance(e, Seq):
return nullable(e.e1) and nullable(e.e2)
@memoize2
def derive(e, c):
if e is null or e is epsilon:
return null
elif isinstance(e, str):
return epsilon if e == c else null
elif isinstance(e, Alt):
return alt(derive(e.e1, c), derive(e.e2, c))
elif isinstance(e, Seq):
d = seq(derive(e.e1, c), e.e2)
return alt(d, derive(e.e2, c)) if nullable(e.e1) else d
elif isinstance(e, Star):
return seq(derive(e.e1, c), e)
def re_match(e, s):
for c in s:
e = derive(e, c)
return nullable(e)
def make_dfa(e):
transitions = []
final = []
def new_state():
transitions.append(None)
final.append(None)
return len(transitions)-1
visited = {}
def visit(e):
if e not in visited:
state = visited[e] = new_state()
transitions[state] = [visit(derive(e, chr(n))) for n in range(256)]
final[state] = int(nullable(e))
return visited[e]
visit(e)
return transitions, final
def dfa_match(dfa, s):
transitions, final = dfa
state = 0
for c in s:
state = transitions[state][ord(c)]
return final[state]
def dfa_generate_c(dfa):
transitions, final = dfa
states = range(len(transitions))
print("%s transitions[][256] = {" % ("char" if len(states) <= 256 else "short"))
for state in states:
print(" {%s}," % ','.join(str(transitions[state][n]) for n in range(256)))
print("};")
print("char final[] = {%s};" % ', '.join(str(final[state]) for state in states))
from timeit import timeit
s = "x"*10_000_000 + "y"
e = seq(plus(seq(plus('x'), plus('x'))), 'y')
print(timeit(lambda: print(re_match(e, s)), number=1))
dfa = make_dfa(e)
print(timeit(lambda: print(dfa_match(dfa, s)), number=1))
dfa_generate_c(dfa)
@agoose77
Copy link

agoose77 commented Feb 2, 2018

Haha, I know what this is!
I was looking through your gists since I saw your tweet re conic sections https://t.co/Utq9RtmvWx
I really like the application of derivatives in parsing, whether or not they are a performant solution

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment