Created
June 26, 2018 01:05
-
-
Save calroc/ad4321dc4c43d5f3e09d39283461b8fe to your computer and use it in GitHub Desktop.
Dirty but functional implementation of Brzozowski’s derivatives of regular expressions.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import product | |
phi = frozenset() | |
y = frozenset({''}) | |
syms = O, l = frozenset({'0'}), frozenset({'1'}) | |
AND, CONS, KSTAR, NOT, OR = 'and cons * not or'.split() | |
I = (KSTAR, (OR, O, l)) # Match anything. I.e. [01]* Spelled I or . | |
# The expression from Brzozowski: (.111.) & (.01 + 11*)' | |
# ( a ) & ( b + c )' | |
a = (CONS, I, (CONS, l, (CONS, l, (CONS, l, I)))) | |
b = (CONS, I, (CONS, O, l)) | |
c = (CONS, l, (KSTAR, l)) | |
it = (AND, a, (NOT, (OR, b, c))) | |
def nully(R): | |
''' | |
Return y if y is in R otherwise phi. | |
''' | |
if R in syms or R == phi: | |
return phi | |
if R == y: | |
return y | |
tag = R[0] | |
if tag == KSTAR: | |
return y | |
if tag == NOT: | |
return y if nully(R[1]) == phi else phi | |
left, right = nully(R[1]), nully(R[2]) | |
return left & right if tag in {AND, CONS} else left | right | |
# This is the straightforward version with no "compaction". | |
# It works fine, but does waaaay too much work because the | |
# expressions grow each derivation. | |
##def D(symbol): | |
## | |
## def derv(R): | |
## | |
## if R == {symbol}: | |
## return y | |
## | |
## if R == y or R == phi or isinstance(R, set) and len(R) == 1: | |
## return phi | |
## | |
## assert isinstance(R, tuple), repr(R) | |
## tag = R[0] | |
## if tag == KSTAR: | |
## return (CONS, derv(R[1]), R) | |
## | |
## if tag == NOT: | |
## return (NOT, derv(R[1])) | |
## | |
## left, right = R[1:] | |
## if tag == CONS: | |
## A = (CONS, derv(left), right) | |
## if nully(left) == phi: | |
## return A | |
## return (OR, A, derv(right)) | |
## | |
## left, right = derv(left), derv(right) | |
## if isinstance(left, set) and isinstance(right, set): | |
## if tag == AND: | |
## return left & right | |
## assert tag == OR | |
## return left | right | |
## return (tag, left, right) | |
## | |
## return derv | |
def D_compaction(symbol): | |
def derv(R): | |
if R == {symbol}: | |
return y | |
if R == y or R == phi or R in syms: | |
return phi | |
tag = R[0] | |
if tag == KSTAR: | |
return _make_cons(derv(R[1]), R) | |
if tag == NOT: | |
return (NOT, derv(R[1])) | |
left, right = R[1:] | |
if tag == CONS: | |
k = derv(left) | |
if k == phi or right == phi: | |
A = phi | |
elif k == y: | |
A = right | |
else: | |
A = _make_cons(k, right) | |
return A if nully(left) == phi else _make_or(A, derv(right)) | |
left, right = derv(left), derv(right) | |
return _make_and(left, right) if tag == AND else _make_or(left, right) | |
return derv | |
def _make_and(a, b): | |
if a == I: return b | |
if b == I: return a | |
return (AND, a, b) | |
def _make_or(a, b): | |
if a == phi: return b | |
if b == phi: return a | |
if a == I or b == I: return I | |
return (OR, a, b) | |
def _make_cons(a, b): | |
if a == y: return b | |
if b == y: return a | |
if a == phi or b == phi: return phi | |
return (CONS, a, b) | |
class Memo(object): | |
def __init__(self, f): | |
self.f = f | |
self.calls = self.hits = 0 | |
self.mem = {} | |
def __call__(self, key): | |
self.calls += 1 | |
try: | |
result = self.mem[key] | |
self.hits += 1 | |
except KeyError: | |
result = self.mem[key] = self.f(key) | |
return result | |
o, z = Memo(D_compaction('0')), Memo(D_compaction('1')) | |
def S(re): | |
return o(re), z(re) | |
def stringy(re): | |
if re == I: | |
return '.' | |
if re in syms: | |
return next(iter(re)) | |
if re == y: | |
return '^' | |
if re == phi: | |
return 'X' | |
assert isinstance(re, tuple), repr(re) | |
tag = re[0] | |
if tag == KSTAR: | |
body = stringy(re[1]) | |
if not body: | |
return '' | |
return '(' + body + ')*' | |
if tag == NOT: | |
body = stringy(re[1]) | |
if not body: | |
return '' | |
if len(body) > 1: | |
return '(' + body + ")'" | |
return body + "'" | |
left, right = stringy(re[1]), stringy(re[2]) | |
if tag == CONS: | |
return left + right | |
if tag == OR: | |
return left + ' | ' + right | |
return '(' + left + ') | (' + right + ')' | |
if tag == AND: | |
return '(' + left + ') & (' + right + ')' | |
# print it | |
## ('and', | |
## ('cons', | |
## ('*', ('or', set(['1']), set(['0']))), | |
## set(['1']), | |
## set(['1']), | |
## set(['1']), | |
## ('*', ('or', set(['1']), set(['0'])))), | |
## ('not', ('or', | |
## ('cons', | |
## ('*', ('or', set(['1']), set(['0']))), | |
## set(['0']), | |
## set(['1'])), | |
## ('cons', set(['1']), ('*', set(['1'])))))) | |
##print stringy(it) | |
##R = it | |
###ds = [o, o, o,] | |
###ds = [z, o, z, o, z, o, z, o, ] | |
###ds = [z] * 7 | |
##ds = [o] * 7 | |
##while ds: | |
## R = ds.pop()(R) | |
## print stringy(R) | |
h = (CONS, (CONS, O, l), I) | |
j = (CONS, (CONS, O, l), (KSTAR, l)) | |
j = (CONS, (CONS, l, O), (CONS, O, l)) | |
k = (CONS, (CONS, l, l), (CONS, O, l)) | |
d = (CONS, I, (CONS, O, (CONS, O, l))) | |
##nully(z(o(o(d)))) | |
if __name__ == '__main__': | |
REs = set() | |
N = 5 | |
names = list(product(*(N * [(0, 1)]))) | |
dervs = list(product(*(N * [(o, z)]))) | |
for name, ds in zip(names, dervs): | |
R = it # (OR, j, k) # (CONS, O, (CONS, l, (CONS, f, l))) | |
print ''.join(str(n) for n in reversed(name)) | |
print stringy(R) | |
ds = list(ds) | |
while ds: | |
R = ds.pop()(R) | |
if R == phi: | |
print ' X' | |
break | |
if R == I: | |
break | |
REs.add(R) | |
print (' ', '+')[nully(R) == y], stringy(R) | |
print nully(R) == y | |
print '=' * 80 | |
print o.hits, '/', o.calls | |
print z.hits, '/', z.calls | |
for s in sorted( | |
(stringy(r) for r in REs), | |
key=lambda n: (len(n), n)): | |
print s | |
''' | |
... | |
(.111.) & ((.01 | 11*)') | |
... | |
(.01 )' | |
(.01 | 1 )' | |
(.01 | ^ )' | |
(.01 | 1*)' | |
(.111.) & ((.01 | 1 )') | |
(.111. | 11.) & ((.01 | ^ )') | |
(.111. | 11.) & ((.01 | 1*)') | |
(.111. | 11. | 1.) & ((.01 )') | |
(.111. | 11. | 1.) & ((.01 | 1*)') | |
''' | |
##for name, (d0, d1, d2, d3) in zip(names, dervs): | |
## print name, nully(d3(d2(d1(d0(it))))) | |
##def check(s, re): | |
## for ch in s: | |
## der = D_compaction(ch) | |
## re = der(re) | |
## print '...', len(str(re)), nully(re) == y | |
## return nully(re) == y | |
##for s in ( | |
## '11011100', | |
## '110111', | |
## '11011', | |
## '11101', | |
## '11111', | |
## '110110110110110110110110111' | |
## ): | |
## print s, check(s, it) | |
##der = D('1') | |
##for t in (phi, y, l, O, g, I, f, e, d, c, b, a, it): | |
### der(t) | |
## print t | |
### print nully(t) | |
## print der(t) | |
## print '=' * 40 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment