Last active
March 1, 2018 00:02
-
-
Save blakehurlburt/3350672df5db78516be248ace1076e3e to your computer and use it in GitHub Desktop.
Checks to see if two regexes match the same language (low error but no guarantee)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Author: Blake Hurlburt | |
### This program takes in two regular expressions and tries to determine | |
### whether they recognize the same language. | |
### If the universe is small enough, it tests the entire universe; | |
### otherwise, it tests a random sample from the universe. | |
import itertools as it | |
import re | |
import random | |
import threading | |
##################### | |
# Inputs # | |
##################### | |
alphabet = 'ab' # Alphabet for the language | |
maxlen = 21 # Maximum word length | |
r1 = re.compile(r'(a*b*)*') # Regex 1 | |
r2 = re.compile(r'(a|b)*') # Regex 2 | |
##################### | |
bound = int(5E6) # 5 million ~ 1 minute | |
total = len(alphabet) ** (maxlen+1) - 1 | |
if total <= bound: | |
# test every string in the universe | |
length = total | |
strings = it.chain.from_iterable( | |
it.product(alphabet, repeat=i) for i in range(maxlen+1) | |
) | |
else: | |
# universe too large, try 10 million random strings in the universe | |
print('Warning: universe too large, trying random sample of universe') | |
length = bound + len(alphabet) + 1 | |
''' | |
Returns the string in the universe at the specified index | |
''' | |
def stringOf(index, base=len(alphabet)+1): | |
base = len(alphabet) | |
res = '' | |
index = (index - 1) % total + 1 | |
while index > 1: | |
res += alphabet[index % base] | |
index //= base | |
return res[::-1] | |
strings = it.chain.from_iterable([ | |
[''], # ensure we test epsilon | |
alphabet, # ensure we test each single-character string | |
(stringOf(i) for i in random.sample(range(total), k=bound)), # random sample | |
]) | |
i=0 # need forward declaration of i for progress function | |
''' | |
check progress every 3 seconds | |
''' | |
def progress(): | |
try: | |
print('Progress: {}%'.format(round(100 * i / length))) | |
global timer | |
timer = threading.Timer(3.0, progress) | |
timer.start() | |
except Exception as e: | |
print(e) | |
timer.cancel() | |
timer = threading.Timer(3.0, progress) | |
timer.start() | |
# check each string | |
for i,s in enumerate(strings): # i used in progress function | |
s=''.join(s) # convert from tuple of chars to string | |
m1 = r1.fullmatch(s) # check regex 1 | |
m2 = r2.fullmatch(s) # check regex 2 | |
if bool(m1) != bool(m2): # if one matched but not the other | |
yes,no = (r1,r2) if m1 else (r2,r1) | |
print("'{}' matches \"{}\", but '{}' does not".format( | |
yes.pattern, s, no.pattern | |
)) | |
print('Fail') | |
break # no need to check further | |
else: | |
print('Pass') | |
# stop progress function | |
timer.cancel() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment