-
-
Save paperclip/1941137 to your computer and use it in GitHub Desktop.
From http://thedailywtf.com/Comments/The-Regex-Code-Review.aspx - Python implementation of three alternatives
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
class Simple(object):
    """Matcher that checks each search term with a plain substring test."""

    def __init__(self, searchterms):
        """Store the iterable of literal terms to look for."""
        self.__m_searchterms = searchterms

    def isMatch(self, text):
        """Return True if at least one stored term occurs in ``text``, else False."""
        # any() stops at the first hit, just like an early return from a loop.
        return any(term in text for term in self.__m_searchterms)
class RegexLocal(object):
    """Matcher that rebuilds its alternation regex on every ``isMatch`` call.

    The terms are treated as literal strings (each one is passed through
    ``re.escape``) and combined with ``|``.
    """

    def __init__(self, searchterms):
        """Store the iterable of literal terms to look for."""
        self.__m_searchterms = searchterms

    def isMatch(self, text):
        """Search ``text`` for any of the stored terms.

        Returns the ``re`` match object for the first occurrence found
        anywhere in ``text`` (truthy), or ``None`` when no term occurs or
        there are no terms at all.
        """
        escaped = [re.escape(term) for term in self.__m_searchterms]
        if not escaped:
            # An empty alternation would compile to "" and match every text.
            return None
        # Compiled per call on purpose: this class is the "local compile"
        # variant (NOTE(review): presumably to contrast with a precompiled one).
        r = re.compile("|".join(escaped))
        # search(), not match(): match() only looks at the start of the text,
        # which would miss terms that occur later in it.
        return r.search(text)
class RegexPre(object):
    """Matcher that compiles its alternation regex once, at construction.

    The terms are treated as literal strings (each one is passed through
    ``re.escape``) and combined with ``|``.
    """

    def __init__(self, searchterms):
        """Precompile a pattern that matches any of ``searchterms``."""
        escaped = [re.escape(term) for term in searchterms]
        # With no terms the alternation would be "" and match every text,
        # so keep None as a "never matches" marker instead.
        self.__m_r = re.compile("|".join(escaped)) if escaped else None

    def isMatch(self, text):
        """Search ``text`` for any of the precompiled terms.

        Returns the ``re`` match object for the first occurrence found
        anywhere in ``text`` (truthy), or ``None`` when no term occurs or
        there were no terms at all.
        """
        if self.__m_r is None:
            return None
        # search(), not match(): match() only looks at the start of the text,
        # which would miss terms that occur later in it.
        return self.__m_r.search(text)
class Tester(object):
    """Zero-argument callable that runs one ``isMatch`` call on a fixed text.

    Being callable with no arguments lets an instance be handed directly to
    ``timeit.Timer``.
    """

    def __init__(self, searcher, text):
        """Bind the matcher under test and the text it will be run against."""
        assert searcher is not None
        self.m_searcher, self.m_text = searcher, text

    def __call__(self):
        """Invoke the matcher once; its result is discarded."""
        matcher = self.m_searcher
        matcher.isMatch(self.m_text)
import timeit | |
import random | |
import string | |
def genTerm(length=8, chars=string.ascii_letters + string.digits):
    """Return a random string of ``length`` characters drawn from ``chars``.

    ``chars`` defaults to the ASCII letters and digits. ``ascii_letters`` is
    used rather than ``string.letters`` because the latter is locale-dependent
    on Python 2 and does not exist on Python 3.
    """
    return ''.join([random.choice(chars) for _ in range(length)])
import sys  # needed for sys.argv below; the file never imported it

# Optional positional arguments, in order: length of each search term,
# number of search terms, length of the text that is searched.
termlen = int(sys.argv[1]) if len(sys.argv) > 1 else 8
termcount = int(sys.argv[2]) if len(sys.argv) > 2 else 30
textlen = int(sys.argv[3]) if len(sys.argv) > 3 else 10000

# Random terms first, then the random text, all from the same generator.
searchterms = [genTerm(termlen) for _ in range(termcount)]
text = genTerm(textlen)

s = Simple(searchterms)
rl = RegexLocal(searchterms)
rp = RegexPre(searchterms)

# Time 1000 isMatch calls per matcher and print the total seconds for each,
# in the order: Simple, RegexLocal, RegexPre.
for r in [s, rl, rp]:
    assert r is not None
    t = Tester(r, text)
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(timeit.Timer(t).timeit(1000))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment