Skip to content

Instantly share code, notes, and snippets.

@paperclip
Created February 29, 2012 14:13
Show Gist options
  • Save paperclip/1941137 to your computer and use it in GitHub Desktop.
Save paperclip/1941137 to your computer and use it in GitHub Desktop.
From http://thedailywtf.com/Comments/The-Regex-Code-Review.aspx - Python implementations of three alternative ways to test whether a text contains any of a list of search terms
#!/usr/bin/env python
import re
class Simple(object):
    """Matcher that uses plain substring tests, without regular expressions."""

    def __init__(self, searchterms):
        """Remember the terms to look for.

        searchterms -- collection of strings; it is iterated again on every
                       isMatch() call, so it must be re-iterable (e.g. a list).
        """
        self.__m_searchterms = searchterms

    def isMatch(self, text):
        """Return True if at least one search term occurs in text, else False."""
        # any() stops at the first hit, exactly like an early return in a loop.
        return any(needle in text for needle in self.__m_searchterms)
class RegexLocal(object):
    """Matcher that builds its alternation regex on every isMatch() call.

    This is the "compile locally" variant of the comparison: the pattern is
    deliberately assembled and compiled inside isMatch() rather than once in
    the constructor.
    """

    def __init__(self, searchterms):
        """Remember the terms to look for.

        searchterms -- collection of strings; it is iterated again on every
                       isMatch() call, so it must be re-iterable (e.g. a list).
        """
        self.__m_searchterms = searchterms

    def isMatch(self, text):
        """Return True if any search term occurs anywhere in text, else False."""
        # Escape each term so regex metacharacters are matched literally.
        escaped = [re.escape(term) for term in self.__m_searchterms]
        if not escaped:
            # Joining zero terms would give the empty pattern, which matches
            # every text; with no terms there is nothing to find.
            return False
        r = re.compile("|".join(escaped))
        # search() scans the whole text; match() would only accept a term
        # sitting at the very start of it.
        return r.search(text) is not None
class RegexPre(object):
    """Matcher that compiles its alternation regex once, in the constructor."""

    def __init__(self, searchterms):
        """Compile a single pattern that matches any of the search terms.

        searchterms -- iterable of strings; each is escaped so that regex
                       metacharacters in a term are matched literally.
        """
        escaped = [re.escape(term) for term in searchterms]
        # With no terms the joined pattern would be empty and match every
        # text, so keep None as a "never matches" marker instead.
        self.__m_r = re.compile("|".join(escaped)) if escaped else None

    def isMatch(self, text):
        """Return True if any search term occurs anywhere in text, else False."""
        if self.__m_r is None:
            return False
        # search() scans the whole text; match() would only accept a term
        # sitting at the very start of it.
        return self.__m_r.search(text) is not None
class Tester(object):
    """Zero-argument callable that runs one searcher against one fixed text.

    Instances can be handed to anything that expects a callable taking no
    arguments; each call performs a single isMatch() lookup.
    """

    def __init__(self, searcher, text):
        """Store the searcher (must not be None) and the text to search in."""
        assert searcher is not None
        self.m_searcher, self.m_text = searcher, text

    def __call__(self):
        """Run the searcher on the stored text; the result is discarded."""
        is_match = self.m_searcher.isMatch
        is_match(self.m_text)
import timeit
import random
import string
def genTerm(length=8, chars=string.ascii_letters + string.digits):
    """Return a random string of `length` characters drawn from `chars`.

    length -- number of characters to generate; 0 yields the empty string.
    chars  -- non-empty sequence of candidate characters. The default is the
              ASCII letters and digits; string.ascii_letters is used instead
              of string.letters because the latter is locale-dependent and
              does not exist on Python 3.
    """
    return ''.join([random.choice(chars) for _ in range(length)])
# sys.argv is read below, but sys is not imported anywhere else in the file
# as shown, so import it here to avoid a NameError.
import sys

# Optional positional command-line arguments, each with a default:
#   1: length of each search term   2: number of search terms
#   3: length of the text that is searched
termlen = int(sys.argv[1]) if len(sys.argv) > 1 else 8
termcount = int(sys.argv[2]) if len(sys.argv) > 2 else 30
textlen = int(sys.argv[3]) if len(sys.argv) > 3 else 10000

# Random search terms first, then the random text, so the order of calls
# into the random module stays the same as before.
searchterms = [genTerm(termlen) for _ in range(termcount)]
text = genTerm(textlen)

s = Simple(searchterms)
rl = RegexLocal(searchterms)
rp = RegexPre(searchterms)

# Time 1000 lookups per implementation and print the elapsed seconds,
# one line each, in the order: Simple, RegexLocal, RegexPre.
for r in [s, rl, rp]:
    assert r is not None
    t = Tester(r, text)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(timeit.Timer(t).timeit(1000))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment