Created
January 28, 2014 05:29
-
-
Save favila/8662711 to your computer and use it in GitHub Desktop.
Various ways to test if all members of a collection are unique in Python 2.7, with timing harness.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import array | |
import collections | |
import bisect | |
re_dup_g = re.compile(r'(.).*\1', re.DOTALL) | |
re_dup_ng = re.compile(r'(.).*?\1', re.DOTALL) | |
def isunique_reg(s, search=re_dup_g.search): | |
return search(s) is None | |
def isunique_reng(s, search=re_dup_ng.search): | |
return search(s) is None | |
def isunique_set(s, set=set, len=len): | |
return len(s) == len(set(s)) | |
def isunique_forset(s, set=set): | |
seen = set() | |
add = seen.add | |
for c in s: | |
if c in seen: | |
return False | |
add(c) | |
return True | |
def isunique_array(s, array=array.array): | |
seen = array('u') | |
append = seen.append | |
for c in s: | |
if c in seen: | |
return False | |
append(c) | |
return True | |
def isunique_deque(s, deque=collections.deque): | |
seen = deque() | |
append = seen.append | |
for c in s: | |
if c in seen: | |
return False | |
append(c) | |
return True | |
def isunique_bisect(s, find=bisect.bisect_right, array=array.array): | |
seen = array('u') | |
insert = seen.insert | |
for c in s: | |
i = find(seen, c) | |
if i and seen[i-1] == c: | |
return False | |
insert(i, c) | |
return True | |
def isunique_bisectl(s, find=bisect.bisect_right): | |
seen = [] | |
insert = seen.insert | |
for c in s: | |
i = find(seen, c) | |
if i and seen[i-1] == c: | |
return False | |
insert(i, c) | |
return True | |
def isunique_count(s, Counter=collections.Counter): | |
return Counter(s).most_common(1)[0][1]==1 | |
def isunique_list(s): | |
seen = [] | |
append = seen.append | |
for c in s: | |
if c in seen: | |
return False | |
append(c) | |
return True | |
def _test(): | |
funcs = [f for n,f in globals().items() if n.startswith('isunique_')] | |
cases = [ | |
(u'string given', False), | |
(u'string uoqzd', True), | |
] | |
for func in funcs: | |
for s,rv in cases: | |
try: | |
assert rv is func(s) | |
except AssertionError, e: | |
print "%s(%r) is not %r" % (func.__name__, s, rv) | |
raise e | |
def _time(): | |
import timeit | |
funcs = [f for n,f in globals().items() if n.startswith('isunique_')] | |
funcs.sort(key=lambda f: f.__name__) | |
cases = [ | |
('!uniq', u'string given', False), | |
('uniq', u'string uoqzd', True), | |
] | |
casenames = [name for name, _, _ in cases] | |
fwidth = max(len(f.__name__) for f in funcs) | |
timeitsetup = 's = {!r}; from __main__ import {} as u' | |
print('{: <{fwidth}s}|{: >15s}|{: >15s}'.format('func', *casenames, fwidth=fwidth)) | |
print('-'*(fwidth+2+15+15)) | |
for f in funcs: | |
times = [timeit.timeit('u(s)', setup=timeitsetup.format(input, f.__name__)) for _, input, _ in cases] | |
print('{: <{fwidth}s}|{: >15.10f}|{: >15.10f}'.format(f.__name__, *times, fwidth=fwidth)) | |
if __name__=='__main__': | |
_test() | |
_time() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment