Skip to content

Instantly share code, notes, and snippets.

@favila
Created January 28, 2014 05:29
Show Gist options
  • Save favila/8662711 to your computer and use it in GitHub Desktop.
Save favila/8662711 to your computer and use it in GitHub Desktop.
Various ways to test if all members of a collection are unique in Python 2.7, with timing harness.
import re
import array
import collections
import bisect
re_dup_g = re.compile(r'(.).*\1', re.DOTALL)
re_dup_ng = re.compile(r'(.).*?\1', re.DOTALL)
def isunique_reg(s, search=re_dup_g.search):
return search(s) is None
def isunique_reng(s, search=re_dup_ng.search):
return search(s) is None
def isunique_set(s, set=set, len=len):
return len(s) == len(set(s))
def isunique_forset(s, set=set):
seen = set()
add = seen.add
for c in s:
if c in seen:
return False
add(c)
return True
def isunique_array(s, array=array.array):
seen = array('u')
append = seen.append
for c in s:
if c in seen:
return False
append(c)
return True
def isunique_deque(s, deque=collections.deque):
seen = deque()
append = seen.append
for c in s:
if c in seen:
return False
append(c)
return True
def isunique_bisect(s, find=bisect.bisect_right, array=array.array):
seen = array('u')
insert = seen.insert
for c in s:
i = find(seen, c)
if i and seen[i-1] == c:
return False
insert(i, c)
return True
def isunique_bisectl(s, find=bisect.bisect_right):
seen = []
insert = seen.insert
for c in s:
i = find(seen, c)
if i and seen[i-1] == c:
return False
insert(i, c)
return True
def isunique_count(s, Counter=collections.Counter):
return Counter(s).most_common(1)[0][1]==1
def isunique_list(s):
seen = []
append = seen.append
for c in s:
if c in seen:
return False
append(c)
return True
def _test():
funcs = [f for n,f in globals().items() if n.startswith('isunique_')]
cases = [
(u'string given', False),
(u'string uoqzd', True),
]
for func in funcs:
for s,rv in cases:
try:
assert rv is func(s)
except AssertionError, e:
print "%s(%r) is not %r" % (func.__name__, s, rv)
raise e
def _time():
import timeit
funcs = [f for n,f in globals().items() if n.startswith('isunique_')]
funcs.sort(key=lambda f: f.__name__)
cases = [
('!uniq', u'string given', False),
('uniq', u'string uoqzd', True),
]
casenames = [name for name, _, _ in cases]
fwidth = max(len(f.__name__) for f in funcs)
timeitsetup = 's = {!r}; from __main__ import {} as u'
print('{: <{fwidth}s}|{: >15s}|{: >15s}'.format('func', *casenames, fwidth=fwidth))
print('-'*(fwidth+2+15+15))
for f in funcs:
times = [timeit.timeit('u(s)', setup=timeitsetup.format(input, f.__name__)) for _, input, _ in cases]
print('{: <{fwidth}s}|{: >15.10f}|{: >15.10f}'.format(f.__name__, *times, fwidth=fwidth))
if __name__=='__main__':
_test()
_time()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment