Skip to content

Instantly share code, notes, and snippets.

@Rhomboid
Created June 5, 2016 09:04
Show Gist options
  • Save Rhomboid/5958166b013e80c7fa49aa40aaf18d99 to your computer and use it in GitHub Desktop.
Save Rhomboid/5958166b013e80c7fa49aa40aaf18d99 to your computer and use it in GitHub Desktop.
Uniqueness functions that preserve order in Python
import random
import itertools
import timeit
import re
def f1(seq):
''' continue '''
seen = set()
result = []
for item in seq:
if item in seen:
continue
seen.add(item)
result.append(item)
return result
def f2(seq):
''' if-not '''
seen = set()
result = []
for item in seq:
if item not in seen:
seen.add(item)
result.append(item)
return result
def f3(seq):
''' continue and cached lookups '''
seen = set()
result = []
seen_add = seen.add
result_append = result.append
for item in seq:
if item in seen:
continue
seen_add(item)
result_append(item)
return result
def f4(seq):
''' if-not and cached lookups '''
seen = set()
result = []
seen_add = seen.add
result_append = result.append
for item in seq:
if item not in seen:
seen_add(item)
result_append(item)
return result
def f5(seq):
''' list comprehension '''
seen = set()
return [item for item in seq if item not in seen and not seen.add(item)]
def f6(seq):
''' list comprehension with cached lookup '''
seen = set()
seen_add = seen.add
return [item for item in seq if item not in seen and not seen_add(item)]
def gen_test_data(total, unique):
items = list(itertools.islice(itertools.cycle(range(unique)), total))
random.shuffle(items)
return items
random.seed(0xfeedbeef)
test_sizes = [8**n for n in range(3, 8)]
for uniq_factor in (1, 0.8, 0.5, 0.1):
print('uniqueness factor {}'.format(uniq_factor))
print('n= {}'.format(' '.join(format(n, '12') for n in test_sizes)))
for fname, doc in sorted((fname, func.__doc__) for fname, func in globals().items() if re.match(r'f\d+$', fname)):
results = []
for n in test_sizes:
test_data = gen_test_data(n, int(n * uniq_factor))
results.append(min(timeit.repeat(fname + '(test_data)', repeat=3, number=1, globals=globals())))
print('{:2} {} {}'.format(fname, ' '.join(format(t * 1e3, '12f') for t in results), doc))
print()
uniqueness factor 1
n= 512 4096 32768 262144 2097152
f1 0.121672 0.886637 7.693327 71.831190 778.051205 continue
f2 0.127394 0.935728 7.731876 73.947193 770.246445 if-not
f3 0.096374 0.695697 5.881201 56.200590 636.565966 continue and cached lookups
f4 0.095470 0.667086 5.628823 55.815396 635.029108 if-not and cached lookups
f5 0.090049 0.611069 5.362290 52.788360 627.457453 list comprehension
f6 0.067160 0.478254 4.070883 43.047395 542.564623 list comprehension with cached lookup
uniqueness factor 0.8
n= 512 4096 32768 262144 2097152
f1 0.110227 0.761954 6.468779 60.635584 676.896829 continue
f2 0.110227 0.754124 6.543469 59.815806 677.945194 if-not
f3 0.082821 0.605347 5.133101 47.713987 568.023527 continue and cached lookups
f4 0.082520 0.571014 4.814466 47.184836 567.102255 if-not and cached lookups
f5 0.077099 0.532766 4.687674 45.528414 549.348424 list comprehension
f6 0.072581 0.436994 3.611301 37.725462 491.662174 list comprehension with cached lookup
uniqueness factor 0.5
n= 512 4096 32768 262144 2097152
f1 0.074991 0.569508 4.538295 39.820986 487.903903 continue
f2 0.073485 0.588783 4.446740 40.104987 475.449116 if-not
f3 0.058125 0.432477 3.492641 31.561163 414.122051 continue and cached lookups
f4 0.057824 0.434284 3.380305 31.197653 405.755916 if-not and cached lookups
f5 0.055114 0.414407 3.210748 29.628570 395.969775 list comprehension
f6 0.046982 0.345740 2.664129 25.536605 358.497896 list comprehension with cached lookup
uniqueness factor 0.1
n= 512 4096 32768 262144 2097152
f1 0.032225 0.227683 1.839833 15.750766 154.170419 continue
f2 0.030117 0.224972 1.710933 15.091510 149.150858 if-not
f3 0.029816 0.222563 1.618776 14.075671 139.887243 continue and cached lookups
f4 0.027105 0.195157 1.527823 13.564289 135.744682 if-not and cached lookups
f5 0.035237 0.193651 1.550712 13.297153 135.922672 list comprehension
f6 0.025900 0.190940 1.443496 12.561702 130.104415 list comprehension with cached lookup
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment