Created
June 5, 2016 09:04
-
-
Save Rhomboid/5958166b013e80c7fa49aa40aaf18d99 to your computer and use it in GitHub Desktop.
Uniqueness functions that preserve order in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import itertools | |
import timeit | |
import re | |
def f1(seq): | |
''' continue ''' | |
seen = set() | |
result = [] | |
for item in seq: | |
if item in seen: | |
continue | |
seen.add(item) | |
result.append(item) | |
return result | |
def f2(seq): | |
''' if-not ''' | |
seen = set() | |
result = [] | |
for item in seq: | |
if item not in seen: | |
seen.add(item) | |
result.append(item) | |
return result | |
def f3(seq): | |
''' continue and cached lookups ''' | |
seen = set() | |
result = [] | |
seen_add = seen.add | |
result_append = result.append | |
for item in seq: | |
if item in seen: | |
continue | |
seen_add(item) | |
result_append(item) | |
return result | |
def f4(seq): | |
''' if-not and cached lookups ''' | |
seen = set() | |
result = [] | |
seen_add = seen.add | |
result_append = result.append | |
for item in seq: | |
if item not in seen: | |
seen_add(item) | |
result_append(item) | |
return result | |
def f5(seq): | |
''' list comprehension ''' | |
seen = set() | |
return [item for item in seq if item not in seen and not seen.add(item)] | |
def f6(seq): | |
''' list comprehension with cached lookup ''' | |
seen = set() | |
seen_add = seen.add | |
return [item for item in seq if item not in seen and not seen_add(item)] | |
def gen_test_data(total, unique): | |
items = list(itertools.islice(itertools.cycle(range(unique)), total)) | |
random.shuffle(items) | |
return items | |
random.seed(0xfeedbeef) | |
test_sizes = [8**n for n in range(3, 8)] | |
for uniq_factor in (1, 0.8, 0.5, 0.1): | |
print('uniqueness factor {}'.format(uniq_factor)) | |
print('n= {}'.format(' '.join(format(n, '12') for n in test_sizes))) | |
for fname, doc in sorted((fname, func.__doc__) for fname, func in globals().items() if re.match(r'f\d+$', fname)): | |
results = [] | |
for n in test_sizes: | |
test_data = gen_test_data(n, int(n * uniq_factor)) | |
results.append(min(timeit.repeat(fname + '(test_data)', repeat=3, number=1, globals=globals()))) | |
print('{:2} {} {}'.format(fname, ' '.join(format(t * 1e3, '12f') for t in results), doc)) | |
print() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
uniqueness factor 1 | |
n= 512 4096 32768 262144 2097152 | |
f1 0.121672 0.886637 7.693327 71.831190 778.051205 continue | |
f2 0.127394 0.935728 7.731876 73.947193 770.246445 if-not | |
f3 0.096374 0.695697 5.881201 56.200590 636.565966 continue and cached lookups | |
f4 0.095470 0.667086 5.628823 55.815396 635.029108 if-not and cached lookups | |
f5 0.090049 0.611069 5.362290 52.788360 627.457453 list comprehension | |
f6 0.067160 0.478254 4.070883 43.047395 542.564623 list comprehension with cached lookup | |
uniqueness factor 0.8 | |
n= 512 4096 32768 262144 2097152 | |
f1 0.110227 0.761954 6.468779 60.635584 676.896829 continue | |
f2 0.110227 0.754124 6.543469 59.815806 677.945194 if-not | |
f3 0.082821 0.605347 5.133101 47.713987 568.023527 continue and cached lookups | |
f4 0.082520 0.571014 4.814466 47.184836 567.102255 if-not and cached lookups | |
f5 0.077099 0.532766 4.687674 45.528414 549.348424 list comprehension | |
f6 0.072581 0.436994 3.611301 37.725462 491.662174 list comprehension with cached lookup | |
uniqueness factor 0.5 | |
n= 512 4096 32768 262144 2097152 | |
f1 0.074991 0.569508 4.538295 39.820986 487.903903 continue | |
f2 0.073485 0.588783 4.446740 40.104987 475.449116 if-not | |
f3 0.058125 0.432477 3.492641 31.561163 414.122051 continue and cached lookups | |
f4 0.057824 0.434284 3.380305 31.197653 405.755916 if-not and cached lookups | |
f5 0.055114 0.414407 3.210748 29.628570 395.969775 list comprehension | |
f6 0.046982 0.345740 2.664129 25.536605 358.497896 list comprehension with cached lookup | |
uniqueness factor 0.1 | |
n= 512 4096 32768 262144 2097152 | |
f1 0.032225 0.227683 1.839833 15.750766 154.170419 continue | |
f2 0.030117 0.224972 1.710933 15.091510 149.150858 if-not | |
f3 0.029816 0.222563 1.618776 14.075671 139.887243 continue and cached lookups | |
f4 0.027105 0.195157 1.527823 13.564289 135.744682 if-not and cached lookups | |
f5 0.035237 0.193651 1.550712 13.297153 135.922672 list comprehension | |
f6 0.025900 0.190940 1.443496 12.561702 130.104415 list comprehension with cached lookup | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment