Last active
October 6, 2015 16:43
-
-
Save audiodude/f39a7ecdde4eade0102c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File Edit Options Buffers Tools Python Help | |
#!/usr/bin/python3 | |
""" | |
"You can't spell 'skateboarding' without d-a-t-a-g-r-o-k." | |
Usage: python3 cantspell.py WORD | |
Prints all the words in its dictionary that contain at least all the | |
letters given in WORD | |
""" | |
from collections import Counter | |
wordlist='/usr/share/dict/american-english-insane' | |
def cantspell(s): | |
# I also tried to do a version based on collections.Counter but it | |
# was slower. Can you do better? | |
for f in open(wordlist): | |
f = f.strip() | |
try: | |
g = list(f.lower()) | |
for l in s: | |
g.remove(l) | |
except ValueError: | |
continue | |
yield f | |
def cantspell_b(s): | |
# This uses collections.Counter but it's about 7x slower. :( | |
from collections import Counter | |
s = Counter(s) | |
for word in (x.strip() for x in open(wordlist)): | |
f = Counter(word.lower()) | |
f.subtract(s) | |
if all(v>=0 for v in f.values()): | |
yield word | |
def cantspell2(s): | |
# An attempt to do better. | |
for f in open(wordlist): | |
f = f.strip() | |
f_count = Counter(f.lower()) | |
s_count = Counter(s.lower()) | |
for l, n in s_count.iteritems(): | |
if f_count[l] < s_count[l]: | |
continue | |
yield f | |
def cantspell3(s): | |
# Like cantspell2, but without Counter | |
for f in open(wordlist): | |
f = f.strip() | |
def make_count_dict(word): | |
ans = {} | |
for l in word: | |
if l in ans: | |
ans[l] += 1 | |
else: | |
ans[l] = 0 | |
return ans | |
f_count = make_count_dict(f.lower()) | |
s_count = make_count_dict(s.lower()) | |
for l, n in s_count.iteritems(): | |
if f_count.get(l, 0) < s_count[l]: | |
continue | |
yield f | |
def test(): | |
import cProfile | |
TEST_DATA = [ | |
'datagrok', | |
'krowbar', | |
'insom', | |
'vlimibm', | |
'audiodude', | |
'karlen', | |
'kc', | |
] | |
def exercise(fn): | |
for _ in range(1): | |
for w in TEST_DATA: | |
for __ in fn(w): | |
# Exhaust the generator | |
pass | |
for fn in (cantspell, cantspell2, cantspell_b): | |
print 'Testing %s' % fn.__name__ | |
cProfile.runctx('exercise(fn)', None, locals()) | |
if __name__ == '__main__': | |
import sys | |
for word in sorted(cantspell(sys.argv[1]), key=len, reverse=True): | |
print(word) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Testing cantspell | |
15680089 function calls in 15.092 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 15.092 15.092 <string>:1(<module>) | |
37086 11.301 0.000 15.074 0.000 cantspell.py:12(cantspell) | |
1 0.019 0.019 15.092 15.092 cantspell.py:58(exercise) | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
4555054 0.877 0.000 0.877 0.000 {method 'lower' of 'str' objects} | |
6532884 1.897 0.000 1.897 0.000 {method 'remove' of 'list' objects} | |
4555054 0.999 0.000 0.999 0.000 {method 'strip' of 'str' objects} | |
7 0.000 0.000 0.000 0.000 {open} | |
1 0.000 0.000 0.000 0.000 {range} | |
Testing cantspell_b | |
165935359 function calls (165935357 primitive calls) in 133.082 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 133.082 133.082 <string>:1(<module>) | |
2 0.000 0.000 0.000 0.000 _abcoll.py:98(__subclasshook__) | |
2 0.000 0.000 0.000 0.000 _weakrefset.py:16(__init__) | |
2 0.000 0.000 0.000 0.000 _weakrefset.py:20(__enter__) | |
2 0.000 0.000 0.000 0.000 _weakrefset.py:26(__exit__) | |
2 0.000 0.000 0.000 0.000 _weakrefset.py:52(_commit_removals) | |
3 0.000 0.000 0.000 0.000 _weakrefset.py:58(__iter__) | |
13665181 7.380 0.000 7.380 0.000 _weakrefset.py:70(__contains__) | |
2 0.000 0.000 0.000 0.000 _weakrefset.py:83(add) | |
9110115 14.374 0.000 24.157 0.000 abc.py:128(__instancecheck__) | |
2/1 0.000 0.000 0.000 0.000 abc.py:148(__subclasscheck__) | |
37086 19.481 0.001 133.071 0.004 cantspell.py:25(cantspell_b) | |
4555061 3.593 0.000 5.162 0.000 cantspell.py:29(<genexpr>) | |
16574902 4.175 0.000 4.175 0.000 cantspell.py:32(<genexpr>) | |
1 0.012 0.012 133.082 133.082 cantspell.py:58(exercise) | |
4555061 7.041 0.000 61.334 0.000 collections.py:438(__init__) | |
4555061 27.332 0.000 54.293 0.000 collections.py:501(update) | |
4555054 18.601 0.000 36.616 0.000 collections.py:536(subtract) | |
4555054 3.582 0.000 6.580 0.000 {all} | |
9110117 2.404 0.000 2.404 0.000 {getattr} | |
9110115 6.166 0.000 30.323 0.000 {isinstance} | |
2/1 0.000 0.000 0.000 0.000 {issubclass} | |
1 0.000 0.000 0.000 0.000 {method '__subclasses__' of 'type' objects} | |
4 0.000 0.000 0.000 0.000 {method 'add' of 'set' objects} | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
67332299 12.862 0.000 12.862 0.000 {method 'get' of 'dict' objects} | |
4555054 1.792 0.000 1.792 0.000 {method 'items' of 'dict' objects} | |
4555054 1.166 0.000 1.166 0.000 {method 'lower' of 'str' objects} | |
2 0.000 0.000 0.000 0.000 {method 'remove' of 'set' objects} | |
4555054 1.569 0.000 1.569 0.000 {method 'strip' of 'str' objects} | |
4555054 1.554 0.000 1.554 0.000 {method 'values' of 'dict' objects} | |
7 0.000 0.000 0.000 0.000 {open} | |
1 0.000 0.000 0.000 0.000 {range} | |
Testing cantspell2 | |
173131584 function calls in 147.258 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 147.258 147.258 <string>:1(<module>) | |
18220216 9.510 0.000 9.510 0.000 _weakrefset.py:70(__contains__) | |
9110108 17.518 0.000 29.212 0.000 abc.py:128(__instancecheck__) | |
4555061 31.174 0.000 145.740 0.000 cantspell.py:25(cantspell2) | |
1 1.519 1.519 147.258 147.258 cantspell.py:58(exercise) | |
9110108 12.655 0.000 106.755 0.000 collections.py:438(__init__) | |
14698231 2.770 0.000 2.770 0.000 collections.py:452(__missing__) | |
9110108 45.546 0.000 94.100 0.000 collections.py:501(update) | |
9110108 2.184 0.000 2.184 0.000 {getattr} | |
9110108 5.992 0.000 35.205 0.000 {isinstance} | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
71887309 13.350 0.000 13.350 0.000 {method 'get' of 'dict' objects} | |
4555054 1.297 0.000 1.297 0.000 {method 'iteritems' of 'dict' objects} | |
9110108 2.348 0.000 2.348 0.000 {method 'lower' of 'str' objects} | |
4555054 1.396 0.000 1.396 0.000 {method 'strip' of 'str' objects} | |
7 0.000 0.000 0.000 0.000 {open} | |
1 0.000 0.000 0.000 0.000 {range} | |
Testing cantspell3 | |
55962109 function calls in 43.058 seconds | |
Ordered by: standard name | |
ncalls tottime percall cumtime percall filename:lineno(function) | |
1 0.000 0.000 43.058 43.058 <string>:1(<module>) | |
4555061 20.504 0.000 41.777 0.000 cantspell.py:46(cantspell3) | |
9110108 12.705 0.000 12.705 0.000 cantspell.py:50(make_count_dict) | |
1 1.281 1.281 43.058 43.058 cantspell.py:78(exercise) | |
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} | |
24076714 4.311 0.000 4.311 0.000 {method 'get' of 'dict' objects} | |
4555054 1.054 0.000 1.054 0.000 {method 'iteritems' of 'dict' objects} | |
9110108 2.121 0.000 2.121 0.000 {method 'lower' of 'str' objects} | |
4555054 1.080 0.000 1.080 0.000 {method 'strip' of 'str' objects} | |
7 0.000 0.000 0.000 0.000 {open} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment