-
-
Save gartenfeld/cd71bb9301c8b5c5bfe3 to your computer and use it in GitHub Desktop.
English words whose "letter score" (a=1, b=2, …, z=26, summed over the letters) adds up to 100, sorted by frequency.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re # Regular Expressions | |
import collections # Data Types | |
import sys # File operations | |
import codecs # UniCode support | |
import os | |
import operator | |
import string | |
def load_freq(freq_f):
    """Return the words whose letter score is exactly 100, most frequent first.

    Each line of the UTF-8 text file *freq_f* is split on tabs; the first
    field is parsed as an integer frequency and the second is taken as the
    word form.  A form's letter score is the sum of the alphabet positions
    of its letters (a=1 ... z=26), case-insensitive, with non-alphabetic
    characters ignored.

    Lines that cannot be processed (missing second field, non-integer
    frequency, or a letter outside a-z) are reported on stdout and skipped.

    Args:
        freq_f: Path of the tab-separated frequency file.

    Returns:
        A list of ``(form, frequency)`` tuples sorted by descending
        frequency.  If a form appears on several lines, the frequency of
        its first occurrence is kept.
    """
    letter_values = {
        letter: value
        for value, letter in enumerate(string.ascii_lowercase, start=1)
    }
    winners = {}
    # The context manager guarantees the file is closed, even on error.
    with open(freq_f, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                # Strip the whole line terminator: with only '\r' removed,
                # a form in the last column would be stored as "word\n".
                fields = line.rstrip('\r\n').split('\t')
                freq = int(fields[0])
                form = fields[1]
                score = sum(
                    letter_values[char.lower()]
                    for char in form
                    if char.isalpha()
                )
            except (IndexError, ValueError, KeyError):
                # IndexError: no second field; ValueError: frequency is not
                # an integer; KeyError: alphabetic character outside a-z.
                print("Error with: %s." % line, "\tError message: ", sys.exc_info())
                continue
            # Keep only the first frequency seen for a given form.
            if score == 100 and form not in winners:
                winners[form] = freq
    return sorted(winners.items(), key=lambda item: item[1], reverse=True)
if __name__ == '__main__':
    # Script entry point: print the 100 most frequent words scoring 100.
    freq_file = "forms_freq.txt"
    print("Loading frequency data...")
    winners = load_freq(freq_file)
    # Slice rather than index over range(100): when fewer than 100 words
    # qualify, indexing would raise IndexError part-way through the listing.
    for rank, (form, _freq) in enumerate(winners[:100], start=1):
        print(rank, form)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 problems | |
2 personal | |
3 services | |
4 congress | |
5 culture | |
6 hospital | |
7 analysis | |
8 therefore | |
9 standards | |
10 status | |
11 sources | |
12 variety | |
13 primary | |
14 writing | |
15 prevent | |
16 quarter | |
17 stress | |
18 explains | |
19 colleagues | |
20 attitude | |
21 towards | |
22 telephone | |
23 wednesday | |
24 courses | |
25 excellent | |
26 afghanistan | |
27 elsewhere | |
28 whenever | |
29 session | |
30 surely | |
31 thirty | |
32 discipline | |
33 producer | |
34 performed | |
35 delivery | |
36 publicly | |
37 inflation | |
38 telescope | |
39 styles | |
40 turkey | |
41 referring | |
42 pursue | |
43 threatened | |
44 responded | |
45 annually | |
46 researcher | |
47 addressing | |
48 corridor | |
49 collecting | |
50 appointed | |
51 portland | |
52 applying | |
53 likelihood | |
54 restore | |
55 lightning | |
56 companion | |
57 designers | |
58 resolved | |
59 acknowledge | |
60 maintains | |
61 milwaukee | |
62 practicing | |
63 playoffs | |
64 useless | |
65 towers | |
66 rituals | |
67 cousins | |
68 boundary | |
69 fountain | |
70 selective | |
71 molecular | |
72 temporal | |
73 pouring | |
74 judiciary | |
75 arrests | |
76 pakistani | |
77 pumpkin | |
78 socialism | |
79 whiskey | |
80 suzanne | |
81 profiles | |
82 highways | |
83 interfere | |
84 motors | |
85 pounding | |
86 identifies | |
87 permits | |
88 trembling | |
89 strains | |
90 wholesale | |
91 eventual | |
92 boulevard | |
93 elephants | |
94 outlined | |
95 printer | |
96 unrelated | |
97 squares | |
98 adulthood | |
99 watergate | |
100 imported |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment