Created
February 23, 2018 22:09
-
-
Save mattboehm/b9554c3e042f67fbc16054217e64958f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#python3 | |
import pprint as pp
from collections import defaultdict, Counter
from itertools import islice
# Dvorak layout, one string per keyboard row (left to right).
# Not referenced by the code below; presumably swapped in for KEYS by hand
# to produce Dvorak results -- TODO confirm.
DV_KEYS = [
    "',.pyfgcrl",
    "aoeuidhtns",
    ";qjkxbmwvz",
]

# QWERTY layout, one string per keyboard row (left to right). This is the
# layout the mirror table is built from.
KEYS = [
    "qwertyuiop",
    "asdfghjkl;",
    "zxcvbnm,./",
]

# Maps every key to the key at the mirrored position in the same row
# (first <-> last, second <-> second-to-last, ...). Pairing each row with
# its own reverse covers both directions in one pass; in a row with an odd
# number of keys the middle key maps to itself.
MIRRORS = {}
for row in KEYS:
    for left, right in zip(row, reversed(row)):
        MIRRORS[left] = right

# Dump the table so the pairing can be checked by eye.
pp.pprint(MIRRORS)
def key(word):
    """Return the mirror-insensitive signature of *word*.

    The word is lower-cased, then each character is replaced by whichever
    of (the character, its MIRRORS partner) compares smaller, so two words
    that differ only by mirrored keys yield the same string.

    Raises KeyError if a character has no entry in MIRRORS.
    """
    canonical = []
    for letter in word.lower():
        partner = MIRRORS[letter]
        canonical.append(letter if letter <= partner else partner)
    return "".join(canonical)
# Group every dictionary word by its mirror-insensitive signature.
words_by_key = defaultdict(set)   # signature -> set of distinct lower-cased words
total_words = 0                   # non-blank lines read from the word list
bad_words = set()                 # words containing a character missing from MIRRORS

with open("/usr/share/dict/words") as f:
    for line in f:
        word = line.strip().lower()
        if not word:
            # A blank line is not a word; do not let "" form its own group.
            continue
        total_words += 1
        try:
            words_by_key[key(word)].add(word)
        except KeyError:
            bad_words.add(word)

# Words are stored lower-cased in sets, so case variants of the same word
# collapse into one entry. Count what was actually grouped rather than
# deriving it from the line count; otherwise the denominator is too large
# and the percentages below do not add up to 100.
processed_words = sum(len(group) for group in words_by_key.values())

print(total_words, "words total")
print(len(bad_words), "words unable to process: ", list(bad_words)[:10])

# Histogram: group size -> number of groups of that size.
lens = Counter(len(group) for group in words_by_key.values())
print("Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.")
print(lens.most_common())

print("Probability of a word having N collisions:")
for numcoll, count in sorted(lens.items()):
    # count groups of numcoll words each -> numcoll * count words in total,
    # each of which collides with numcoll - 1 others.
    probability = numcoll * count / processed_words * 100
    print(numcoll - 1, probability)

print("Some sample collisions:")
colliding_groups = (wds for wds in words_by_key.values() if len(wds) > 1)
# Show at most the first 11 colliding groups.
for wds in islice(colliding_groups, 11):
    print(wds)
# QWERTY | |
# 235886 words total | |
# 2 words unable to process: ['jean-pierre', 'jean-christophe'] | |
# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc. | |
# [(1, 221334), (2, 5101), (3, 602), (4, 165), (5, 38), (6, 20), (7, 7), (8, 1)] | |
# Probability of a word having N collisions: | |
# 0 93.83171389326958 | |
# 1 4.325007206932221 | |
# 2 0.7656305641756117 | |
# 3 0.27979854504756574 | |
# 4 0.08054806599854165 | |
# 5 0.05087246273592105 | |
# 6 0.020772922283834427 | |
# 7 0.00339149751572807 | |
# Some sample collisions: | |
# {'dub', 'dun'} | |
# {'killable', 'kissable'} | |
# {'percival', 'perceval'} | |
# {'it', 'ey'} | |
# {'scruf', 'scurf'} | |
# {'silverness', 'silverbill'} | |
# {'singer', 'linger'} | |
# {'wade', 'wake', 'wadi'} | |
# {'jag', 'fag'} | |
# {'wryly', 'outly'} | |
# {'pegasian', 'pegasean'} | |
# DVORAK | |
# 235886 words total | |
# 2 words unable to process: ['jean-pierre', 'jean-christophe'] | |
# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc. | |
# [(1, 227220), (2, 3017), (3, 305), (4, 46), (5, 2), (6, 1)] | |
# Probability of a word having N collisions: | |
# 0 96.3270081904665 | |
# 1 2.5580370012378966 | |
# 2 0.387902528361398 | |
# 3 0.0780044428617456 | |
# 4 0.004239371894660088 | |
# 5 0.002543623136796052 | |
# Some sample collisions: | |
# {'apathism', 'agathism'} | |
# {'balk', 'balm'} | |
# {'unary', 'hoary'} | |
# {'cypris', 'cypria'} | |
# {'indiscreetly', 'indiscretely'} | |
# {'pump', 'gump'} | |
# {'yond', 'food'} | |
# {'getae', 'geest'} | |
# {'trig', 'trip'} | |
# {'apselaphesia', 'apselaphesis'} | |
# {'tach', 'each'} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment