mattboehm · February 23, 2018 22:09
diff --git a/mirrorboard_collisions.py b/mirrorboard_collisions.py
 #python3
 from collections import defaultdict, Counter
 import pprint as pp
 # Key rows of the two layouts, top row first. The sample results recorded at
 # the bottom of this script are labelled QWERTY and DVORAK; only KEYS is read
 # below, so DV_KEYS is presumably swapped in by hand to get the DVORAK numbers.
 DV_KEYS = [
     "',.pyfgcrl",
     "aoeuidhtns",
     ";qjkxbmwvz",
 ]
 KEYS = [
     "qwertyuiop",
     "asdfghjkl;",
     "zxcvbnm,./",
 ]
 # MIRRORS maps each key to the key at the mirrored position of the same row
 # (first <-> last, second <-> second-to-last, ...), in both directions.
 MIRRORS = {}
 for row in KEYS:
     # Pair the first half of the row with the row read backwards; zip stops
     # at the shorter input, so the centre key of an odd-length row stays
     # unmapped.
     for left, right in zip(row[:len(row) // 2], reversed(row)):
         MIRRORS[left] = right
         MIRRORS[right] = left
 pp.pprint(MIRRORS)


 def key(word):
     """Return the mirror-independent signature of *word*.

     The word is lower-cased, then every character is replaced by whichever
     of the character and its MIRRORS partner compares smaller, so two words
     that differ only by mirrored keys get the same signature.

     Raises KeyError if the word contains a character that is not in MIRRORS.
     """
     canonical = []
     for letter in word.lower():
         partner = MIRRORS[letter]
         canonical.append(letter if letter <= partner else partner)
     return "".join(canonical)

 # Group every dictionary word under its mirrored signature. Words holding a
 # character that is missing from MIRRORS make key() raise KeyError and are
 # collected in bad_words instead.
 words_by_key = defaultdict(set)
 total_words = 0  # number of lines read, duplicates included
 bad_words = set()
 with open("/usr/share/dict/words") as f:
     for word in f:
         word = word.strip().lower()
         try:
             words_by_key[key(word)].add(word)
         except KeyError:
             bad_words.add(word)
         total_words += 1
 # Count the distinct words that were actually grouped. Lower-casing merges
 # lines that differ only in case, so "lines read minus bad words" is larger
 # than the number of grouped words and would make the percentages computed
 # from it sum to less than 100.
 processed_words = sum(len(group) for group in words_by_key.values())

 print(total_words, "words total")
 print(len(bad_words), "words unable to process: ", list(bad_words)[:10])

 # Histogram: group size -> how many signatures have exactly that many words.
 lens = Counter(map(len, words_by_key.values()))
 print("Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.")
 print(lens.most_common())

 print("Probability of a word having N collisions:")
 for group_size in sorted(lens):
     group_count = lens[group_size]
     # Words sitting in groups of this size, as a percentage of processed_words.
     percent = group_size * group_count / processed_words * 100
     # A group of N words means each member collides with N - 1 others.
     print(group_size - 1, percent)

 # Show a handful of colliding groups. The limit is checked after printing,
 # so up to 11 groups are shown.
 print("Some sample collisions:")
 cc = 0
 for wds in words_by_key.values():
     if len(wds) < 2:
         continue  # a lone word has no collision to show
     cc += 1
     print(wds)
     if cc > 10:
         break

 # QWERTY
 # 235886 words total
 # 2 words unable to process:  ['jean-pierre', 'jean-christophe']
 # Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
 # [(1, 221334), (2, 5101), (3, 602), (4, 165), (5, 38), (6, 20), (7, 7), (8, 1)]
 # Probability of a word having N collisions:
 # 0 93.83171389326958
 # 1 4.325007206932221
 # 2 0.7656305641756117
 # 3 0.27979854504756574
 # 4 0.08054806599854165
 # 5 0.05087246273592105
 # 6 0.020772922283834427
 # 7 0.00339149751572807
 # Some sample collisions:
 # {'dub', 'dun'}
 # {'killable', 'kissable'}
 # {'percival', 'perceval'}
 # {'it', 'ey'}
 # {'scruf', 'scurf'}
 # {'silverness', 'silverbill'}
 # {'singer', 'linger'}
 # {'wade', 'wake', 'wadi'}
 # {'jag', 'fag'}
 # {'wryly', 'outly'}
 # {'pegasian', 'pegasean'}

 # DVORAK
 # 235886 words total
 # 2 words unable to process:  ['jean-pierre', 'jean-christophe']
 # Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
 # [(1, 227220), (2, 3017), (3, 305), (4, 46), (5, 2), (6, 1)]
 # Probability of a word having N collisions:
 # 0 96.3270081904665
 # 1 2.5580370012378966
 # 2 0.387902528361398
 # 3 0.0780044428617456
 # 4 0.004239371894660088
 # 5 0.002543623136796052
 # Some sample collisions:
 # {'apathism', 'agathism'}
 # {'balk', 'balm'}
 # {'unary', 'hoary'}
 # {'cypris', 'cypria'}
 # {'indiscreetly', 'indiscretely'}
 # {'pump', 'gump'}
 # {'yond', 'food'}
 # {'getae', 'geest'}
 # {'trig', 'trip'}
 # {'apselaphesia', 'apselaphesis'}
 # {'tach', 'each'}
	#python3
	from collections import defaultdict, Counter
	import pprint as pp
	# Key rows of the two layouts, top row first. The sample results recorded at
	# the bottom of this script are labelled QWERTY and DVORAK; only KEYS is read
	# below, so DV_KEYS is presumably swapped in by hand to get the DVORAK numbers.
	DV_KEYS = [
	    "',.pyfgcrl",
	    "aoeuidhtns",
	    ";qjkxbmwvz",
	]
	KEYS = [
	    "qwertyuiop",
	    "asdfghjkl;",
	    "zxcvbnm,./",
	]
	# MIRRORS maps each key to the key at the mirrored position of the same row
	# (first <-> last, second <-> second-to-last, ...), in both directions.
	MIRRORS = {}
	for row in KEYS:
	    # Pair the first half of the row with the row read backwards; zip stops
	    # at the shorter input, so the centre key of an odd-length row stays
	    # unmapped.
	    for left, right in zip(row[:len(row) // 2], reversed(row)):
	        MIRRORS[left] = right
	        MIRRORS[right] = left
	pp.pprint(MIRRORS)


	def key(word):
	    """Return the mirror-independent signature of *word*.

	    The word is lower-cased, then every character is replaced by whichever
	    of the character and its MIRRORS partner compares smaller, so two words
	    that differ only by mirrored keys get the same signature.

	    Raises KeyError if the word contains a character that is not in MIRRORS.
	    """
	    return "".join(min(letter, MIRRORS[letter]) for letter in word.lower())

	# Group every dictionary word under its mirrored signature. Words holding a
	# character that is missing from MIRRORS make key() raise KeyError and are
	# collected in bad_words instead.
	words_by_key = defaultdict(set)
	total_words = 0  # number of lines read, duplicates included
	bad_words = set()
	with open("/usr/share/dict/words") as f:
	    for word in f:
	        word = word.strip().lower()
	        try:
	            words_by_key[key(word)].add(word)
	        except KeyError:
	            bad_words.add(word)
	        total_words += 1
	# Count the distinct words that were actually grouped. Lower-casing merges
	# lines that differ only in case, so "lines read minus bad words" is larger
	# than the number of grouped words and would make the percentages computed
	# from it sum to less than 100.
	processed_words = sum(len(group) for group in words_by_key.values())

	print(total_words, "words total")
	print(len(bad_words), "words unable to process: ", list(bad_words)[:10])

	# Histogram: group size -> how many signatures have exactly that many words.
	lens = Counter((len(val) for val in words_by_key.values()))
	print("Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.")
	print(lens.most_common())

	print("Probability of a word having N collisions:")
	for numcoll, count in sorted(lens.most_common()):
	    # Words sitting in groups of this size, as a percentage of processed_words.
	    probability = numcoll * count / processed_words * 100
	    # A group of N words means each member collides with N - 1 others.
	    print(numcoll-1, probability)

	# Show a handful of colliding groups. The limit is checked after printing,
	# so up to 11 groups are shown.
	cc = 0
	print("Some sample collisions:")
	for wds in words_by_key.values():
	    if len(wds) > 1:
	        cc += 1
	        print(wds)
	        if cc > 10:
	            break

	# QWERTY
	# 235886 words total
	# 2 words unable to process: ['jean-pierre', 'jean-christophe']
	# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
	# [(1, 221334), (2, 5101), (3, 602), (4, 165), (5, 38), (6, 20), (7, 7), (8, 1)]
	# Probability of a word having N collisions:
	# 0 93.83171389326958
	# 1 4.325007206932221
	# 2 0.7656305641756117
	# 3 0.27979854504756574
	# 4 0.08054806599854165
	# 5 0.05087246273592105
	# 6 0.020772922283834427
	# 7 0.00339149751572807
	# Some sample collisions:
	# {'dub', 'dun'}
	# {'killable', 'kissable'}
	# {'percival', 'perceval'}
	# {'it', 'ey'}
	# {'scruf', 'scurf'}
	# {'silverness', 'silverbill'}
	# {'singer', 'linger'}
	# {'wade', 'wake', 'wadi'}
	# {'jag', 'fag'}
	# {'wryly', 'outly'}
	# {'pegasian', 'pegasean'}

	# DVORAK
	# 235886 words total
	# 2 words unable to process: ['jean-pierre', 'jean-christophe']
	# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
	# [(1, 227220), (2, 3017), (3, 305), (4, 46), (5, 2), (6, 1)]
	# Probability of a word having N collisions:
	# 0 96.3270081904665
	# 1 2.5580370012378966
	# 2 0.387902528361398
	# 3 0.0780044428617456
	# 4 0.004239371894660088
	# 5 0.002543623136796052
	# Some sample collisions:
	# {'apathism', 'agathism'}
	# {'balk', 'balm'}
	# {'unary', 'hoary'}
	# {'cypris', 'cypria'}
	# {'indiscreetly', 'indiscretely'}
	# {'pump', 'gump'}
	# {'yond', 'food'}
	# {'getae', 'geest'}
	# {'trig', 'trip'}
	# {'apselaphesia', 'apselaphesis'}
	# {'tach', 'each'}