Created
October 14, 2019 23:09
-
-
Save nsmaciej/6b311115f30adb2e89a29fcc8321eab5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pprint
from collections import Counter, defaultdict
def encode(x, known):
    """Return the letter pattern of ``x`` as a tuple.

    Each symbol of ``x`` that is in ``known`` is kept unchanged. Every other
    symbol is replaced by a number identifying its first-occurrence order
    within ``x``: vowels ("AEIOU") are numbered -1, -2, ... and all other
    symbols 1, 2, ..., each class counted separately. Repeated symbols get the
    same number.

    Args:
        x: Iterable of symbols (e.g. a string) to encode.
        known: Container supporting ``in``; symbols found in it are passed
            through unchanged.

    Returns:
        A tuple with one entry per symbol of ``x``.
    """
    seen_vowels = {}
    seen_other = {}

    def check(seen, symbol, sign):
        # Known symbols are kept verbatim and do not consume a number.
        if symbol in known:
            return symbol
        if symbol not in seen:
            # Number by order of first appearance; the sign marks the class.
            seen[symbol] = (len(seen) + 1) * sign
        return seen[symbol]

    return tuple(
        check(seen_vowels, i, -1) if i in "AEIOU" else check(seen_other, i, 1)
        for i in x
    )
def decode(text, cipher):
    """Translate ``text`` symbol by symbol through the ``cipher`` mapping.

    Symbols that have no entry in ``cipher`` are left unchanged.

    Args:
        text: Iterable of single-character strings (e.g. a string).
        cipher: Dict mapping a symbol to its replacement string.

    Returns:
        The translated symbols joined into one string.
    """
    return "".join(cipher.get(i, i) for i in text)
def most_common():
    """Recover the letter substitution and print the decoded message.

    Reads ``cipher19.txt`` (every second line starting with the second, with
    spaces removed) and splits it into three-character codewords. Reads
    ``codeBook_19.txt``, whose lines have the form ``CODE : WORD``. The most
    frequent codeword in the text is assumed to stand for "THE", which seeds
    the letter mapping. Further letters are deduced by matching the pattern of
    each partially decoded codeword against the code book, stopping once 26
    letters are mapped or a full pass resolves nothing new.

    Prints the decoded words separated by spaces, using "?" for a codeword
    whose letters are not all mapped.
    """
    # Read the cipher text.
    with open("cipher19.txt") as cipher_file:
        lines = cipher_file.readlines()
    cleaned = "".join(x.replace(" ", "").strip() for x in lines[1::2])
    text = [cleaned[i : i + 3] for i in range(0, len(cleaned), 3)]

    # Read the code book.
    with open("codeBook_19.txt", encoding="latin") as code_book:
        code_to_word = dict(
            tuple(x.strip("\n").split(" : ")) for x in code_book
        )

    # Assume THE is the most common word.
    cipher = {}
    most_common_codeword = Counter(text).most_common(1)[0][0]
    the_codeword = next(x for x, y in code_to_word.items() if y == "THE")
    cipher.update(zip(most_common_codeword, the_codeword))

    # Solve the rest.
    word_set = set(text)
    while len(cipher) < 26:
        # Group code book entries by their pattern under the current mapping.
        # The table must be rebuilt whenever the mapping changes.
        known_plain = set(cipher.values())
        encoding_to_possible_pairs = defaultdict(set)
        for code, word in code_to_word.items():
            encoding_to_possible_pairs[encode(code, known_plain)].add((code, word))

        for word in word_set:
            if all(i in cipher for i in word):
                continue
            encoding = tuple(cipher.get(i, i) for i in encode(word, cipher.keys()))
            possible_pairs = encoding_to_possible_pairs.get(encoding, ())
            # Only an unambiguous match teaches us anything; an empty
            # candidate set must be skipped too, not unpacked.
            if len(possible_pairs) != 1:
                continue
            ((code, _),) = possible_pairs
            for src, dst in zip(word, code):
                if src not in cipher:
                    cipher[src] = dst
            # The mapping changed, so the pattern table above is stale.
            break
        else:
            # A full pass resolved nothing; another pass would be identical,
            # so stop instead of looping forever.
            break

    # NOTE(review): the extracted source lost its indentation, so it is not
    # certain whether this print ran once after solving or on every pass of
    # the loop above - confirm against the original layout.
    print(
        " ".join(
            code_to_word[decode(i, cipher)] if set(i) < set(cipher) else "?"
            for i in text
        )
    )
# Run the solver only when executed as a script, not when imported.
if __name__ == "__main__":
    most_common()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment