Created
April 21, 2012 05:58
-
-
Save cammckinnon/2434543 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# python 2.7.3 | |
# reads ciphertext from ./in.txt and a whitespace-delimited | |
# dictionary of english words from ./dictionary/wordlist.txt | |
import string | |
import sys | |
from collections import defaultdict | |
from copy import copy | |
from itertools import izip | |
# english dictionary | |
words = set() | |
# word pattern => list of corresponding english words | |
pattern_words = defaultdict(list) | |
def get_pattern(word): | |
"""Return the word pattern for the input word | |
>>> get_pattern("cat") | |
'abc' | |
>>> get_pattern("schoolbus") | |
'abcddefga' | |
>>> get_pattern("racecar") | |
'abcdcba' | |
Applying a substitution cipher to a word preserves | |
its word pattern. We'll exploit this for decryption. | |
""" | |
map = {} | |
letters = iter(string.lowercase) | |
pattern = '' | |
for char in word: | |
if char not in map: | |
map[char] = letters.next() | |
pattern += map[char] | |
return pattern | |
def next_mappings(word, initial_mapping): | |
"""Iterate over all supersets of `initial_mapping` that translate `word`. | |
Returns a generator that iterates over supersets of `initial_mapping` | |
such that `word` translates to a word in the `words` dictionary. | |
Each superset will add only enough to `initial_mapping` to translate `word`. | |
""" | |
vals = set(initial_mapping.values()) | |
# look up possible translations by pattern | |
for translation in pattern_words[get_pattern(word)]: | |
# new mapping to be yielded | |
new_mapping = initial_mapping.copy() | |
# set to true if the translation cannot be completed | |
failed = False | |
# generate a new mapping based on the translation | |
for k, v in zip(word, translation): | |
if (initial_mapping[k] or v in vals) and initial_mapping[k] != v: | |
failed = True | |
continue | |
if not initial_mapping[k]: | |
new_mapping[k] = v | |
if not failed: | |
yield new_mapping | |
def find_mappings(cipher_words, mapping = defaultdict(lambda: None)): | |
"""Return a generator that iterates over mappings that translate | |
the words in cipher_words to english words.""" | |
# no more cipher words to examine - return current mapping | |
if len(cipher_words) == 0: | |
yield mapping | |
return | |
# work with the first word | |
word = cipher_words[0] | |
# for each adjustment to the mapping that correctly translates `word`... | |
for current_mapping in next_mappings(word, mapping): | |
# ...recursively attempt to translate the rest of the words | |
result = find_mappings(cipher_words[1:], current_mapping) | |
for yielded in result: | |
yield yielded | |
# read in the dictionary words | |
with open('../dictionary/wordlist.txt') as f: | |
for word in f.read().split(): | |
words.add(word) | |
# compute and store the pattern for each dictionary word | |
for word in words: | |
pattern = get_pattern(word) | |
pattern_words[pattern] += [word] | |
# read in the input words | |
with open('in.txt') as f: | |
# read in the input as a lowercase string | |
ciphertext = ' '.join(f.readlines()).lower() | |
# strip out all non-alphabetical characters | |
cipher_words = ''.join([c for c in ciphertext if c in string.lowercase + ' ']) | |
# split by whitespace | |
cipher_words = cipher_words.split() | |
# sort words by length | |
cipher_words.sort(key=lambda word: -len(word)) | |
# print all solutions | |
mapping = None | |
for mapping in find_mappings(cipher_words): | |
# print the translation | |
for c in ciphertext: | |
sys.stdout.write(mapping[c] if mapping[c] else c) | |
print '' | |
# if no solutions were printed, output an error message | |
if not mapping: | |
print "Sorry! No solutions were found." |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment