Created
April 15, 2012 04:49
-
-
Save cammckinnon/2390127 to your computer and use it in GitHub Desktop.
substitution cipher decryption
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# python 2.7.3 | |
# reads ciphertext from ./in.txt and a whitespace-delimited | |
# dictionary of english words from ./dictionary/wordlist.txt | |
import string | |
from collections import defaultdict | |
from itertools import izip | |
# english dictionary | |
words = set() | |
# word pattern => list of corresponding english words | |
pattern_words = defaultdict(list) | |
def get_pattern(word): | |
"""Return the word pattern for the input word | |
>>> get_pattern("cat") | |
'abc' | |
>>> get_pattern("schoolbus") | |
'abcddefga' | |
>>> get_pattern("racecar") | |
'abcdcba' | |
Applying a substitution cipher to a word preserves | |
its word pattern. We'll exploit this for decryption. | |
""" | |
map = {} | |
letters = iter(string.lowercase) | |
pattern = '' | |
for char in word: | |
if char not in map: | |
map[char] = letters.next() | |
pattern += map[char] | |
return pattern | |
def translate(word, mapping): | |
"""Translates the word to the language represented by the mapping. | |
>>> map = defaultdict(lambda: None, {'o':'t', 'n':'w', 'e':'o'}) | |
>>> translate('one', map) | |
'two' | |
""" | |
for c in word: | |
if not mapping[c]: | |
return False | |
return ''.join([mapping[c] for c in word]) | |
def allowed(word, translation, mapping): | |
"""Return true if the translation works for some superset of `mapping`. | |
Returns true if there exists a superset of `mapping` that allows | |
the translation from `word` to `translation`. Returns false otherwise. | |
>>> map = defaultdict(lambda: None, {'x':'l', 'k':'p'}) | |
>>> allowed('xxkk', 'llpp', map) | |
True | |
>>> allowed('xaxkok', 'lmlpnp', map) | |
True | |
>>> allowed('xxkk', 'aabb', map) | |
False | |
""" | |
for char_orig, char_new in zip(word, translation): | |
if mapping[char_orig] and mapping[char_orig] != char_new: | |
return False | |
return True | |
def next_mappings(word, initial_mapping): | |
"""Iterate over all supersets of `initial_mapping` that translate `word`. | |
Returns a generator that iterates over supersets of `initial_mapping` | |
such that `word` translates to a word in the `words` dictionary. | |
Each superset will add only enough to `initial_mapping` to translate `word`. | |
""" | |
# look up possible translations by pattern | |
for translation in pattern_words[get_pattern(word)]: | |
# skip words that violate our initial mapping | |
if not allowed(word, translation, initial_mapping): | |
continue | |
# generate a new mapping by adding the letters in the translation | |
# to our initial mapping | |
new_mapping = initial_mapping.copy() | |
for k, v in zip(word, translation): | |
if not initial_mapping[k]: | |
new_mapping[k] = v | |
yield new_mapping | |
def find_mappings(mapping, cipher_words): | |
"""Find a mapping from the words in cipher_words to some | |
list of english words.""" | |
# no more cipher words to examine - return current mapping | |
if len(cipher_words) == 0: | |
yield mapping | |
return | |
# work with the first word | |
word = cipher_words[0] | |
# for each adjustment to the mapping that correctly translates `word`... | |
for current_mapping in next_mappings(word, mapping): | |
# (ignore the mapping if it contains a contradiction) | |
vals = [v for v in current_mapping.values() if v] | |
if translate(word, current_mapping) in words and \ | |
sorted(vals) == sorted(list(set(vals))): | |
# ...recursively attempt to translate the rest of the words | |
result = find_mappings(current_mapping, cipher_words[1:]) | |
for yielded in result: | |
yield yielded | |
# read in the dictionary words | |
with open('../dictionary/wordlist.txt') as f: | |
for word in f.read().split(): | |
words.add(word) | |
# compute and store the pattern for each dictionary word | |
for word in words: | |
pattern = get_pattern(word) | |
pattern_words[pattern] += [word] | |
# read in the input words | |
with open('in.txt') as f: | |
# read in the input as a lowercase string | |
ciphertext = ' '.join(f.readlines()).lower() | |
# strip out all non-alphabetical characters | |
input = ''.join(c for c in ciphertext if c.isspace() or c in string.lowercase) | |
# split by whitespace | |
input = input.split() | |
# remove duplicates | |
input = list(set(input)) | |
# sort by length (longer words first). This increases | |
# our chances of guessing words early on | |
input.sort(key = lambda x: -len(x)) | |
# show possible solutions | |
failure = True | |
for mapping, counter in izip(find_mappings(defaultdict(lambda: False), input), range(1000)): | |
failure = False | |
print str(counter + 1) + '.' | |
print ''.join((mapping[c] if c in mapping else c) for c in ciphertext) | |
print dict(mapping), "\n" | |
# show error if no solutions were found | |
if failure: | |
print "Failure: Are you sure the given substitution cipher decrypts to english?" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment