Skip to content

Instantly share code, notes, and snippets.

@cammckinnon
Created April 15, 2012 04:49
Show Gist options
  • Save cammckinnon/2390127 to your computer and use it in GitHub Desktop.
Save cammckinnon/2390127 to your computer and use it in GitHub Desktop.
substitution cipher decryption
# python 2.7.3
# reads ciphertext from ./in.txt and a whitespace-delimited
# dictionary of english words from ./dictionary/wordlist.txt
import string
from collections import defaultdict
from itertools import izip
# english dictionary
words = set()
# word pattern => list of corresponding english words
pattern_words = defaultdict(list)
def get_pattern(word):
"""Return the word pattern for the input word
>>> get_pattern("cat")
'abc'
>>> get_pattern("schoolbus")
'abcddefga'
>>> get_pattern("racecar")
'abcdcba'
Applying a substitution cipher to a word preserves
its word pattern. We'll exploit this for decryption.
"""
map = {}
letters = iter(string.lowercase)
pattern = ''
for char in word:
if char not in map:
map[char] = letters.next()
pattern += map[char]
return pattern
def translate(word, mapping):
"""Translates the word to the language represented by the mapping.
>>> map = defaultdict(lambda: None, {'o':'t', 'n':'w', 'e':'o'})
>>> translate('one', map)
'two'
"""
for c in word:
if not mapping[c]:
return False
return ''.join([mapping[c] for c in word])
def allowed(word, translation, mapping):
"""Return true if the translation works for some superset of `mapping`.
Returns true if there exists a superset of `mapping` that allows
the translation from `word` to `translation`. Returns false otherwise.
>>> map = defaultdict(lambda: None, {'x':'l', 'k':'p'})
>>> allowed('xxkk', 'llpp', map)
True
>>> allowed('xaxkok', 'lmlpnp', map)
True
>>> allowed('xxkk', 'aabb', map)
False
"""
for char_orig, char_new in zip(word, translation):
if mapping[char_orig] and mapping[char_orig] != char_new:
return False
return True
def next_mappings(word, initial_mapping):
"""Iterate over all supersets of `initial_mapping` that translate `word`.
Returns a generator that iterates over supersets of `initial_mapping`
such that `word` translates to a word in the `words` dictionary.
Each superset will add only enough to `initial_mapping` to translate `word`.
"""
# look up possible translations by pattern
for translation in pattern_words[get_pattern(word)]:
# skip words that violate our initial mapping
if not allowed(word, translation, initial_mapping):
continue
# generate a new mapping by adding the letters in the translation
# to our initial mapping
new_mapping = initial_mapping.copy()
for k, v in zip(word, translation):
if not initial_mapping[k]:
new_mapping[k] = v
yield new_mapping
def find_mappings(mapping, cipher_words):
"""Find a mapping from the words in cipher_words to some
list of english words."""
# no more cipher words to examine - return current mapping
if len(cipher_words) == 0:
yield mapping
return
# work with the first word
word = cipher_words[0]
# for each adjustment to the mapping that correctly translates `word`...
for current_mapping in next_mappings(word, mapping):
# (ignore the mapping if it contains a contradiction)
vals = [v for v in current_mapping.values() if v]
if translate(word, current_mapping) in words and \
sorted(vals) == sorted(list(set(vals))):
# ...recursively attempt to translate the rest of the words
result = find_mappings(current_mapping, cipher_words[1:])
for yielded in result:
yield yielded
# read in the dictionary words
with open('../dictionary/wordlist.txt') as f:
for word in f.read().split():
words.add(word)
# compute and store the pattern for each dictionary word
for word in words:
pattern = get_pattern(word)
pattern_words[pattern] += [word]
# read in the input words
with open('in.txt') as f:
# read in the input as a lowercase string
ciphertext = ' '.join(f.readlines()).lower()
# strip out all non-alphabetical characters
input = ''.join(c for c in ciphertext if c.isspace() or c in string.lowercase)
# split by whitespace
input = input.split()
# remove duplicates
input = list(set(input))
# sort by length (longer words first). This increases
# our chances of guessing words early on
input.sort(key = lambda x: -len(x))
# show possible solutions
failure = True
for mapping, counter in izip(find_mappings(defaultdict(lambda: False), input), range(1000)):
failure = False
print str(counter + 1) + '.'
print ''.join((mapping[c] if c in mapping else c) for c in ciphertext)
print dict(mapping), "\n"
# show error if no solutions were found
if failure:
print "Failure: Are you sure the given substitution cipher decrypts to english?"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment