Skip to content

Instantly share code, notes, and snippets.

@jiridanek
Last active December 28, 2015 00:19
Show Gist options
  • Select an option

  • Save jiridanek/7412387 to your computer and use it in GitHub Desktop.

Select an option

Save jiridanek/7412387 to your computer and use it in GitHub Desktop.
# decodes messages from a file enciphered by substitution clipher # see http://norvig.com/ngrams/ for explanation and additional code # the hillclimb in decode_subst() is run in parallel
NKDIF SERLJ MIBFK FKDLV NQIBR HLCJU KFTFL KSTEN YQNDQ NTTEB TTENM QLJFS
NOSUM MLQTL CTENC QNKRE BTTBR HKLQT ELCBQ QBSFS KLTML SSFAI NLKBR RLUKT LCJUK
FTFLK FKSUC CFRFN KRYXB
CNLGV QVELH WTTAI LEHOT WEQVP CEBTQ FJNPP EDMFM LFCYF SQFSP NDHQF OEUTN
PPTPP CTDQN IFSQD TWHTN HHLFJ OLFSD HQFED HEGNQ TWVNQ HTNHH LFJWE BBITS PTHDT
XQQFO EUTYF SLFJE DEFDN IFSQG NLNGN PCTTQ EDOED FGQFI TLXNI
# decodes messages from a file enciphered by substitution clipher
# see http://norvig.com/ngrams/ for explanation and additional code
# the hillclimb in decode_subst() is run in parallel
from multiprocessing import Pool
import sys
import ngrams # written by Peter Norvig for his chapter in the book Beautiful Data
sys.setrecursionlimit(10000)
def func(msg):
steps=4000
return ngrams.hillclimb(ngrams.encode(msg, key=ngrams.cat(ngrams.shuffled(ngrams.alphabet))),
ngrams.logP3letters, ngrams.neighboring_msgs, steps)
def decode_subst(msg, restarts=90):
"Decode a substitution cipher with random restart hillclimbing."
pool = Pool()
msg = ngrams.cat(ngrams.allwords(msg))
candidates = pool.map(func, [msg]*restarts)
p, words = max(ngrams.segment2(c) for c in candidates)
return ' '.join(words)
texts = ['']
with open('cliphertexts.txt') as f:
lines = f.readlines()
for line in lines:
if line == '\n':
texts.append('')
else:
texts[-1] += ' ' + line.strip()
for i, text in enumerate(texts):
print 'CLIPHERTEXT', i
print '\t', text
print '\t', decode_subst(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment