Last active
December 28, 2015 00:19
-
-
Save jiridanek/7412387 to your computer and use it in GitHub Desktop.
# decodes messages from a file enciphered by substitution clipher
# see http://norvig.com/ngrams/ for explanation and additional code
# the hillclimb in decode_subst() is run in parallel
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| NKDIF SERLJ MIBFK FKDLV NQIBR HLCJU KFTFL KSTEN YQNDQ NTTEB TTENM QLJFS | |
| NOSUM MLQTL CTENC QNKRE BTTBR HKLQT ELCBQ QBSFS KLTML SSFAI NLKBR RLUKT LCJUK | |
| FTFLK FKSUC CFRFN KRYXB | |
| CNLGV QVELH WTTAI LEHOT WEQVP CEBTQ FJNPP EDMFM LFCYF SQFSP NDHQF OEUTN | |
| PPTPP CTDQN IFSQD TWHTN HHLFJ OLFSD HQFED HEGNQ TWVNQ HTNHH LFJWE BBITS PTHDT | |
| XQQFO EUTYF SLFJE DEFDN IFSQG NLNGN PCTTQ EDOED FGQFI TLXNI |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # decodes messages from a file enciphered by substitution clipher | |
| # see http://norvig.com/ngrams/ for explanation and additional code | |
| # the hillclimb in decode_subst() is run in parallel | |
| from multiprocessing import Pool | |
| import sys | |
| import ngrams # written by Peter Norvig for his chapter in the book Beautiful Data | |
| sys.setrecursionlimit(10000) | |
| def func(msg): | |
| steps=4000 | |
| return ngrams.hillclimb(ngrams.encode(msg, key=ngrams.cat(ngrams.shuffled(ngrams.alphabet))), | |
| ngrams.logP3letters, ngrams.neighboring_msgs, steps) | |
| def decode_subst(msg, restarts=90): | |
| "Decode a substitution cipher with random restart hillclimbing." | |
| pool = Pool() | |
| msg = ngrams.cat(ngrams.allwords(msg)) | |
| candidates = pool.map(func, [msg]*restarts) | |
| p, words = max(ngrams.segment2(c) for c in candidates) | |
| return ' '.join(words) | |
| texts = [''] | |
| with open('cliphertexts.txt') as f: | |
| lines = f.readlines() | |
| for line in lines: | |
| if line == '\n': | |
| texts.append('') | |
| else: | |
| texts[-1] += ' ' + line.strip() | |
| for i, text in enumerate(texts): | |
| print 'CLIPHERTEXT', i | |
| print '\t', text | |
| print '\t', decode_subst(text) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment