Skip to content

Instantly share code, notes, and snippets.

@timakro
Last active November 13, 2020 21:56
Show Gist options
  • Save timakro/fdd84d5ede03ba224e013e20bfadfb97 to your computer and use it in GitHub Desktop.
Save timakro/fdd84d5ede03ba224e013e20bfadfb97 to your computer and use it in GitHub Desktop.
Probabilistic ROT-n cracker
#!/usr/bin/python
# wordlist.txt is a tab-delimited word frequency list.
# A list for the German language can be found here:
# https://github.com/gambolputty/dewiki-wordrank
import sys
import re
from collections import Counter
from string import ascii_lowercase as letters
# Only the first MAXWORDS lines of the frequency list are considered.
MAXWORDS = 10000
# Frequency list read by main(): one "<word><whitespace><count>" entry per line.
WORDLIST = 'wordlist.txt'


def rot_table(shift):
    """Return a ``str.translate`` table rotating a-z forward by *shift*.

    *shift* is reduced modulo 26, so negative values rotate backwards.
    Characters outside a-z are left untouched by the resulting table.
    """
    shift %= len(letters)
    return str.maketrans(letters, letters[shift:] + letters[:shift])


def parse_wordlist(lines, maxwords=MAXWORDS):
    """Build a ``{word: frequency}`` dict from word-frequency lines.

    Args:
        lines: iterable of strings, each holding a word and an integer
            count separated by whitespace (e.g. a tab).
        maxwords: number of leading lines to look at; the rest is ignored.

    Returns:
        Dict mapping normalised words to their summed counts.

    Words are lowercased (the ciphertext is lowercased too, so capitalised
    entries would otherwise never match) and German umlauts/sharp s are
    transliterated. Entries that still contain characters outside a-z are
    dropped. Lines that do not consist of exactly one word and one integer
    are skipped instead of aborting the run.
    """
    wordfreq = {}
    for i, line in enumerate(lines):
        if i >= maxwords:
            break
        fields = line.split()
        if len(fields) != 2:
            continue  # blank or malformed line
        try:
            count = int(fields[1])
        except ValueError:
            continue  # e.g. a header line
        word = (fields[0].lower()
                .replace('ä', 'ae').replace('ö', 'oe')
                .replace('ü', 'ue').replace('ß', 'ss'))
        if re.fullmatch(r'[a-z]+', word):
            # Different spellings can collapse to the same key
            # ("Sie"/"sie"); add their counts rather than overwrite.
            wordfreq[word] = wordfreq.get(word, 0) + count
    return wordfreq


def score_shifts(text, wordfreq):
    """Score every rotation 1..25 as a candidate encryption shift.

    For each shift, every known word is rotated forward by that shift and
    looked up among the a-z tokens of *text*; each hit contributes
    ``occurrences_in_text * word_frequency`` to that shift's score.

    Returns:
        Dict mapping shift (1..25) to its integer score.
    """
    wordcount = Counter(re.findall(r'[a-z]+', text))
    score = {}
    for shift in range(1, 26):
        rot = rot_table(shift)
        score[shift] = sum(
            wordcount[word.translate(rot)] * freq
            for word, freq in wordfreq.items()
        )
    return score


def find_shift(text, wordfreq):
    """Return the most likely encryption shift (1..25) for *text*.

    Ties are resolved in favour of the smallest shift, so a text in which
    no known word is found yields 1.
    """
    score = score_shifts(text, wordfreq)
    return max(score, key=score.get)


def decode(text, shift):
    """Undo a forward rotation by *shift* on the a-z letters of *text*."""
    return text.translate(rot_table(-shift))


def main(argv=None):
    """Decrypt the ROT-n encoded file named on the command line.

    Args:
        argv: argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            path of the file to decrypt.

    Reads the file, lowercases it, guesses the shift using the frequency
    list in WORDLIST and prints the decoded (lowercased) text to stdout.
    Exits with a usage message when no input file is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: {} FILE'.format(argv[0] if argv else 'rotn'))
    with open(argv[1]) as f:
        text = f.read().lower()
    # The list contains non-ASCII letters; do not depend on the locale.
    # NOTE(review): assumes the word list is UTF-8 encoded - confirm.
    with open(WORDLIST, encoding='utf-8') as f:
        wordfreq = parse_wordlist(f)
    print(decode(text, find_shift(text, wordfreq)))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment