Last active
November 13, 2020 21:56
-
-
Save timakro/fdd84d5ede03ba224e013e20bfadfb97 to your computer and use it in GitHub Desktop.
Probabilistic ROT-n cracker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Probabilistic ROT-n cracker.

Usage: pass the path of the encrypted text file as the only argument.

The text is lower-cased, split into runs of a-z, and every shift from 1 to
25 is scored by summing ``occurrences in text * wordlist frequency`` over
all wordlist words as they would look when encrypted with that shift.  The
text is then printed rotated back by the best-scoring shift.

wordlist.txt is a tab-delimited word frequency list.
A list for the German language can be found here:
https://github.com/gambolputty/dewiki-wordrank
"""
import sys
import re
from collections import Counter
from string import ascii_lowercase as letters

MAXWORDS = 10000
WORDLIST = 'wordlist.txt'

# German letters outside a-z, spelled out so they can match a-z-only text.
_TRANSLITERATE = str.maketrans({'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss'})
_WORD_RE = re.compile(r'[a-z]+')


def rot_table(shift):
    """Return a ``str.translate`` table moving a-z forward by ``shift``.

    ``shift`` is taken modulo 26, so a negative value rotates backwards.
    Characters other than a-z are left untouched by the table.
    """
    shift %= 26
    return str.maketrans(letters, letters[shift:] + letters[:shift])


def load_wordfreq(lines, maxwords=MAXWORDS):
    """Build a ``{word: frequency}`` dict from word-frequency lines.

    Args:
        lines: iterable of strings, each ``word<whitespace>count``.
        maxwords: number of leading lines to read (counted per line,
            whether or not the line yields a usable word).

    Returns:
        dict mapping lower-case a-z words to integer frequencies.

    Lines that do not consist of exactly two fields with an integer count
    are skipped instead of aborting the run.  Words are lower-cased and
    transliterated; words that still contain characters outside a-z are
    dropped.  Frequencies of words that become identical after
    transliteration are added together rather than overwritten.
    """
    wordfreq = {}
    for i, line in enumerate(lines):
        if i >= maxwords:
            break
        fields = line.split()
        if len(fields) != 2:
            continue
        word, count = fields
        try:
            freq = int(count)
        except ValueError:
            continue
        # The text is matched in lower case, so the wordlist must be too.
        word = word.lower().translate(_TRANSLITERATE)
        if _WORD_RE.fullmatch(word):
            wordfreq[word] = wordfreq.get(word, 0) + freq
    return wordfreq


def score_shifts(wordcount, wordfreq):
    """Return ``{shift: score}`` for every shift from 1 to 25.

    Args:
        wordcount: mapping of words found in the encrypted text to their
            number of occurrences.
        wordfreq: mapping of plaintext words to their frequency weight.
    """
    score = {}
    for shift in range(1, 26):
        rot = rot_table(shift)
        # Encrypt each known word with this shift and look it up in the text.
        score[shift] = sum(
            wordcount.get(word.translate(rot), 0) * freq
            for word, freq in wordfreq.items()
        )
    return score


def crack(text, wordfreq):
    """Guess the ROT shift of ``text`` and undo it.

    Args:
        text: the encrypted text; it is lower-cased before processing.
        wordfreq: mapping of plaintext words to frequency weights.

    Returns:
        ``(shift, plaintext)``.  ``shift`` is 0 and the lower-cased text is
        returned unrotated when no wordlist word matched at any shift,
        because there is then no evidence for picking one.  Ties between
        shifts go to the lowest shift.
    """
    text = text.lower()
    wordcount = Counter(_WORD_RE.findall(text))
    score = score_shifts(wordcount, wordfreq)
    winner = max(score, key=score.get)
    if score[winner] <= 0:
        return 0, text
    return winner, text.translate(rot_table(-winner))


def main(argv=None):
    """Run the cracker; return a process exit status.

    Reads the file named by ``argv[1]`` and ``wordlist.txt`` from the
    current directory, then prints the decrypted text to stdout.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'rotcrack'
        print('usage: {} CIPHERTEXT_FILE'.format(prog), file=sys.stderr)
        return 2
    # Only a-z matters for cracking, so undecodable bytes are replaced
    # rather than aborting the run.
    with open(argv[1], encoding='utf-8', errors='replace') as f:
        text = f.read()
    # Explicit UTF-8: the umlaut handling must not depend on the locale.
    with open(WORDLIST, encoding='utf-8') as f:
        wordfreq = load_wordfreq(f)
    shift, plaintext = crack(text, wordfreq)
    if shift == 0:
        print('warning: no wordlist word matched at any shift; '
              'text left unrotated', file=sys.stderr)
    print(plaintext)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment