Last active
October 1, 2023 03:25
-
-
Save rbobillot/5a4936702e3f8c58c601958ba3afbcb8 to your computer and use it in GitHub Desktop.
Solving KRYTPOS ciphered messages (K1, K2, K3, and one day, K4), in Python (for educational purposes)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests # pip install requests | |
K = dict({ | |
'1': | |
'EMUFPHZLRFAXYUSDJKZLDKRNSHGNFIVJ' + | |
'YQTQUXQBQVYUVLLTREVJYQTMKYRDMFD', | |
'2': | |
'VFPJUDEEHZWETZYVGWHKKQETGFQJNCE' + | |
'GGWHKK?DQMCPFQZDQMMIAGPFXHQRLG' + | |
'TIMVMZJANQLVKQEDAGDVFRPJUNGEUNA' + | |
'QZGZLECGYUXUEENJTBJLBQCRTBJDFHRR' + | |
'YIZETKZEMVDUFKSJHKFWHKUWQLSZFTI' + | |
'HHDDDUVH?DWKBFUFPWNTDFIYCUQZERE' + | |
'EVLDKFEZMOQQJLTTUGSYQPFEUNLAVIDX' + | |
'FLGGTEZ?FKZBSFDQVGOGIPUFXHHDRKF' + | |
'FHQNTGPUAECNUVPDJMQCLQUMUNEDFQ' + | |
'ELZZVRRGKFFVOEEXBDMVPNFQXEZLGRE' + | |
'DNQFMPNZGLFLPMRJQYALMGNUVPDXVKP' + | |
'DQUMEBEDMHDAFMJGZNUPLGEWJLLAETG', | |
'3': | |
'ENDYAHROHNLSRHEOCPTEOIBIDYSHNAIA' + | |
'CHTNREYULDSLLSLLNOHSNOSMRWXMNE' + | |
'TPRNGATIHNRARPESLNNELEBLPIIACAE' + | |
'WMTWNDITEENRAHCTENEUDRETNHAEOE' + | |
'TFOLSEDTIWENHAEIOYTEYQHEENCTAYCR' + | |
'EIFTBRSPAMHHEWENATAMATEGYEERLB' + | |
'TEEFOASFIOTUETUAEOTOARMAEERTNRTI' + | |
'BSEDDNIAAHTTMSTEWPIEROAGRIEWFEB' + | |
'AECTDDHILCEIHSITEGOEAOSDDRYDLORIT' + | |
'RKLMLEHAGTDHARDPNEOHMGFMFEUHE' + | |
'ECDMRIPFEIMEHNLSSTTRTVDOHW?', | |
'4': | |
'OBKR' + | |
'UOXOGHULBSOLIFBBWFLRVQQPRNGKSSO' + | |
'TWTQSJQSSEKZZWATJKLUDIAWINFBNYP' + | |
'VTTMZFPKWGDKZXTJCDIGKUHUAUEKCAR' | |
}) | |
# ['A':'Z'] range | |
alphabet = [chr(c) for c in range(ord('A'), ord('Z')+1)] | |
# Keyed Vigenere alphabet (Quagmire III variation): KRYPTOSABCDEFGHIJLMNQUVWXZ | |
k_alphabet = 'KRYPTOS' + ''.join([c for c in alphabet if c not in 'KRYPTOS']) | |
# As KRYPTOS alphabet is keyed, we need to sort it alphabetically, | |
# keeping the previous indexes: [(7, 'A'), (8, 'B'), ..., (0, 'K'), ...] | |
# It will keep track of the offsets when ciphering/deciphering | |
indexed_alphabet = sorted(enumerate(k_alphabet), key=lambda nc: nc[1]) | |
# Used to try to determine the ciphering used for each parts | |
# You can copy/paste the list, | |
# and ask ChatGPT if it can find a corresponding language | |
# (it works) | |
def char_frequency_analysis(topic, s): | |
message = s.upper() | |
occurences = dict([(c, 0) for c in set(s)]) | |
frequencies = dict([(c, 0) for c in set(s)]) | |
for c in s: | |
occurences.update({ c : occurences[c] + 1 }) | |
for c in s: | |
frequencies.update({ c : str(round(occurences[c] / len(s) * 100, 2)) + '%' }) | |
print(topic, sorted(list(frequencies.items()), key=lambda kv : -float(kv[1][:-1]))) | |
# Wordlist of english words, | |
# used to find keys for Vigenere ciphered messages | |
def get_wordlist(): # ~250k words | |
if os.path.exists('/usr/share/dict/words'): | |
with open('/usr/share/dict/words', 'r') as local_system_dictionary: | |
words = list(set(local_system_dictionary.read().splitlines())) | |
else: | |
words = list(set(requests.get( | |
'https://gist.githubusercontent.com/rbobillot/' + | |
'7228ad9b10cbb0e3a893ec5b3d0baaef/raw/' + | |
'a46ccad57b0fd308875213059b16b7d687373720/' + | |
'osx_wordlist.txt').text.split('\n'))) | |
return sorted([l.upper() for l in words], key=lambda l: -len(l)) | |
# Most common english words wordlist, | |
# used to score deciphered messages | |
def get_common_english_words(): # ~10k words | |
words = list(set(requests.get( | |
'https://gist.githubusercontent.com/rbobillot/' + | |
'e9009d2b7076f8e3cf707a0aae91a734/raw/' + | |
'9beece4d81869d2e7562fd372912532750ec4172/' + | |
'most_common_english_words.txt').text.split('\n'))) | |
return sorted([l.upper() for l in words if len(l) > 3], key=lambda l: -len(l)) | |
def attempt_score(attempt, best_attempt, sub_words, key=''): | |
sub_words_counter = 0 | |
for w in sub_words: | |
if w in attempt: | |
sub_words_counter += 1 | |
if sub_words_counter > best_attempt[1]: | |
best_attempt = (attempt, sub_words_counter, key) | |
return best_attempt | |
# This function applies the Vigenere algorithm (cipher/decipher) | |
# to any message, using the KRYPTOS custom dictionary | |
def kryptos_vigenere(s, key, decipher = False): | |
# pre-compute key, masking the whole message | |
mask = (key * (1 + int(len(s) / len(key))))[:len(s)] | |
def alpha_index(c): | |
return ord(c) - ord('A') | |
def cipher_at_index(i): | |
offset = indexed_alphabet[alpha_index(mask[i])][0] | |
letter_index = indexed_alphabet[alpha_index(s[i])][0] | |
if decipher: | |
# add 26 to avoid negative index | |
letter = k_alphabet[(letter_index - offset + 26) % 26] | |
else: | |
letter = k_alphabet[(letter_index + offset) % 26] | |
return letter | |
return ''.join([cipher_at_index(i) for i in range(0, len(s))]) | |
# This function will try every words from an English dictionary | |
# To speed up things a bit, we filter the words to test: | |
# - for the potential Key list, only words with a length interval of [8:11] | |
# - for the subwords to find within a decipher attempt, a length interval of [5:6] | |
def bruteforce_vigenere(s): | |
keys_list = [w for w in get_wordlist() if len(w) > 4 and len(w) < 12] | |
sub_words = get_common_english_words() | |
progression = 0 | |
best_attempt = ('', 0, '') | |
print(len(keys_list), 'possible words to test') | |
print(len(sub_words), 'subwords to test (normal sized words)') | |
print('progression: (decipher_attempt, english_words_count, best_key) current_key') | |
for index, key in enumerate(keys_list): | |
current_progression = round(index / len(keys_list) * 100) | |
attempt = kryptos_vigenere(s, key, decipher=True) | |
best_attempt = attempt_score(attempt, best_attempt, sub_words, key) | |
if current_progression > progression: | |
progression = current_progression | |
print(str(progression) + '%:', best_attempt, '\033[90m' + key + '\033[0m') | |
# As frequency analysis reveals that K3 is plain english, | |
# an a simple string reverse shows that letters are scrambled | |
# we might deduce that a transposition algorithm was used. | |
# K3 is 337 letters long (including the '?' character) which is a prime number | |
# Hence the message cannot fit in a simple transposition matrix, | |
# unless the '?' is not mandatory in the transposition. | |
def transpose(s, offset): | |
size = len(s) | |
return ''.join([s[(i * offset - 1) % size] for i in range(1, size + 1)]) | |
# Then most efficient bruteforce method is to shift letters of the ciphered message | |
# using a growing offset (from 1 to len(ciphered_message)) | |
def bruteforce_transposition(s): | |
best_attempt = ('', 0, '') | |
progression = 0 | |
sub_words = get_common_english_words() | |
for index in range(1, len(s)): | |
current_progression = round(index / len(s) * 100) | |
attempt = transpose(s, index) | |
best_attempt = attempt_score(attempt, best_attempt, sub_words) | |
if current_progression > progression: | |
progression = current_progression | |
print(str(progression) + '%:', best_attempt[:2]) | |
return s | |
def solve_part_1(): # Vigenere key: PALIMPSEST | |
s = K['1'].replace('?', '') | |
bruteforce_vigenere(s) | |
def solve_part_2(): # Vigenere key: ABSCISSA | |
s = K['2'].replace('?', '') | |
bruteforce_vigenere(s) | |
def solve_part_3(): # Transposition offset: 192 | |
s = K['3'] # no need to remove '?' character (mandatory in transposition) | |
bruteforce_transposition(s) | |
def solve_part_4(): | |
s = K['4'] | |
bruteforce_vigenere(s) | |
print('NO SOLUTION YET') | |
#char_frequency_analysis('K1 Freq Analysis ->', K['1']) | |
#char_frequency_analysis('K2 Freq Analysis ->', K['2']) | |
#char_frequency_analysis('K3 Freq Analysis ->', K['3']) | |
#char_frequency_analysis('K4 Freq Analysis ->', K['4']) | |
#solve_part_1() | |
#solve_part_2() | |
#solve_part_3() | |
solve_part_4() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
About K4:
http://www.thekryptosproject.com/kryptos/k0-k5/k4.php
http://numberworld.blogspot.com/2017/03/kryptos-cipher-part-1.html
http://numberworld.blogspot.com/2017/03/kryptos-cipher-part-2.html
https://kryptosfan.wordpress.com/tag/jim-sanborn/
Possible clues
Other possible ciphering algorithms ?
https://en.wikipedia.org/wiki/Hill_cipher (did not test yet)
https://en.wikipedia.org/wiki/Beaufort_cipher (did not test yet)