Last active
May 25, 2021 14:46
-
-
Save kylebgorman/5d1ca713b7e288f89db8ef174347b7be to your computer and use it in GitHub Desktop.
Zodiac cipher 408: freestanding Python 3 script for converting the plaintext and ciphertext to OpenFst assets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# Constructs resources for Zodiac cipher 408: | |
# | |
# * Plaintext and ciphertext FARs | |
# * Unweighted "key" FSTs and "channel" (hypothesis space) FSTs | |
# * A textual symbol table for plaintext and ciphertext | |
# | |
# Requires: Pynini and OpenFst with the FAR extension. | |
import itertools | |
import os | |
import subprocess | |
import pynini | |
# Output filenames.
#
# The *_TXT files are intermediates: each is written, handed to
# `farcompilestrings`, and then removed. Each remaining resource comes as a
# pair of files, one with standard arcs and one with log arcs.
PLAINTEXT_TXT = "plaintext.txt"
PLAINTEXT_STD_FST = "plaintext.fst"
PLAINTEXT_LOG_FST = "plaintext-log.fst"
CIPHERTEXT_TXT = "ciphertext.txt"
CIPHERTEXT_STD_FST = "ciphertext.fst"
CIPHERTEXT_LOG_FST = "ciphertext-log.fst"
KEY_STD_FST = "key.fst"
KEY_LOG_FST = "key-log.fst"
CHANNEL_STD_FST = "channel.fst"
CHANNEL_LOG_FST = "channel-log.fst"
# Textual symbol table covering both plaintext and ciphertext symbols.
SYMBOL_TABLE = "sym.map"
# Plaintext, ciphertext, and a table from plaintext to ciphertext characters. | |
# The data is drawn from the "Harper key" available here: | |
# | |
# http://zodiackillerciphers.com/408/key.html | |
# | |
# Visually similar Unicode codepoints for the non-ASCII ciphertext characters | |
# were selected using: | |
# | |
# http://shapecatcher.com/ | |
# | |
# The following interpretations are applied. | |
# | |
# Misspellings unlikely to be due to polyphony or encipherment mistakes: | |
# | |
# * FORREST for FOREST (fn. 6) | |
# * EXPERENCE for EXPERIENCE (fn. 10) | |
# * PARADICE for PARADISE (fn. 14) | |
# * ANAMAL for ANIMAL (ciphertext ▲, plaintext A [4] and S [3]; could be due to
#   overinking of ciphertext △, read as plaintext I, but treating this as a
#   misspelling allows us to preserve two early cycles)
# | |
# Misspellings which are probably encipherment mistakes:
# | |
# * DANGERTUE for DANGEROUS (fn. 8; ciphertext 🜴YE) | |
# * THAE for THAT (fn. 13; ciphertext N, plaintext E [8]) | |
# * SND for AND (fn. 15; ciphertext ◭, plaintext S [2]) | |
# * SLOI for SLOW (fn. 18; ciphertext △, plaintext I [13]) | |
# | |
# Other plaintext typos: | |
# | |
# * "...AND ALL THE [people?--ed.] I HAVE KILLED" | |
# | |
# True polyphones: | |
# | |
# * Ciphertext ▲ for plaintext A [4] and S [3] | |
PLAINTEXT = ("I", "L", "I", "K", "E", "K", "I", "L", "L", "I", "N", "G", "P", | |
"E", "O", "P", "L", "E", "B", "E", "C", "A", "U", "S", "E", "I", | |
"T", "I", "S", "S", "O", "M", "U", "C", "H", "F", "U", "N", "I", | |
"T", "I", "S", "M", "O", "R", "E", "F", "U", "N", "T", "H", "A", | |
"N", "K", "I", "L", "L", "I", "N", "G", "W", "I", "L", "D", "G", | |
"A", "M", "E", "I", "N", "T", "H", "E", "F", "O", "R", "R", "E", | |
"S", "T", "B", "E", "C", "A", "U", "S", "E", "M", "A", "N", "I", | |
"S", "T", "H", "E", "M", "O", "S", "T", "D", "A", "N", "G", "E", | |
"R", "T", "U", "E", "A", "N", "A", "M", "A", "L", "O", "F", "A", | |
"L", "L", "T", "O", "K", "I", "L", "L", "S", "O", "M", "E", "T", | |
"H", "I", "N", "G", "G", "I", "V", "E", "S", "M", "E", "T", "H", | |
"E", "M", "O", "S", "T", "T", "H", "R", "I", "L", "L", "I", "N", | |
"G", "E", "X", "P", "E", "R", "E", "N", "C", "E", "I", "T", "I", | |
"S", "E", "V", "E", "N", "B", "E", "T", "T", "E", "R", "T", "H", | |
"A", "N", "G", "E", "T", "T", "I", "N", "G", "Y", "O", "U", "R", | |
"R", "O", "C", "K", "S", "O", "F", "F", "W", "I", "T", "H", "A", | |
"G", "I", "R", "L", "T", "H", "E", "B", "E", "S", "T", "P", "A", | |
"R", "T", "O", "F", "I", "T", "I", "S", "T", "H", "A", "E", "W", | |
"H", "E", "N", "I", "D", "I", "E", "I", "W", "I", "L", "L", "B", | |
"E", "R", "E", "B", "O", "R", "N", "I", "N", "P", "A", "R", "A", | |
"D", "I", "C", "E", "S", "N", "D", "A", "L", "L", "T", "H", "E", | |
"I", "H", "A", "V", "E", "K", "I", "L", "L", "E", "D", "W", "I", | |
"L", "L", "B", "E", "C", "O", "M", "E", "M", "Y", "S", "L", "A", | |
"V", "E", "S", "I", "W", "I", "L", "L", "N", "O", "T", "G", "I", | |
"V", "E", "Y", "O", "U", "M", "Y", "N", "A", "M", "E", "B", "E", | |
"C", "A", "U", "S", "E", "Y", "O", "U", "W", "I", "L", "L", "T", | |
"R", "Y", "T", "O", "S", "L", "O", "I", "D", "O", "W", "N", "O", | |
"R", "S", "T", "O", "P", "M", "Y", "C", "O", "L", "L", "E", "C", | |
"T", "I", "N", "G", "O", "F", "S", "L", "A", "V", "E", "S", "F", | |
"O", "R", "M", "Y", "A", "F", "T", "E", "R", "L", "I", "F", "E", | |
"E", "B", "E", "O", "R", "I", "E", "T", "E", "M", "E", "T", "H", | |
"H", "P", "I", "T", "I",) | |
assert len(PLAINTEXT) == 408 | |
# A relatively loose reading of the plaintext, with errors corrected, spaces | |
# and punctuation inserted, and the final padding removed, would read: | |
# | |
# "I like killing people because it is so much fun. It is more fun than killing | |
# wild game in the forest because man is the most dangerous animal of all. To | |
# kill something gives me the most thrilling experience. It is even better than | |
# getting your rocks off with a girl. The best part of it is that when I die, I | |
# will be reborn in paradise and all the people I have killed will become my | |
# slaves. I will not give you my name because you will try to slow down or stop | |
# my collecting of slaves for my afterlife." | |
CIPHERTEXT = ( | |
# Part 1. | |
"△", "◪", "P", "⟋", "Z", "⟋", "U", "B", "◪", "𝈲", "O", | |
"R", "⚻", "ꟼ", "X", "⚻", "B", | |
"W", "V", "+", "Ǝ", "G", "Y", "F", "ტ", "△", "H", "P", | |
"⊡", "K", "🜴", "⌕", "Y", "Ǝ", | |
"M", "J", "Y", "Λ", "U", "I", "𝈲", "◭", "⌕", "T", "⥿", | |
"N", "Q", "Y", "D", "●", "ꝋ", | |
"S", "ϕ", "⟋", "△", "■", "B", "P", "O", "R", "A", "U", | |
"◪", "ꟻ", "R", "⅃", "⌕", "E", | |
"𝈲", "Λ", "L", "M", "Z", "J", "ᗡ", "Я", "⟍", "ꟼ", "F", | |
"H", "V", "W", "Ǝ", "▲", "Y", | |
"⊡", "+", "⌕", "G", "D", "△", "K", "I", "ꝋ", "ტ", "⌕", | |
"X", "▲", "●", "⌖", "S", "ϕ", | |
"R", "N", "⥿", "I", "Y", "E", "⅃", "O", "▲", "⌕", "G", | |
"B", "T", "Q", "S", "■", "B", | |
"L", "ᗡ", "⟋", "P", "■", "B", "⊡", "X", "⌕", "E", "H", | |
"M", "U", "Λ", "R", "R", "𝈲", | |
# Part 2. | |
"ɔ", "Z", "K", "⌕", "ꟼ", "I", "ꝋ", "W", "⌕", "🜴", "▲", | |
"●", "L", "M", "Я", "△", "■", | |
"B", "P", "D", "R", "+", "Ꚍ", "⚻", "ტ", "⟍", "N", "ϕ", | |
"Ǝ", "E", "U", "H", "𝈲", "F", | |
"Z", "ɔ", "ꟼ", "O", "V", "W", "I", "●", "+", "⥿", "L", | |
"ꝋ", "⅃", "Λ", "R", "ტ", "H", | |
"I", "△", "D", "R", "□", "T", "Y", "Я", "⟍", "ᗡ", "Ǝ", | |
"⟋", "⊡", "X", "J", "Q", "A", | |
"P", "●", "M", "▲", "R", "U", "⥿", "◪", "L", "ꝋ", "N", | |
"V", "E", "K", "H", "⚻", "G", | |
"Я", "I", "🜴", "J", "𝈲", "●", "△", "▲", "L", "M", "⅃", | |
"N", "A", "ꝋ", "Z", "ϕ", "P", | |
"⌖", "U", "ꟼ", "𝈲", "A", "△", "■", "B", "V", "W", "⟍", | |
"+", "V", "T", "⥿", "O", "P", | |
"Λ", "⚻", "S", "Я", "⅃", "ꟻ", "U", "Ǝ", "ტ", "◭", "D", | |
"⌖", "G", "◪", "◪", "I", "M", | |
# Part 3. | |
"N", "𝈲", "ꝋ", "S", "ɔ", "E", "⟋", "△", "◪", "◪", "Z", | |
"ꟻ", "A", "P", "■", "B", "V", | |
"ꟼ", "Ǝ", "X", "⌕", "W", "⌕", "□", "F", "■", "▲", "ɔ", | |
"+", "⊡", "△", "A", "△", "B", | |
"◪", "O", "T", "●", "R", "U", "ɔ", "+", "□", "ᗡ", "Y", | |
"⌕", "□", "Λ", "S", "⌕", "W", | |
"V", "Z", "Ǝ", "G", "Y", "K", "E", "□", "T", "Y", "A", | |
"△", "◪", "■", "L", "⥿", "□", | |
"H", "🜴", "F", "B", "X", "△", "⌖", "X", "A", "D", "ᗡ", | |
"⟍", "◭", "L", "🜴", "⚻", "⌕", | |
"□", "Ǝ", "ᗡ", "■", "■", "ტ", "Ǝ", "●", "P", "O", "R", | |
"X", "Q", "F", "◪", "G", "ɔ", | |
"Z", "⊡", "J", "T", "⥿", "⌕", "□", "▲", "J", "I", "+", | |
"Я", "B", "P", "Q", "W", "ტ", | |
"V", "E", "X", "Я", "△", "W", "I", "ტ", "⌕", "E", "H", | |
"M", "ꝋ", "⚻", "U", "I", "𝈲",) | |
assert len(CIPHERTEXT) == 408 | |
# Maps each plaintext letter to the set of ciphertext symbols that stand for
# it. Most ciphertext symbols appear under exactly one plaintext letter; the
# exception is ▲, which is listed under both A and S. The plaintext letters
# J, Q, and Z have no entry.
KEY = {"A": {"G", "S", "⅃", "▲",},
       "B": {"V",},
       "C": {"Ǝ",},
       "D": {"ꟻ", "⌖",},
       "E": {"Z", "ꟼ", "W", "+", "ტ", "N", "E",},
       "F": {"J", "Q",},
       "G": {"R",},
       "H": {"M", "ꝋ",},
       "I": {"△", "P", "U", "𝈲",},
       "K": {"⟋",},
       "L": {"◪", "B", "■",},
       "M": {"⌕",},
       "N": {"O", "Λ", "D", "ϕ",},
       "O": {"X", "🜴", "T", "ᗡ",},
       "P": {"⚻",},
       "R": {"⥿", "Я", "⟍",},
       "S": {"F", "⊡", "K", "◭", "▲",},
       "T": {"H", "I", "●", "L",},
       "U": {"Y",},
       "V": {"ɔ",},
       "W": {"A",},
       "X": {"Ꚍ",},
       "Y": {"□",},}
def _check_alignment():
    """Reports whether PLAINTEXT and CIPHERTEXT are consistent with KEY.

    Returns:
      True if, at every position paired up by `zip`, the ciphertext symbol
      is one of the KEY entries for the plaintext letter at that position;
      False as soon as one position disagrees.

    Raises:
      KeyError: if a plaintext letter has no entry in KEY.
    """
    for plain, cipher in zip(PLAINTEXT, CIPHERTEXT):
        if cipher not in KEY[plain]:
            return False
    return True
def _make_plaintext():
    """Compiles the plaintext into standard-arc and log-arc FST files.

    Writes PLAINTEXT to PLAINTEXT_TXT as a single line of space-separated
    integers (the codepoint of each character), runs `farcompilestrings` on
    that file once per arc type to produce PLAINTEXT_STD_FST and
    PLAINTEXT_LOG_FST, and finally deletes the intermediate text file.

    Raises:
      subprocess.CalledProcessError: if `farcompilestrings` exits with a
        non-zero status.
      OSError: if the text file cannot be written or removed, or the
        `farcompilestrings` executable cannot be started.
    """
    with open(PLAINTEXT_TXT, "w") as sink:
        print(" ".join(str(ord(p)) for p in PLAINTEXT), file=sink)
    try:
        for arc_type, fst_path in (("standard", PLAINTEXT_STD_FST),
                                   ("log", PLAINTEXT_LOG_FST)):
            subprocess.check_call(("farcompilestrings",
                                   "--arc_type={}".format(arc_type),
                                   "--far_type=fst", "--fst_type=compact",
                                   PLAINTEXT_TXT, fst_path))
    finally:
        # The text file is only an intermediate; remove it even when
        # compilation fails so a failed run does not leave it behind.
        os.remove(PLAINTEXT_TXT)
def _make_ciphertext():
    """Compiles the ciphertext into standard-arc and log-arc FST files.

    Writes CIPHERTEXT to CIPHERTEXT_TXT as a single line of space-separated
    integers (the codepoint of each symbol), runs `farcompilestrings` on
    that file once per arc type to produce CIPHERTEXT_STD_FST and
    CIPHERTEXT_LOG_FST, and finally deletes the intermediate text file.

    Raises:
      subprocess.CalledProcessError: if `farcompilestrings` exits with a
        non-zero status.
      OSError: if the text file cannot be written or removed, or the
        `farcompilestrings` executable cannot be started.
    """
    with open(CIPHERTEXT_TXT, "w") as sink:
        print(" ".join(str(ord(c)) for c in CIPHERTEXT), file=sink)
    try:
        for arc_type, fst_path in (("standard", CIPHERTEXT_STD_FST),
                                   ("log", CIPHERTEXT_LOG_FST)):
            subprocess.check_call(("farcompilestrings",
                                   "--arc_type={}".format(arc_type),
                                   "--far_type=fst", "--fst_type=compact",
                                   CIPHERTEXT_TXT, fst_path))
    finally:
        # The text file is only an intermediate; remove it even when
        # compilation fails so a failed run does not leave it behind.
        os.remove(CIPHERTEXT_TXT)
def _make_key():
    """Builds the unweighted key FST and writes it in two arc types.

    Starting from `pynini.epsilon_machine()`, adds one arc from state 0 back
    to state 0 for every (plaintext, ciphertext) pair in KEY, labeled with
    the plaintext codepoint on the input side and the ciphertext codepoint
    on the output side. The result is written to KEY_STD_FST, and a copy
    converted with the "to_log" arc map is written to KEY_LOG_FST.
    """
    key = pynini.epsilon_machine()
    # Adds arcs.
    for (plaintext, ciphertexts) in KEY.items():
        plaintext_label = ord(plaintext)
        # Iteration order over a set of strings can differ from one
        # interpreter run to the next; sorting fixes the arc order so the
        # written files are reproducible.
        for ciphertext in sorted(ciphertexts):
            ciphertext_label = ord(ciphertext)
            key.add_arc(0, pynini.Arc(plaintext_label, ciphertext_label, 0, 0))
    # Writes it out.
    key.write(KEY_STD_FST)
    pynini.arcmap(key, map_type="to_log").write(KEY_LOG_FST)
def _make_channel():
    """Builds the channel (hypothesis space) FST and writes it in two arc types.

    Starting from `pynini.epsilon_machine()`, adds an arc from state 0 back
    to state 0 for every combination of a plaintext letter in KEY with a
    ciphertext symbol listed anywhere in KEY. The "arc_sum" arc map is then
    applied, the result is written to CHANNEL_STD_FST, and a copy converted
    with the "to_log" arc map is written to CHANNEL_LOG_FST.
    """
    channel = pynini.epsilon_machine()
    # Pairs every plaintext letter with every listed ciphertext symbol. A
    # symbol listed under more than one plaintext letter (▲) is paired more
    # than once here; the arc_sum map below is applied to the result.
    every_ciphertext = itertools.chain.from_iterable(KEY.values())
    for (plaintext, ciphertext) in itertools.product(KEY, every_ciphertext):
        arc = pynini.Arc(ord(plaintext), ord(ciphertext), 0, 0)
        channel.add_arc(0, arc)
    channel = pynini.arcmap(channel, map_type="arc_sum")
    # Writes both variants.
    channel.write(CHANNEL_STD_FST)
    log_channel = pynini.arcmap(channel, map_type="to_log")
    log_channel.write(CHANNEL_LOG_FST)
def _make_symbol_table():
    """Writes a textual symbol table for all plaintext and ciphertext symbols.

    Collects every key of KEY and every member of its value sets, adds them
    to a fresh `pynini.SymbolTable` in sorted order with each symbol's
    codepoint as its label, and writes the table as text to SYMBOL_TABLE.
    """
    # Plaintext symbols are the keys; ciphertext symbols are the set members.
    alphabet = set(KEY)
    alphabet.update(*KEY.values())
    table = pynini.SymbolTable()
    for char in sorted(alphabet):
        table.add_symbol(char, ord(char))
    table.write_text(SYMBOL_TABLE)
def main():
    """Checks the data against the key, then builds every output file.

    Raises:
      AssertionError: if `_check_alignment` reports a mismatch.
    """
    assert _check_alignment()
    # Build steps, run in this order.
    build_steps = (
        _make_plaintext,
        _make_ciphertext,
        _make_key,
        _make_channel,
        _make_symbol_table,
    )
    for build in build_steps:
        build()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
maybe this, not tested: