Last active
May 27, 2018 21:31
-
-
Save L-TChen/add31629e2f6c8e2ea1416646df7fc86 to your computer and use it in GitHub Desktop.
FLOLAC'18 Exercise: A case-sensitive Caesar cipher and decipher based on frequency analysis, written in Haskell.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE UnicodeSyntax #-} | |
{-# LANGUAGE TupleSections #-} | |
module Caesar where | |
import Data.Map (Map, fromList, fromListWith, unionWith) | |
import Data.Char | |
import Data.List | |
import Data.Function | |
encode ∷ Int → String → String | |
encode n str = rotate n <$> str | |
decode ∷ String → (String, Int) | |
decode xs = fst $ minimumBy (compare `on` snd) $ scores xs | |
scores ∷ String → [((String, Int), Float)] | |
scores xs = do | |
i ← [0..25] | |
let cand = encode (-i) xs in | |
return ((cand, i), distance refFreqTab $ mkFreqTab cand) | |
-- Rotate English alphabet only | |
rotate ∷ Int → Char → Char | |
rotate m x = case (isAsciiUpper x, isAsciiLower x) of | |
(True, _) → chr $ ((m + ord x - ord 'A') `mod` 26) + ord 'A' | |
(_, True) → chr $ ((m + ord x - ord 'a') `mod` 26) + ord 'a' | |
_ → x | |
-- Euclidean distance | |
distance ∷ Map Char Float → Map Char Float → Float | |
distance xs ys = sqrt $ sum $ unionWith (\x y → (x - y)^2) xs ys | |
-- Make a frequence table for English alphabet | |
mkFreqTab ∷ String → Map Char Float | |
mkFreqTab xs = (100/total *) <$> fromListWith (+) (((,1) <$> xs') ++ ((,0) <$> ['A'..'Z'])) | |
where xs' = filter isAsciiUpper (toUpper <$> xs) | |
total = fromIntegral $ length xs' | |
-- https://en.wikipedia.org/wiki/Letter_frequency#Relative_frequencies_of_letters_in_the_English_language | |
refFreqTab ∷ Map Char Float | |
refFreqTab = fromList | |
[('E', 12.702), ('T', 9.056), ('A', 8.167), ('O', 7.507), ('I', 6.966), ('N', 6.749), | |
('S', 6.749), ('H', 6.094), ('R', 5.987), ('D', 4.253), ('L', 4.025), ('C', 2.782), | |
('U', 2.758), ('M', 2.406), ('W', 2.36), ('F', 2.228), ('G', 2.015), ('Y', 1.974), | |
('P', 1.929), ('B', 1.492), ('V', 0.978), ('K', 0.772), ('J', 0.153), ('X', 0.15), | |
('Q', 0.095), ('Z', 0.074)] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment