Created
August 20, 2012 14:26
-
-
Save ideamonk/3404700 to your computer and use it in GitHub Desktop.
Clojure/enlive/fun
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; http://www.reddit.com/r/dailyprogrammer/comments/y5sox/8132012_challenge_88_easy_vigenère_cipher/ | |
; this solution reads the problem post & Wiktionary's word-frequency list in order to find the answer :)
(ns one.core | |
(:require [net.cgrand.enlive-html :as html]) | |
(:use [clojure.string :as str :only [split]] :reload)) | |
; | |
; part one - encrypt & decrypt | |
; | |
(println "Task 1 - encrypts THECAKEISALIE and decrypts it back -")

; The cipher alphabet: a letter's zero-based position in this string is its
; numeric value (see value-for / char-for).
(def charset
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ")

; Alphabet size; used as the modulus when letter values are combined.
(def N
  (count charset))
(defn value-for
  "Returns the zero-based position of x (stringified) within charset,
  or -1 when charset does not contain it."
  [x]
  (let [needle (str x)]
    (.indexOf charset needle)))
(defn char-for
  "Returns the character of charset found at zero-based index x."
  [x]
  (nth charset x))
(defn combine
  "Maps every character in l to its charset value, folds the values with op,
  wraps the result into the range [0, N) and returns the matching charset
  character."
  [op & l]
  (let [total (reduce op (map value-for l))]
    ; mod (unlike rem) stays non-negative for a positive N, so subtraction works too
    (char-for (mod total N))))
(defn crypto
  "Builds a cipher function from op. The returned function takes a subject
  string and a key, pairs each subject character with the key repeated
  cyclically, merges each pair with combine, and returns the resulting string."
  [op]
  (fn [subject op-key]
    (let [key-stream (cycle op-key)
          merged     (map (fn [text-char key-char] (combine op text-char key-char))
                          subject
                          key-stream)]
      (apply str merged))))
; Encryption adds the key letter values, decryption subtracts them.
(def encrypt (crypto +))
(def decrypt (crypto -))

; Demo: encrypt the sample plaintext, then decrypt the sample ciphertext,
; both with the key GLADOS.
(doseq [[cipher-fn text] [[encrypt "THECAKEISALIE"]
                          [decrypt "ZSEFOCKTSDZAK"]]]
  (println (cipher-fn text "GLADOS")))
; now comes the juicy part - | |
; | |
; part two - find key and decrypt challenge | |
; | |
(println "Task 2 - decrypts challenge -")

; - [1]
(defn fetch-url
  "Wraps url in a java.net.URL and hands it to enlive's html-resource,
  returning whatever html-resource produces for that location."
  [url]
  (-> url
      java.net.URL.
      html/html-resource))
(defn cleanup
  "Extracts the text of dom, upper-cases it and substitutes replacement for
  every character outside A-Z. With one argument the replacement is a single
  space."
  ([dom] (cleanup dom " ")) ; - [2]
  ([dom replacement]
   (-> (html/text dom)
       (.toUpperCase)
       (.replaceAll "[^A-Z]" replacement))))
(println " ... fetching reddit post")

; grab reddit post & build search space
(def page-html
  (fetch-url "http://www.reddit.com/r/dailyprogrammer/comments/y5sox/8132012_challenge_88_easy_vigenère_cipher/"))

; second node matching the selector - presumably the problem statement body
(def problem-html
  (-> page-html
      (html/select [:div.usertext-body :div.md])
      second))

; upper-cased, letters-only text of every child of the problem node, concatenated
(def problem-text
  (->> (:content problem-html)
       (map cleanup)
       (apply str)))

; every distinct whitespace-separated token of the problem text is a candidate key
(def key-space
  (-> problem-text
      (split #"\s+")
      set))

; third <pre> element of the problem node with all non-letters stripped -
; presumably the challenge ciphertext
(def challenge-text
  (-> problem-html
      (html/select [:pre])
      (nth 2)
      (cleanup "")))
(defn get-words
  "Fetches the Wiktionary PG 2006/04 frequency list, skips the first two
  matching paragraphs, takes the next n of them and returns the upper-cased
  first content item of every link inside. The original note calls this
  'n-hundred words', so presumably each paragraph holds 100 links - TODO
  confirm. n defaults to 1."
  ([] (get-words 1))
  ([n]
   (let [words-html (fetch-url "http://en.wiktionary.org/wiki/Wiktionary:Frequency_lists/PG/2006/04/1-10000")
         paragraphs (html/select words-html [:div.mw-content-ltr :p])
         wanted     (take n (nthrest paragraphs 2))
         links      (html/select wanted [:a])]
     (map (fn [link] (.toUpperCase (first (:content link))))
          links))))

(println " ... fetching word frequency")

; grab 200 most frequent english words to do probabilistic match
(def words-en-200
  (get-words 2))
(defn decryption-score
  "Scores a candidate decryption for similarity with English.

  decryption - the candidate plaintext string
  key-used   - the key that produced it (passed through unchanged)

  Returns a [score decryption key-used] vector, where score is the number of
  entries of words-en-200 that occur anywhere inside decryption."
  [decryption key-used]
  (let [; .indexOf yields -1 when the word is absent and 0 when the text
        ; starts with it, so a hit is any index >= 0. The previous `> 0`
        ; test silently dropped words matching at the very beginning.
        found? (fn [word] (>= (.indexOf decryption word) 0))]
    [(count (filter found? words-en-200)) decryption key-used]))
(println " ... finding best match")

; Decrypt the challenge with every candidate key in parallel, score each
; result, and keep the [score decryption key] triple with the highest score.
(def best-match
  (->> key-space
       (pmap (fn [candidate]
               (decryption-score (decrypt challenge-text candidate) candidate)))
       (apply max-key first)))

(let [[score message used-key] best-match]
  (println "The secret message is - " message " (" score " matching words ). Key used - " used-key))
; Refs | |
; [1] - https://github.com/swannodette/enlive-tutorial/blob/master/src/tutorial/scrape1.clj | |
; [2] - http://stackoverflow.com/a/3208385/8786 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment