Created
May 11, 2016 21:49
-
-
Save jimador/597d42b7c2c538c683a9e927a0d71596 to your computer and use it in GitHub Desktop.
Wasting time when I should be going home
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns playground.word-counter
  (:require [clojure.string]))
(def articles
  "Set of lower-case English articles that are excluded from word counts."
  (set ["a" "an" "the"]))
(defn- weighted-count
  "Returns, as a double, `count` increased by `count` times its share of
  `total`, i.e. count + count * (count / total).

  Throws ArithmeticException when both arguments are integers and `total`
  is zero, because the share is computed with `/`."
  [count total]
  (let [share (/ count total)
        bonus (* count share)]
    (double (+ count bonus))))
(defn- weighted-list
  "Returns a lazy sequence with one weighted count per element of `coll`.

  The second item of each element (for a map entry, its value) is passed to
  `weighted-count` together with `total`."
  [coll total]
  (for [entry coll
        :let [occurrences (second entry)]]
    (weighted-count occurrences total)))
(defn- get-top-freqs
  "Returns up to three [word occurrences] entries for the most frequent
  items in `words`, ordered from most to least frequent."
  [words]
  (let [ascending (sort-by val (frequencies words))]
    ;; Sort ascending, then flip, so the highest counts come first.
    (take 3 (reverse ascending))))
(defn- get-book
  "Reads the whole resource at `url` with `slurp` and returns its content
  as a vector of lines.

  `url` can be anything `slurp` accepts."
  [url]
  (let [content (slurp url)]
    (clojure.string/split-lines content)))
(defn- parse-text
  "Tokenizes `text` into lower-cased words, removes the entries of
  `articles`, and returns the most frequent remaining words as produced by
  `get-top-freqs`.

  `text` may be a single string or a collection of lines; a collection is
  joined with newlines before tokenizing. Any other value (including nil)
  is converted with `str`."
  [text]
  (let [content (cond
                  (string? text) text
                  ;; Calling `str` on a seq yields its printed form: quoted
                  ;; strings, escape sequences such as \t (which would glue a
                  ;; stray "t" onto the next word), or an opaque
                  ;; LazySeq@hash for lazy seqs. Join the lines explicitly.
                  (coll? text) (clojure.string/join "\n" text)
                  :else (str text))]
    (->> content
         ;; Word characters and apostrophes only. The previous class
         ;; [\w|'] also matched a literal pipe, because | is not
         ;; alternation inside a character class.
         (re-seq #"[\w']+")
         (map clojure.string/lower-case)
         (remove articles)
         (get-top-freqs))))
(defn- parse-book
  "Builds a summary map for a book given as a collection of lines.

  The first line is taken as the title and the remaining lines as the body.
  Returns a map with:
    :title      - the first line of `coll`
    :top-words  - result of `parse-text` on the body lines
    :word-count - number of word tokens in the body (articles included)
    :word-freq  - result of `weighted-list` for the top words, using
                  :word-count as the total"
  [coll]
  (let [title      (first coll)
        body-lines (rest coll)
        top-words  (parse-text body-lines)
        ;; Count word tokens, not lines: `(count body-lines)` would report
        ;; the number of lines and skew the total used for weighting.
        word-count (count (re-seq #"[\w']+"
                                  (clojure.string/join "\n" body-lines)))
        word-freq  (weighted-list top-words word-count)]
    {:title      title
     :top-words  top-words
     :word-count word-count
     :word-freq  word-freq}))
(defn- word-freq-parser
  "Loads the book at `url` with `get-book` and returns the summary produced
  by `parse-book` for it."
  [url]
  (let [lines (get-book url)]
    (parse-book lines)))
(defn parse-all-books
  "Returns a lazy sequence with the result of `word-freq-parser` for each
  element of `coll`.

  Because the sequence is lazy, each book is only processed when its
  element is realized."
  [coll]
  (for [url coll]
    (word-freq-parser url)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment