Created
August 20, 2014 17:22
-
-
Save triclops200/99a085d6067ebe1646af to your computer and use it in GitHub Desktop.
markovV2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn increment-counter
  "Returns `res` with the count stored under `word` bumped by one.
  A word that is absent (or mapped to nil) is treated as having count 0."
  [res word]
  (let [current (get res word)]
    (assoc res word (if (nil? current) 1 (inc current)))))
(defn increment-link
  "Records one more occurrence of `word2` following `word1` in `dict`,
  a map of word -> {successor -> count}. Returns the updated map."
  [dict word1 word2]
  (let [followers (get dict word1)]
    (assoc dict word1 (increment-counter followers word2))))
(defn split-text
  "Splits `text` into raw sentence fragments on the delimiter characters
  . ! ? > - \" * ) ( and returns a lazy seq of the non-blank fragments.

  Fragments are returned untrimmed. Blank fragments (empty or
  whitespace-only) are dropped: adjacent delimiters such as \"!?\" or a
  leading quote produce empty strings, which would otherwise turn into
  word-less sentences further down the pipeline."
  [text]
  (remove clojure.string/blank?
          (clojure.string/split text #"[.!?>\-\"\*\)\(]")))
(defn fix-sentences
  "Wraps each fragment in sentence markers: the trimmed text is prefixed
  with \"> \" (start token) and suffixed with \" .\" (end token).
  Returns a lazy seq of strings."
  [sentences]
  (for [fragment sentences]
    (str "> " (clojure.string/trim fragment) " .")))
(defn split-sentences
  "Lower-cases every sentence and splits it on whitespace into a vector of
  tokens. Sentences that yield no tokens are skipped. Returns a lazy seq
  of token vectors."
  [sentences]
  (for [sentence sentences
        :let [tokens (clojure.string/split
                       (clojure.string/lower-case sentence)
                       #"\s+| +")]
        :when (seq tokens)]
    tokens))
(defn zip
  "Combines the given collections element-wise into a lazy seq of vectors,
  stopping at the end of the shortest collection."
  [& colls]
  (apply map (fn [& row] (vec row)) colls))
(defn slurp-sentence
  "Folds one tokenised sentence into `dict`, recording a link for every
  pair of adjacent tokens. Returns the updated dictionary."
  [dict sentence]
  (let [adjacent-pairs (map vector sentence (rest sentence))]
    (reduce (fn [acc [current following]]
              (increment-link acc current following))
            dict
            adjacent-pairs)))
(defn slurp-text
  "Builds a fresh Markov dictionary from raw `text`: splits it into
  fragments, adds start/end markers, tokenises, and accumulates the
  successor counts of every sentence."
  [text]
  (let [fragments (split-text text)
        marked    (fix-sentences fragments)
        tokenised (split-sentences marked)]
    (reduce slurp-sentence {} tokenised)))
(defn get-sum-of-entries
  "Adds up the second element (the count) of every [word count] entry.
  Returns 0 for an empty or nil collection."
  [entries]
  (reduce + 0 (map second entries)))
(defn chooser
  "Reducing step for weighted random selection.

  The accumulator is [chosen-word remaining]; each entry is [word count].
  While `remaining` is positive the entry's count is subtracted from it,
  and the entry whose subtraction brings it to zero or below becomes the
  chosen word. Once `remaining` is no longer positive the accumulator is
  passed through unchanged."
  [[bestword sum] [nextword chance]]
  (if-not (<= sum 0)
    (let [remaining (- sum chance)
          winner    (if (<= remaining 0) nextword bestword)]
      [winner remaining])
    [bestword sum]))
(defn get-random-entry-by-count
  "Picks a word at random from `entries` (a collection of [word count]
  pairs), with probability proportional to each count. Returns nil when
  `entries` is empty."
  [entries]
  (let [total      (get-sum-of-entries entries)
        ;; Draw a ticket in 1..total; the entry whose cumulative count
        ;; reaches the ticket wins.
        ticket     (inc (rand-int total))
        seed       [(ffirst entries) ticket]
        [picked _] (reduce chooser seed entries)]
    picked))
(defn build-sentence
  "Generates one random sentence by walking the Markov dictionary `dict`
  (word -> {successor -> count}) starting from the \">\" start marker.

  The walk ends when the \".\" end marker is drawn, when the current word
  has no recorded successors, or once the sentence exceeds 25 words. The
  words are joined with spaces, terminated with \".\" and capitalised.

  Returns the empty string when `dict` has no usable sentence starters
  (for example an empty dictionary), instead of retrying forever."
  [dict]
  (let [usable-starter? (fn [[word _]]
                          (not (or (empty? word)
                                   (= word ".")
                                   (re-matches #".*\s+.*" word))))
        ;; Filtering before the draw selects from the same distribution as
        ;; retrying on a bad draw, but cannot loop when nothing qualifies.
        starters (filter usable-starter? (get dict ">"))]
    (if (empty? starters)
      ""
      (let [first-word (get-random-entry-by-count starters)
            sentence
            (loop [prev-word first-word
                   sentence [first-word]]
              (let [next-word (get-random-entry-by-count (get dict prev-word))]
                (if (or (> (count sentence) 25)
                        ;; nil means prev-word has no successors: stop
                        ;; rather than padding the sentence with nils.
                        (nil? next-word)
                        (= next-word "."))
                  sentence
                  (recur next-word (conj sentence next-word)))))]
        (->> sentence
             (clojure.string/join " ")
             (#(str % "."))
             clojure.string/capitalize)))))
(defn build-paragraph
  "Generates `n` random sentences from `dict` and joins them with single
  spaces into one paragraph string."
  [dict n]
  (->> (repeatedly n (fn [] (build-sentence dict)))
       (clojure.string/join " ")))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment