telent · March 31, 2016 14:08
diff --git a/markov.clj b/markov.clj
 (ns money-tolkien.core
  (:require [reaver :as r]
            [clojure.string :as str]
            [clojure.xml :as xml]
            [clojure.zip :as zip]))

 ;; Raw body of the Stack Exchange feed for the "tolkien" tag (sorted newest
 ;; first, per the query string). The slurp runs when this def is evaluated,
 ;; so loading the namespace performs a network request.
 (def tolkien-text (slurp "http://scifi.stackexchange.com/feeds/tag?tagnames=tolkien&sort=newest"))

 ;; Raw body of the Money Advice Service blog feed for the "mortgages" tag.
 ;; The slurp runs when this def is evaluated, so loading the namespace
 ;; performs a network request.
 (def money-text (slurp "https://blog.moneyadviceservice.org.uk/tag/mortgages.atom"))

 (defn zip-str
   "Parses the XML document held in string `s` and returns a clojure.zip
   zipper positioned at its root element.

   The string is encoded as UTF-8 explicitly before being handed to the
   parser; relying on the platform default charset could corrupt non-ASCII
   text on JVMs whose default encoding is not UTF-8."
   [^String s]
   (let [utf8-bytes (.getBytes s "UTF-8")]
     (zip/xml-zip
      (xml/parse (java.io.ByteArrayInputStream. utf8-bytes)))))

 (defn entries
   "Returns a lazy seq of every node under `doc` whose :tag is :entry.

   `doc` is wrapped as the :content of a synthetic root node, so it is
   walked as a collection of child nodes rather than as a single element.
   NOTE(review): this looks intended for the value returned by zip-str;
   confirm against callers."
   [doc]
   (->> {:content doc}
        (tree-seq identity :content)
        (filter (comp #{:entry} :tag))))

 (defn texts
   "Returns a lazy seq of the :content children of every :title, :summary
   or :content element found anywhere in `entry` (the entry node itself is
   included in the walk)."
   [entry]
   (->> entry
        (tree-seq identity :content)
        (filter (comp #{:title :summary :content} :tag))
        (mapcat :content)))


 (defn all-text
   "Collects every text fragment of every entry in `atom-doc` and joins
   them into one string separated by single spaces.

   Each fragment is passed through reaver's parse and the result's .text
   method; presumably this strips HTML markup (confirm against reaver)."
   [atom-doc]
   (let [fragments (mapcat texts (entries atom-doc))
         plain     (for [fragment fragments]
                     (.text (r/parse fragment)))]
     (str/join " " plain)))


 ;; we want a map in which each key is a pair of adjacent words and its
 ;; value is a collection (with duplicates) of words that may follow

 (defn occurrences-map
   "Builds the transition table from the combined text of `docs`.

   Returns a map whose keys are vectors of two adjacent words and whose
   values are vectors (duplicates kept) of the words seen immediately
   after that pair.

   Words are split on runs of any whitespace and blank tokens are
   discarded, so doubled spaces or newlines in the text do not produce
   empty-string \"words\"."
   [& docs]
   (let [text  (str/join " " (map all-text docs))
         words (remove str/blank? (str/split text #"\s+"))]
     (reduce (fn [m [prev1 prev2 following]]
               ;; fnil supplies the empty vector the first time a pair is seen
               (update-in m [[prev1 prev2]] (fnil conj []) following))
             {}
             (partition 3 1 words))))


 (defn random-from-map
   "Given the transition table `map` and the current word pair, returns the
   next pair: the second word of the current pair followed by a randomly
   chosen follower. The follower is nil when the pair has no entry."
   [map [prev1 prev2]]
   (let [candidates (get map [prev1 prev2])
         pick       (rand-int (count candidates))
         follower   (get candidates pick)]
     [prev2 follower]))


 (defn chain
   "Returns a lazy seq of words generated by walking the transition table
   `omap`, starting from the pair [word1 word2].

   The seq begins with word2; word1 only seeds the first lookup. The seq
   ends when a pair with no recorded follower is reached, instead of
   continuing with an endless run of nil."
   [omap [word1 word2]]
   (->> [word1 word2]
        (iterate (partial random-from-map omap))
        (map second)
        ;; a nil word means the previous pair had no follower: stop there
        (take-while (complement nil?))))
	(ns money-tolkien.core
	(:require [reaver :as r]
	[clojure.string :as str]
	[clojure.xml :as xml]
	[clojure.zip :as zip]))

	;; Raw body of the Stack Exchange feed for the "tolkien" tag (sorted newest
	;; first, per the query string). The slurp runs when this def is evaluated,
	;; so loading the namespace performs a network request.
	(def tolkien-text (slurp "http://scifi.stackexchange.com/feeds/tag?tagnames=tolkien&sort=newest"))

	;; Raw body of the Money Advice Service blog feed for the "mortgages" tag.
	;; The slurp runs when this def is evaluated, so loading the namespace
	;; performs a network request.
	(def money-text (slurp "https://blog.moneyadviceservice.org.uk/tag/mortgages.atom"))

	(defn zip-str
	  "Parses the XML document held in string `s` and returns a clojure.zip
	  zipper positioned at its root element.

	  The string is encoded as UTF-8 explicitly before being handed to the
	  parser; relying on the platform default charset could corrupt non-ASCII
	  text on JVMs whose default encoding is not UTF-8."
	  [^String s]
	  (let [utf8-bytes (.getBytes s "UTF-8")]
	    (zip/xml-zip
	     (xml/parse (java.io.ByteArrayInputStream. utf8-bytes)))))

	(defn entries
	  "Returns a lazy seq of every node under `doc` whose :tag is :entry.

	  `doc` is wrapped as the :content of a synthetic root node, so it is
	  walked as a collection of child nodes rather than as a single element.
	  NOTE(review): this looks intended for the value returned by zip-str;
	  confirm against callers."
	  [doc]
	  (->> {:content doc}
	       (tree-seq identity :content)
	       (filter (comp #{:entry} :tag))))

	(defn texts
	  "Returns a lazy seq of the :content children of every :title, :summary
	  or :content element found anywhere in `entry` (the entry node itself is
	  included in the walk)."
	  [entry]
	  (->> entry
	       (tree-seq identity :content)
	       (filter (comp #{:title :summary :content} :tag))
	       (mapcat :content)))


	(defn all-text
	  "Collects every text fragment of every entry in `atom-doc` and joins
	  them into one string separated by single spaces.

	  Each fragment is passed through reaver's parse and the result's .text
	  method; presumably this strips HTML markup (confirm against reaver)."
	  [atom-doc]
	  (let [fragments (mapcat texts (entries atom-doc))
	        plain     (for [fragment fragments]
	                    (.text (r/parse fragment)))]
	    (str/join " " plain)))


	;; we want a map in which each key is a pair of adjacent words and its
	;; value is a collection (with duplicates) of words that may follow

	(defn occurrences-map
	  "Builds the transition table from the combined text of `docs`.

	  Returns a map whose keys are vectors of two adjacent words and whose
	  values are vectors (duplicates kept) of the words seen immediately
	  after that pair.

	  Words are split on runs of any whitespace and blank tokens are
	  discarded, so doubled spaces or newlines in the text do not produce
	  empty-string \"words\"."
	  [& docs]
	  (let [text  (str/join " " (map all-text docs))
	        words (remove str/blank? (str/split text #"\s+"))]
	    (reduce (fn [m [prev1 prev2 following]]
	              ;; fnil supplies the empty vector the first time a pair is seen
	              (update-in m [[prev1 prev2]] (fnil conj []) following))
	            {}
	            (partition 3 1 words))))


	(defn random-from-map
	  "Given the transition table `map` and the current word pair, returns the
	  next pair: the second word of the current pair followed by a randomly
	  chosen follower. The follower is nil when the pair has no entry."
	  [map [prev1 prev2]]
	  (let [candidates (get map [prev1 prev2])
	        pick       (rand-int (count candidates))
	        follower   (get candidates pick)]
	    [prev2 follower]))


	(defn chain
	  "Returns a lazy seq of words generated by walking the transition table
	  `omap`, starting from the pair [word1 word2].

	  The seq begins with word2; word1 only seeds the first lookup. The seq
	  ends when a pair with no recorded follower is reached, instead of
	  continuing with an endless run of nil."
	  [omap [word1 word2]]
	  (->> [word1 word2]
	       (iterate (partial random-from-map omap))
	       (map second)
	       ;; a nil word means the previous pair had no follower: stop there
	       (take-while (complement nil?))))