Skip to content

Instantly share code, notes, and snippets.

@qwtel
Created January 2, 2015 13:44
Show Gist options
  • Save qwtel/cd0aace687d3becf4717 to your computer and use it in GitHub Desktop.
Save qwtel/cd0aace687d3becf4717 to your computer and use it in GitHub Desktop.
(ns markov-usrnames)
(require '[clojure.string :as str])
;; Entire contents of the system dictionary file, read as ASCII text.
(def file (slurp "/usr/share/dict/words" :encoding "ASCII"))
;; The dictionary contents split into a sequence with one string per line.
(def words (str/split-lines file))
(defn generate-markov-nodes
  "Builds a first-order character transition table from `words`.

  The words are lower-cased and joined with single spaces into one string;
  every adjacent pair of characters in that string is then counted. The
  result is a nested map {character {following-character count}}, where the
  space character marks word boundaries."
  [words]
  (let [text (str/join \space (map str/lower-case words))
        bump (fnil inc 0)]
    ;; (partition 2 1 text) yields each overlapping [current following] pair.
    (reduce (fn [table [from to]]
              (update-in table [from to] bump))
            {}
            (partition 2 1 text))))
(defn wrand
  "Weighted random choice: `slices` is a vector of slice sizes, and the
  return value is the index of the slice hit by a uniformly random point
  on a wheel whose compartments are proportional to those sizes."
  [slices]
  (let [total (reduce + slices)
        spin  (rand total)]
    ;; Walk the slices, tracking the cumulative upper edge of each one,
    ;; and stop at the first slice whose upper edge lies beyond the spin.
    (loop [idx 0
           running 0]
      (let [upper (+ (slices idx) running)]
        (if (< spin upper)
          idx
          (recur (inc idx) upper))))))
(defn generate-usrname
  "Generates a random user name by walking the character transition table
  `nodes` (a map {character {following-character count}}).

  The walk starts from the transitions recorded for the space character
  (i.e. word-initial letters) and repeatedly picks the next letter with
  probability proportional to its count. Space is never emitted: it is
  excluded from the candidates, which yields the same distribution as
  re-drawing whenever a space comes up but cannot loop forever on a
  character that is only ever followed by a space.

  When the current character has no non-space successors (a dead end), the
  walk continues from the word-initial transitions.

  `length` is the number of characters in the result and defaults to 5.

  Throws ex-info when a non-empty name is requested but `nodes` has no
  non-space transitions from the space character."
  ([nodes] (generate-usrname nodes 5))
  ([nodes length]
   (let [letter-transitions (fn [node]
                              ;; node may be nil for an unknown character;
                              ;; remove then simply yields no candidates.
                              (vec (remove #(= \space (key %)) node)))
         start (letter-transitions (get nodes \space))]
     (when (and (pos? length) (empty? start))
       (throw (ex-info "nodes has no word-initial transitions to sample from"
                       {:length length})))
     (loop [candidates start
            acc []]
       (cond
         ;; Check completion before sampling so a dead end reached after the
         ;; final letter cannot trigger a draw from an empty candidate list.
         (>= (count acc) length) (str/join acc)

         ;; Dead end: restart from the word-initial letters.
         (empty? candidates) (recur start acc)

         :else
         (let [index (wrand (mapv val candidates))
               letter (key (nth candidates index))]
           (recur (letter-transitions (get nodes letter))
                  (conj acc letter))))))))
;; Transition table built once from the dictionary words.
(def nodes (generate-markov-nodes words))

;; Draw user names until 10000 distinct ones have been collected, then write
;; them to markov-usrnames.txt, one per line.
(let [usrnames (take 10000 (distinct (repeatedly #(generate-usrname nodes))))]
  (spit "markov-usrnames.txt" (str/join "\n" usrnames)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment