Created
January 2, 2015 13:44
-
-
Save qwtel/cd0aace687d3becf4717 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns markov-usrnames
  (:require [clojure.string :as str]))

;; Entire contents of the system word list, read once when the namespace loads.
(def file (slurp "/usr/share/dict/words" :encoding "ASCII"))

;; The word list split into one string per line.
(def words (str/split-lines file))
(defn generate-markov-nodes
  "Builds a character-level transition table from `words`.

  The words are lower-cased and joined with single spaces into one text, and
  every pair of adjacent characters in that text is counted. The result is a
  nested map {from-char {to-char occurrence-count}}; the space character
  appears as an ordinary key marking word boundaries."
  [words]
  (let [text        (str/join \space (map str/lower-case words))
        ;; Each character paired with its immediate successor.
        transitions (map vector text (rest text))]
    (reduce
      (fn [table [[from to] occurrences]]
        (assoc-in table [from to] occurrences))
      {}
      (frequencies transitions))))
(defn wrand
  "Weighted random index selection (roulette wheel).

  `slices` is a vector of weights; it is looked up by calling it with an
  index, so it must support that. A random point is drawn in
  [0, sum of weights) and the index of the slice whose cumulative range
  contains that point is returned, so each index is chosen with probability
  proportional to its weight."
  [slices]
  (let [spin (rand (apply + slices))]
    (loop [idx   0
           below 0]
      ;; `upper` is the exclusive upper edge of slice `idx` on the wheel.
      (let [upper (+ below (slices idx))]
        (if (< spin upper)
          idx
          (recur (inc idx) upper))))))
(defn generate-usrname
  "Generates a random name by walking the transition table `nodes`.

  `nodes` maps a character to a map of {next-character weight}, as produced
  by `generate-markov-nodes`. The walk starts from the transitions recorded
  after the space character (i.e. word starts) and appends one weighted-random
  letter per step until the name has `length` characters (default 5).

  The space character is never emitted: it is removed from the candidate set
  before sampling, which yields the same distribution as drawing and
  re-drawing whenever a space comes up, but cannot spin forever on a node
  whose only successor is a space. When a node has no non-space successor at
  all, the walk continues from the word-start transitions instead.

  Returns the name as a string. Throws ex-info when `nodes` has no non-space
  transition out of the space character and a letter is needed."
  ([nodes] (generate-usrname nodes 5))
  ([nodes length]
   (let [start (dissoc (nodes \space) \space)]
     (loop [node start
            acc  []]
       ;; Check completion before sampling so a finished name never touches
       ;; a dead-end node.
       (if (>= (count acc) length)
         (str/join acc)
         (let [pool (or (not-empty (dissoc node \space)) start)]
           (when (empty? pool)
             (throw (ex-info "No letter transitions available from the word-start node"
                             {:generated acc})))
           ;; keys and vals of the same map are in matching order, so the
           ;; index chosen by weight selects the corresponding letter.
           (let [letters (vec (keys pool))
                 weights (vec (vals pool))
                 letter  (letters (wrand weights))]
             (recur (nodes letter) (conj acc letter)))))))))
;; Transition table built from the whole word list.
(def nodes (generate-markov-nodes words))

;; Keep generating names until 10000 distinct ones have been collected, then
;; write them to markov-usrnames.txt, one per line.
(let [usrnames (take 10000 (distinct (repeatedly #(generate-usrname nodes))))]
  (spit "markov-usrnames.txt" (str/join "\n" usrnames)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment