Last active
January 1, 2016 08:19
-
-
Save maxcountryman/8117739 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns brander.core | |
(:require [clojure.java.io :as io] | |
[clojure.java.shell :refer [sh]] | |
[clojure.string :as string])) | |
;; Every line of the system dictionary, trimmed and lower-cased.
;; Wrapped in `lazy-seq` so the file is only opened the first time the
;; sequence is consumed; `doall` forces every line to be read before
;; `with-open` closes the reader.
(def words
  (lazy-seq
    (with-open [rdr (io/reader "/usr/share/dict/words")]
      (->> (line-seq rdr)
           (map (fn [line] (string/lower-case (string/trim line))))
           doall))))
;; The characters \a through \z (code points 97..122), in order.
(def ascii
  (map char (range (int \a) (inc (int \z)))))
(defn ascii?
  "Return true when every element of `s` is a lowercase ASCII letter
  (\\a through \\z), false otherwise. Vacuously true for an empty or nil `s`.

  The range is tested directly instead of building a set of the alphabet
  on every call, since this predicate runs once per dictionary word."
  [s]
  (letfn [(lower-ascii? [c]
            ;; `char?` guard keeps non-character elements a plain `false`
            ;; rather than a ClassCastException from `int`.
            (and (char? c)
                 (<= (int \a) (int c) (int \z))))]
    (every? lower-ascii? s)))
;; Rebind `words` to only the purely a-z entries, each wrapped in the
;; start-of-word ("^") and end-of-word ("$") sentinel characters.
(def words
  (->> words
       (filter ascii?)
       (map (fn [w] (str "^" w "$")))))
(defn update-inc
  "Increment the counter found at key path `ks` inside the nested map `d`
  and return the updated map. A missing (or otherwise falsey) entry is
  treated as 0, so the first increment yields 1."
  [d ks]
  (letfn [(bump [n] (if n (inc n) 1))]
    (update-in d ks bump)))
(defn word->grams
  "Fold the n-gram transitions of `word` into `grams-map`.

  `word` is split into overlapping windows of length `n` (step 1). For every
  pair of consecutive windows (a, b) the count at [a b] in `grams-map` is
  incremented. A word with fewer than two windows contributes nothing and
  `grams-map` is returned unchanged."
  [grams-map word n]
  (let [windows (partition n 1 word)
        transitions (map vector windows (rest windows))]
    (reduce (fn [acc [from to]]
              (update-inc acc [from to]))
            grams-map
            transitions)))
(defn words->grams
  "Build the raw transition-count map for `words` using windows of length `n`.

  For each word at least `n` characters long, its first `n` characters are
  counted as a starting gram under the key \"\". Every word (regardless of
  length) is then passed through `word->grams` to record its gram-to-gram
  transitions."
  [words n]
  (letfn [(add-word [acc word]
            (let [with-start (if (< (count word) n)
                               acc
                               (update-inc acc ["" (take n word)]))]
              (word->grams with-start word n)))]
    (reduce add-word {} words)))
(defn map-vals
  "Return a map with the same keys as `m` in which every value `v` has been
  replaced by `(f v)`. An empty or nil `m` yields an empty map."
  [f m]
  (reduce (fn [acc [k v]]
            (assoc acc k (f v)))
          {}
          m))
;; NOTE(review): on Clojure 1.11+ this name shadows clojure.core/update-vals
;; (different argument order and meaning) and triggers a "replaces" warning
;; at load time; kept as-is because other definitions call it by this name.
(defn update-vals
  "Scale the numeric values of `m` so that they sum to 1: each value is
  divided by the total of all values. Integer counts produce exact ratios.
  An empty map is returned as an empty map."
  [m]
  (let [total (apply + (vals m))
        share (fn [weight] (/ weight total))]
    (map-vals share m)))
(defn normalized
  "Turn a map of gram -> {next-gram count} into gram -> {next-gram weight}
  by normalising each inner map with `update-vals`, so the weights of every
  inner map sum to 1."
  [grams]
  (->> grams
       (map-vals update-vals)))
(defn sample
  "Pick one key of `gram-map` (key -> weight) at random, proportionally to
  its weight.

  A single uniform random number in [0, 1) is drawn; entries are walked in
  map order while accumulating their weights (a nil weight counts as 0) and
  the first key whose running total reaches the drawn number is returned.
  If no entry reaches it, a key is chosen uniformly at random instead.
  Returns nil for an empty or nil `gram-map`."
  [gram-map]
  (let [threshold (rand)
        ;; The reduction yields either a one-element vector holding the
        ;; chosen key (early exit) or the final numeric running total.
        outcome (reduce (fn [running [gram weight]]
                          (let [running (+ (or weight 0) running)]
                            (if (>= running threshold)
                              (reduced [gram])
                              running)))
                        0
                        gram-map)]
    (if (vector? outcome)
      (first outcome)
      (rand-nth (keys gram-map)))))
(defn multi-word
  "Occasionally swap a finished short word for a freshly sampled start gram.

  When `word` ends with the \\$ sentinel, a random draw exceeds 7/10, and
  `word` has fewer than 8 elements, the result is a start gram sampled from
  `(get grams \"\")`; in every other case `word` is returned untouched.

  NOTE(review): the sampled start gram replaces `word` rather than being
  appended to it - the name suggests concatenation may have been intended;
  confirm before changing."
  [word grams]
  (cond
    (not= \$ (last word)) word
    (<= (rand) 7/10)      word
    (>= (count word) 8)   word
    :else                 (sample (get grams ""))))
(defn get-grams
  "Build the normalised transition model of order `n` from the global
  `words` sequence: raw counts from `words->grams`, then `normalized`."
  [n]
  (let [counts (words->grams words n)]
    (normalized counts)))
(def get-grams-memo
  "Memoized `get-grams`: the model for a given `n` is computed on first use
  and served from the cache on later calls."
  (memoize get-grams))
(defn gen-word
  "Generate one random word from the order-`n` transition model.

  Starts from a start gram sampled from the \"\" entry of the model, then
  repeatedly looks up the last `n` characters of the word so far, samples a
  successor gram, and appends that successor's final character. The walk
  ends when the word ends with the \\$ sentinel. The result is passed through
  `multi-word`, joined into a string, and stripped of the ^ and $ sentinels.

  Returns a string; it is empty when the model has no start grams (for
  example when `n` exceeds the length of every dictionary word)."
  [n]
  (let [grams (get-grams-memo n)]
    ;; The word is kept in a vector so that appending and inspecting the tail
    ;; do not build up nested lazy `concat`s.
    (loop [word (vec (sample (get grams "")))]
      (let [successors (when (not= (peek word) \$)
                         (not-empty (get grams (take-last n word))))]
        (if successors
          (recur (conj word (last (sample successors))))
          ;; Either the word is terminated, or the current gram has no known
          ;; successor. The latter used to `recur` with the unchanged word and
          ;; spin forever; it is now treated as the end of the word.
          (string/replace (apply str (multi-word word grams)) #"\^|\$" ""))))))
(defn available?
  "Run the external `whois` program for `domain` and return true when its
  standard output contains the text \"No match for\", false otherwise."
  [domain]
  (let [{:keys [out]} (sh "whois" domain)]
    (if (re-find #"No match for" out)
      true
      false)))
(defn find-domains
  "Loop forever generating candidate .com domains and printing them.

  On each iteration three words are generated with `gen-word` (model order
  `n`, default 4), the shortest one is suffixed with \".com\", and its
  availability is checked with `available?`. Available domains are printed
  with a trailing \" <-- Available\" marker; others are printed as-is.
  Never returns."
  [& [n]]
  (let [order (or n 4)]
    (while true
      ;; `repeatedly` calls gen-word three separate times; the previous
      ;; `(repeat 3 (gen-word ...))` generated a single word and repeated it,
      ;; so the shortest-of-three selection had nothing to choose between.
      (let [candidates (repeatedly 3 #(gen-word order))
            domain (-> (sort-by count candidates) first (str ".com"))]
        (if (available? domain)
          (prn (str domain " <-- Available"))
          (prn domain))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment