Created
March 8, 2017 20:07
-
-
Save pieter-van-prooijen/8c3f4e9a5ebf9f6424218276b9ea667d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns alice-clj.core) | |
;; Full text of the corpus, read once at load time.
(def text (slurp "resources/19033.txt"))

;; Vector of the words in `text`, original case preserved. A word is a
;; maximal run of ASCII letters. Matching the words directly (instead of
;; splitting on the separators) never yields an empty leading token when
;; the text starts with a non-letter such as a BOM or punctuation.
(def words (vec (re-seq #"[A-Za-z]+" text)))

;; Lazy sequence of every adjacent (w1 w2) pair of lower-cased words.
(def pairs (partition 2 1 (map clojure.string/lower-case words)))
(defn add-pair
  "Records one occurrence of the word pair [w1 w2] in the nested map m.

  m maps a word to a map of following-word -> occurrence count.
  Returns the updated map; a pair not seen before starts at 1."
  [m [w1 w2]]
  ;; fnil substitutes 0 for a missing count, so no local binding has to
  ;; shadow clojure.core/count.
  (update-in m [w1 w2] (fnil inc 0)))
;; Nested map: word -> {following-word -> number of times it followed}.
(def freqs
  (reduce add-pair {} pairs))

;; Map: word -> sequence of its following words, most frequent first.
(def sorted-freqs
  (reduce-kv (fn [acc w1 followers]
               (assoc acc w1 (->> followers
                                  (sort-by val >)
                                  (map key))))
             {}
             freqs))
(defn next-word
  "Picks a random successor for word w.

  sorted-freqs maps a word to a sequence of following words, most
  frequent first; the choice is made uniformly among the first three.
  Returns nil when w has no recorded successor (previously this threw
  IndexOutOfBoundsException from rand-nth on an empty sequence)."
  [sorted-freqs w]
  (when-let [candidates (seq (take 3 (get sorted-freqs w)))]
    (rand-nth candidates)))
(defn upcase-word
  "Returns s as a string with its first character upper-cased.

  Returns \"\" for an empty or nil s instead of throwing."
  [s]
  (if (empty? s)
    ""
    ;; (char ...) lets the compiler pick Character/toUpperCase(char)
    ;; statically rather than resolving the overload by reflection.
    (clojure.string/join (cons (Character/toUpperCase (char (first s)))
                               (rest s)))))
(defn sentence
  "Generates a random chain of up to n words as a single string.

  Starts from a random word of `words` and repeatedly asks `next-word`
  for a successor using `sorted-freqs`. The first word is capitalised
  with `upcase-word` and the words are joined by single spaces.
  Returns \"\" when no word is produced (for example n <= 0)."
  [n]
  (let [;; sorted-freqs is keyed by lower-cased words, so the start word
        ;; must be lower-cased too or its lookup finds no successors.
        start (clojure.string/lower-case (rand-nth words))
        ;; Stop at a dead end (nil successor) instead of carrying nils on.
        chain (->> (iterate (partial next-word sorted-freqs) start)
                   (take-while some?)
                   (take n))]
    (if-let [[first-word & next-words] (seq chain)]
      (clojure.string/join " " (cons (upcase-word first-word) next-words))
      "")))
(sentence 6) | |
Will be a little thing i | |
alice-clj.core> (sentence 8) | |
Said to be off with a low hall | |
alice-clj.core> (sentence 23) | |
Go and the project gutenberg tm work is to alice s the king said alice s the king said to alice in the | |
alice-clj.core> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment