Skip to content

Instantly share code, notes, and snippets.

@woxtu
Created January 24, 2015 15:26
Show Gist options
  • Save woxtu/2873895837b22f04275b to your computer and use it in GitHub Desktop.
Save woxtu/2873895837b22f04275b to your computer and use it in GitHub Desktop.
(ns markov
(:require [cljs.nodejs :as nodejs]
[clojure.string :refer [join replace]]))
;; Handle to the "kuromoji" Node module, loaded through Node's `require`.
;; -main uses it to build the tokenizer that is passed to `wakachi`.
(def kuromoji (nodejs/require "kuromoji"))
(defn wakachi
  "Splits `sentence` into its surface strings.

  Calls the `tokenize` method of `tokenizer` on `sentence`, converts the
  returned JS value to Clojure data with keyword keys, and returns a lazy
  seq of each token's `:surface_form`."
  [tokenizer sentence]
  (let [raw-tokens (.tokenize tokenizer sentence)
        token-maps (js->clj raw-tokens :keywordize-keys true)]
    (map :surface_form token-maps)))
(defn n-gramize
  "Returns a lazy seq of every length-`order` window over `tokens`.

  The token list is first padded on the right with (order - 1) nils, so one
  window starts at each original token and the trailing windows end in nil.
  Each window is a vector (a `subvec` of the padded token vector)."
  [order tokens]
  (let [pad-size     (dec order)
        padded       (into (vec tokens) (repeat pad-size nil))
        window-count (- (count padded) pad-size)]
    (for [start (range window-count)]
      (subvec padded start (+ start order)))))
(defn markov-chain
  "Builds a random chain of tokens by walking over `grams`.

  `grams` is a collection of sequential n-grams (such as the output of
  `n-gramize`). Starting from `start`, the walk repeatedly picks a random
  gram whose first element equals the current node, appends the rest of
  that gram to the chain, and continues from the gram's last element.

  Options (keyword arguments):
    :start - token to begin from. Defaults to the first element of a
             randomly chosen gram, or nil when `grams` is empty.

  The walk stops when the current node is nil (the padding added by
  `n-gramize`), when no gram begins with the current node, or when the
  chosen gram has no elements after its head and so cannot extend the chain.

  Returns a vector of tokens beginning with `start`. Trailing nil padding
  is kept in the result."
  [grams & {:keys [start]
            ;; Guard the default: rand-nth throws on an empty collection.
            :or {start (when (seq grams) (first (rand-nth grams)))}}]
  ;; Index grams by head once instead of re-filtering all grams every step.
  (let [by-head (group-by first grams)]
    ;; Invariant: `node` is always the last element of `result`, so the
    ;; stop test needs no scan of the chain. Accumulating into a vector
    ;; also avoids the deeply nested lazy seqs that repeated `concat`
    ;; creates in a loop.
    (loop [node start, result [start]]
      (if (nil? node)
        result
        (let [gram (some-> (get by-head node) rand-nth)
              tail (drop 1 gram)]
          (if (empty? tail)
            ;; Unknown node, or a gram that cannot advance the walk.
            result
            (recur (last gram) (into result tail))))))))
(defn -main
  "Command-line entry point.

  Takes the first argument as the source sentence, builds a kuromoji
  tokenizer from the dictionary under ./node_modules/kuromoji/dist/dict/,
  and prints one randomly generated sentence from a 3-gram Markov chain of
  the tokenized input. Does nothing when no argument is given.

  If the tokenizer cannot be built, the error is written to stderr and the
  process exit code is set to 1 instead of failing later on a missing
  tokenizer."
  [& args]
  (when-let [[sentence] args]
    (-> kuromoji
        (.builder #js {:dicPath "./node_modules/kuromoji/dist/dict/"})
        (.build (fn [error tokenizer]
                  (if error
                    (do
                      (.error js/console "Failed to build kuromoji tokenizer:" error)
                      (set! (.-exitCode js/process) 1))
                    (println (->> (wakachi tokenizer sentence)
                                  (n-gramize 3)
                                  markov-chain
                                  join))))))))
;; Make ClojureScript's print functions (e.g. `println` in -main) write
;; through Node's output.
(nodejs/enable-util-print!)
;; Register -main as the function called with the command-line arguments
;; when the compiled script is run.
(set! *main-cli-fn* -main)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment