Skip to content

Instantly share code, notes, and snippets.

@ckirkendall
Created July 8, 2013 20:38
Show Gist options
  • Save ckirkendall/5952290 to your computer and use it in GitHub Desktop.
Save ckirkendall/5952290 to your computer and use it in GitHub Desktop.
Clojure version of Norvig's spell checker for LambdaJam.
(ns spell-checker.speller)
;; Every lowercase alphabetic run found in the training corpus, in file order.
;; `defonce` keeps the corpus from being re-read when the namespace is reloaded.
(defonce training-data
  (->> (slurp "/home/ckirkendall/Development/clojure/spelling-jam/data/big.txt")
       (.toLowerCase)
       (re-seq #"[a-z]+")))
;; Atom holding a map of word -> number of occurrences in `training-data`.
(defonce NWORDS
  (-> training-data
      frequencies
      atom))
(defn deletes
  "Returns a lazy seq of the strings obtained by removing exactly one
  character from `word`, one result per character position."
  [word]
  (for [idx (range (count word))
        :let [[head tail] (split-at idx word)]]
    ;; `tail` starts with the character being dropped.
    (apply str (concat head (rest tail)))))
(defn transposes
  "Returns a lazy seq of the strings obtained by swapping each pair of
  adjacent characters in `word`."
  [word]
  (for [idx (range (dec (count word)))
        :let [[head [x y & tail]] (split-at idx word)]]
    (apply str (concat head [y x] tail))))
(defn replaces
  "Returns a lazy seq of the strings obtained by overwriting one character
  of `word` with a lowercase letter. Results are ordered letter-first, then
  by position, and include `word` itself whenever the letter matches the
  character already there."
  [word]
  (let [letters   (vec word)
        positions (range (count letters))]
    (mapcat (fn [replacement]
              (map (fn [pos]
                     (apply str (assoc letters pos replacement)))
                   positions))
            "abcdefghijklmnopqrstuvwxyz")))
(defn inserts
  "Returns a lazy seq of the strings obtained by inserting one lowercase
  letter into `word` at every possible position, including before the first
  character and after the last one. Results are ordered letter-first, then
  by position."
  [word]
  (for [ch "abcdefghijklmnopqrstuvwxyz"
        ;; A word of n characters has n + 1 insertion points; stopping at
        ;; (count word) would never append a letter to the end.
        i  (range (inc (count word)))]
    (let [[pre post] (split-at i word)]
      (apply str (concat pre (cons ch post))))))
(defn edits1
  "Returns the set of all strings produced from `word` by a single delete,
  transpose, replace or insert."
  [word]
  (let [edit-fns [deletes transposes replaces inserts]]
    (into #{} (mapcat (fn [edit] (edit word)) edit-fns))))
(defn edits2
  "Returns the set of all strings produced by applying `edits1` to every
  member of `(edits1 word)`."
  [word]
  (->> (edits1 word)
       (mapcat edits1)
       set))
(defn known
  "Returns a lazy seq of the members of `words` that have a truthy entry in
  the current value of `NWORDS`."
  [words]
  (let [dictionary @NWORDS]
    (filter (fn [candidate] (dictionary candidate)) words)))
(defn correct
  "Returns the candidate corrections for `word`, most frequent first.

  Candidates come from the first non-empty source among: `word` itself if it
  is known, the known words one edit away, the known words two edits away,
  and finally `word` unchanged. Callers that want a single answer take
  `first` of the result."
  [word]
  (let [freqs      @NWORDS
        ;; `seq` turns an empty result into nil so `or` falls through to the
        ;; next, more expensive, candidate source.
        candidates (or (seq (known [word]))
                       (seq (known (edits1 word)))
                       (seq (known (edits2 word)))
                       [word])]
    ;; Sort descending by frequency so the best guess comes first; an
    ;; ascending sort would put the rarest candidate at the head. Unknown
    ;; words count as 0 so the comparator never sees nil.
    (sort-by #(get freqs % 0) > candidates)))
(defn- read-test-cases
  "Reads the file at `path` and returns one seq of lowercase alphabetic
  tokens per line, skipping the first line. The file is read eagerly inside
  `with-open` so the reader is closed before this function returns."
  [path]
  (with-open [rdr (clojure.java.io/reader path)]
    ;; `doall` forces the lazy seq while the reader is still open.
    (doall (rest (map #(re-seq #"[a-z]+" %) (line-seq rdr))))))

(def test1 (read-test-cases "/home/ckirkendall/Development/clojure/spelling-jam/data/test1.txt"))
(def test2 (read-test-cases "/home/ckirkendall/Development/clojure/spelling-jam/data/test2.txt"))
(defn testing-speller
  "Scores the speller against `test-data`, a seq of token seqs whose first
  token is the expected word and whose remaining tokens are inputs to
  correct. Returns `[total wrong]`: the number of inputs tried and the
  number whose first correction was not the expected word."
  [test-data]
  (reduce (fn [[total wrong] [expected & misspellings]]
            (let [tried (count misspellings)
                  hits  (count (filter (fn [candidate]
                                         (= expected (first (correct candidate))))
                                       misspellings))]
              [(+ total tried) (+ wrong (- tried hits))]))
          [0 0]
          test-data))
(defn add-word
  "Records one more occurrence of `word` in `NWORDS` and returns the updated
  frequency map. A word not seen before starts at 1; a word that is already
  known has its count incremented rather than reset."
  [word]
  ;; `fnil` supplies 0 for a missing entry so `inc` never receives nil.
  (swap! NWORDS update-in [word] (fnil inc 0)))
;; Demo calls, evaluated in order when this file is loaded.

;; Score the speller against the second test set; yields a [total wrong] pair.
(testing-speller test2)
;; Ask for the candidate corrections of two sample inputs.
(correct "exstacy")
(correct "hizzy")
;; Register "hizzy" in NWORDS. This runs after the (correct "hizzy") call
;; above, so only later calls to `correct` can see it.
(add-word "hizzy")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment