Created
April 3, 2014 03:44
-
-
Save austintaylor/9947958 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Given a list of permutations, only keep the highest scoring option | |
; for each unique key as returned by the key function and the score | |
; function. | |
(defn prune [s key-f score-f] | |
(map (fn [[k v]] (apply max-key score-f v)) (group-by key-f s))) | |
; Iterate over the provided starting value until the condition is met. | |
(defn iterate-until [f until-f start] | |
(first (drop-while (complement until-f) (iterate f start)))) | |
(defn viterbi [states obs start-prob trans-prob emit-prob] | |
(apply max-key :prob | |
(reduce (fn [paths ob] | |
(prune | |
(for [prev paths | |
st1 states | |
:let [st0 (:state prev) | |
prob (* (:prob prev) ((trans-prob st0) st1) ((emit-prob st1) ob)) | |
path (conj (:path prev) st1)]] | |
{:state st1 :path path :prob prob}) | |
:state :prob)) | |
(map (fn [st] {:state st :path [st] :prob (* (start-prob st) ((emit-prob st) (obs 0)))}) states) | |
(rest obs)))) | |
(let [states [:healthy :fever] | |
observations [:normal :cold :dizzy] | |
starting-probabilities {:healthy 0.6 :fever 0.4} | |
transition-probabilities {:healthy {:healthy 0.7 :fever 0.3} | |
:fever {:healthy 0.4 :fever 0.6}} | |
emission-probabilities {:healthy {:normal 0.5 :cold 0.4 :dizzy 0.1} | |
:fever {:normal 0.1 :cold 0.3 :dizzy 0.6}} | |
expected-result {:state :fever, :path [:healthy :healthy :fever], :prob 0.01512}] | |
(= expected-result (viterbi states observations starting-probabilities transition-probabilities emission-probabilities))) | |
(defn levenshtein [a b] | |
(let [[[_ _ score]] | |
(iterate-until | |
#(prune | |
(mapcat | |
(fn [[a b score]] | |
(if (= (first a) (first b)) | |
[[(rest a) (rest b) score]] | |
(remove nil? [(when (seq a) [(rest a) b (inc score)]) | |
(when (seq b) [a (rest b) (inc score)]) | |
(when (and (seq a) (seq b)) [(rest a) (rest b) (inc score)])]))) | |
%) | |
butlast (comp - last)) | |
(comp (partial every? empty?) butlast first) | |
[[a b 0]])] | |
score)) | |
(= 10 (levenshtein "ttttattttctg" "tcaaccctaccat")) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment