austintaylor · April 3, 2014 03:44
diff --git a/dynamic-programming.clj b/dynamic-programming.clj
 ; Given a list of permutations, only keep the highest scoring option
 ; for each unique key as returned by the key function and the score
 ; function.
 (defn prune [s key-f score-f]
  (map (fn [[k v]] (apply max-key score-f v)) (group-by key-f s)))

 ; Iterate over the provided starting value until the condition is met.
 (defn iterate-until [f until-f start]
  (first (drop-while (complement until-f) (iterate f start))))

 (defn viterbi [states obs start-prob trans-prob emit-prob]
  (apply max-key :prob
         (reduce (fn [paths ob]
                   (prune
                    (for [prev paths
                          st1 states
                          :let [st0 (:state prev)
                                prob (* (:prob prev) ((trans-prob st0) st1) ((emit-prob st1) ob))
                                path (conj (:path prev) st1)]]
                      {:state st1 :path path :prob prob})
                    :state :prob))
                 (map (fn [st] {:state st :path [st] :prob (* (start-prob st) ((emit-prob st) (obs 0)))}) states)
                 (rest obs))))

 (let [states [:healthy :fever]
      observations [:normal :cold :dizzy]
      starting-probabilities {:healthy 0.6 :fever 0.4}
      transition-probabilities {:healthy {:healthy 0.7 :fever 0.3}
                                :fever   {:healthy 0.4 :fever 0.6}}
      emission-probabilities {:healthy {:normal 0.5 :cold 0.4 :dizzy 0.1}
                              :fever   {:normal 0.1 :cold 0.3 :dizzy 0.6}}
      expected-result {:state :fever, :path [:healthy :healthy :fever], :prob 0.01512}]
  (= expected-result (viterbi states observations starting-probabilities transition-probabilities emission-probabilities)))

 (defn levenshtein [a b]
  (let [[[_ _ score]] 
         (iterate-until
          #(prune
            (mapcat
             (fn [[a b score]]
               (if (= (first a) (first b))
                 [[(rest a) (rest b) score]]
                 (remove nil? [(when (seq a) [(rest a) b (inc score)])
                               (when (seq b) [a (rest b) (inc score)])
                               (when (and (seq a) (seq b)) [(rest a) (rest b) (inc score)])])))
             %)
            butlast (comp - last))
          (comp (partial every? empty?) butlast first)
          [[a b 0]])]
    score))
     
 (= 10 (levenshtein "ttttattttctg" "tcaaccctaccat"))
	; Given a list of permutations, only keep the highest scoring option
	; for each unique key as returned by the key function and the score
	; function.
	(defn prune [s key-f score-f]
	(map (fn [[k v]] (apply max-key score-f v)) (group-by key-f s)))

	; Iterate over the provided starting value until the condition is met.
	(defn iterate-until [f until-f start]
	(first (drop-while (complement until-f) (iterate f start))))

	(defn viterbi [states obs start-prob trans-prob emit-prob]
	(apply max-key :prob
	(reduce (fn [paths ob]
	(prune
	(for [prev paths
	st1 states
	:let [st0 (:state prev)
	prob (* (:prob prev) ((trans-prob st0) st1) ((emit-prob st1) ob))
	path (conj (:path prev) st1)]]
	{:state st1 :path path :prob prob})
	:state :prob))
	(map (fn [st] {:state st :path [st] :prob (* (start-prob st) ((emit-prob st) (obs 0)))}) states)
	(rest obs))))

	(let [states [:healthy :fever]
	observations [:normal :cold :dizzy]
	starting-probabilities {:healthy 0.6 :fever 0.4}
	transition-probabilities {:healthy {:healthy 0.7 :fever 0.3}
	:fever {:healthy 0.4 :fever 0.6}}
	emission-probabilities {:healthy {:normal 0.5 :cold 0.4 :dizzy 0.1}
	:fever {:normal 0.1 :cold 0.3 :dizzy 0.6}}
	expected-result {:state :fever, :path [:healthy :healthy :fever], :prob 0.01512}]
	(= expected-result (viterbi states observations starting-probabilities transition-probabilities emission-probabilities)))

	(defn levenshtein [a b]
	(let [[[_ _ score]]
	(iterate-until
	#(prune
	(mapcat
	(fn [[a b score]]
	(if (= (first a) (first b))
	[[(rest a) (rest b) score]]
	(remove nil? [(when (seq a) [(rest a) b (inc score)])
	(when (seq b) [a (rest b) (inc score)])
	(when (and (seq a) (seq b)) [(rest a) (rest b) (inc score)])])))
	%)
	butlast (comp - last))
	(comp (partial every? empty?) butlast first)
	[[a b 0]])]
	score))

	(= 10 (levenshtein "ttttattttctg" "tcaaccctaccat"))