Skip to content

Instantly share code, notes, and snippets.

;; Gibbs Sampling State - All distributions are DirichletMultinomial
;; type-assigns: map word string to tag state (integer)
;; tag-prior: prior distr over tag assignment
;; trans-distrs: map of tag => P(tag' | tag) distribution
;; emission-distrs: tag => P(word | tag) distribution, word=string representation
;; feat-distrs: tag => feat-type => P(feat-val | feat-type,tag) distribution
(defrecord State [type-assigns tag-prior trans-distrs emission-distrs feat-distrs])
(defn obs-emissions
"if a word has been assigned to a tag, we increment num-keys by 1
and add weight * num-occurences of the word to counts"
[tag-emission-distr word-info weight]
(-> tag-emission-distr
(update-in [:num-keys] (if (> weight 0) inc dec))
(observe (:word word-info) (* weight (:count word-info)))))
(defn obs-transitions
"if we set word to tag, we update the transition tag counts
from all context usages of word by weight amount"
[trans-distrs type-assigns word-info tag weight]
(reduce
(fn [res [[before after] count]]
(let [type-assigns (assoc type-assigns (:word word-info) tag)
before-tag (type-assigns before)
after-tag (type-assigns after)]
(-> res
(defn gibbs-sample [state word-info]
(let [state (unassign state word-info)
scores (map (partial score-assign state word-info) (range *K*))
sample-tag (sample-from-scores scores)]
(assign state word-info sample-tag)))
(defn score-assign
"Log probability of assigning word to tag"
[state word-info tag]
(+ ;; Tag Prior
(log-prob (:tag-prior state) tag)
;; Feature Prob
(sum
(fn t1 [[k v]]
(log-prob (get-in state [:feat-distrs tag k]) v))
(:feats word-info))
(defn log-add
"log (sum xs) from seq of log-x"
[log-xs]
(let [max-log-x (apply max log-xs)]
(+ max-log-x
(Math/log (sum
(for [log-x log-xs
:let [diff (- log-x max-log-x)]
:when (> diff -30)]
(defn log-add
"log (sum xs) from seq of log-x"
[log-xs]
(let [max-log-x (apply max log-xs)]
(+ max-log-x
(Math/log (sum
(for [log-x log-xs
:let [diff (- log-x max-log-x)]
:when (> diff -30)]
@aria42
aria42 / fsm.clj
Created November 29, 2010 02:15
Proposal FSM for Work
(ns work.fsm
{:doc "Finite State Machine Abstraction for Asynchronous Processing."}
(:use [plumbing.core :only [map-map map-from-vals]]
[work.core :only [queue-work shutdown-now]]
[work.queue :only [ poll offer local-queue]]))
(defrecord ^:private State [id work-fn routing-fn num-threads])
(defn- offer-to-state [state elem]
(offer (:inbox state) elem))
(ns work.aggregators
(:use [plumbing.core]
[work.core :only [work available-processors map-work queue-work]]
[work.queue :only [local-queue]]))
(defn- channel-as-lazy-seq
"ch is a fn you call to get an item (no built-in timeout, use with-timeout).
When the ch is exhausted it returns :eof.
This fn returns a lazy sequence from the channel."
(ns work.graph
(:require
[clojure.contrib.logging :as log]
[clojure.zip :as zip]
[work.core :as work]
[work.queue :as workq]))
(defn table
"takes kv pairs.
keys can be vectors of keys, matching a fn.