Skip to content

Instantly share code, notes, and snippets.

config: {}
vars: {:pwd "/Users/aria42/Dropbox/projs/mr_indep" :args ["start" "-d"] :opts {:d [""]} :env {"GOBIN" "/Users/aria42/scratch/bin" "GOOS" "darwin" "USER" "aria42" "OLDPWD" "/Users/aria42/Dropbox/projs/mochi-clj" "Apple_PubSub_Socket_Render" "/tmp/launch-mF7iDc/Render" "TMPDIR" "/var/folders/fr/frtCgPIfGZ4e2GpLSIfsNE+++TI/-Tmp-/" "PS1" "[\\W]$ " "LSCOLORS" "gxfxcxdxbxegedabagacad" "TERM_PROGRAM_VERSION" "273" "TERM" "xterm-color" "SECURITYSESSIONID" "325ce0" "SHLVL" "1" "__CF_USER_TEXT_ENCODING" "0x1F5:0:0" "COMMAND_MODE" "unix2003" "SHELL" "/bin/bash" "DISPLAY" "/tmp/launch-7J2YLr/org.x:0" "LOGNAME" "aria42" "LANG" "en_US.UTF-8" "PWD" "/Users/aria42/Dropbox/projs/mr_indep" "PATH" "/Users/aria42/scratch/bin:~/Dropbox/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin:/usr/local/git/bin:/usr/texbin:/usr/X11/bin" "CLICOLOR" "true" "GOARCH" "386" "SSH_AUTH_SOCK" "/tmp/launch-J0Zf8O/Listeners" "_" "/usr/bin/cake" "HOME" "/Users/aria42" "GOROOT" "/Users/aria42/scratch/go" "TERM_PROGRAM" "Apple_Terminal"}}
/usr/bin/cake:439:in `gets': Interrupt
from /usr/bin/cake:439:in `ping'
from /usr/bin/cake:521:in `with_socket'
from /usr/bin/cake:437:in `ping'
from /usr/bin/cake:790
clojure.lang.Compiler$CompilerException: java.lang.IncompatibleClassChangeError: Class clojure.lang.Var$1 does not implement the requested interface clojure.lang.IFn (main.clj:0)
at clojure.lang.Compiler.eval (Compiler.java:5440)
clojure.lang.Compiler.load (Compiler.java:5857)
clojure.lang.Compiler.loadFile (Compiler.java:5820)
clojure.lang.RT$3.invoke (RT.java:296)
cake.server$reload_files$fn__2006.invoke (server.clj:48)
cake.server$reload_files.invoke (server.clj:48)
cake.server$create$fn__2056.invoke (server.clj:134)
cake.contrib.server_socket$accept_fn$fn__1756$fn__1759.invoke (server_socket.clj:39)
cake.contrib.server_socket$accept_fn$fn__1756.invoke (server_socket.clj:38)
(ns type-level-tagger
{:doc "Implements State-of-the-art Unsupervised Part-of-speech Tagger
from \"Simple Type-Level Unsupervised POS Tagging\"
by Yoong-Keok Lee, Aria Haghighi and Regina Barzilay
(http://www.cs.berkeley.edu/~aria42/pubs/typetagging.pdf)
blog post: http://wp.me/pcW6S-x"
:author "Aria Haghighi ([email protected])"}
(:use [clojure.java.io :only [reader]]
[clojure.contrib.duck-streams :only [with-out-writer]]
[clojure.contrib.seq-utils :only [indexed]]
;; Counter: map from object to its count, with the total cached alongside.
;;   counts: map from key to count (get-count reads it, defaulting to 0.0)
;;   total:  cached total; presumably the sum of all counts, kept in step
;;           by inc-count -- TODO confirm against inc-count's body
(defrecord Counter [counts total])
(defn get-count
  "Looks up the count stored for key k in counter's :counts map.
  Yields 0.0 when k has no entry. The postcondition rejects a
  negative result."
  [counter k]
  {:post [(not (neg? %))]}
  (let [lookup-path [:counts k]]
    (get-in counter lookup-path 0.0)))
(defn inc-count
"increment-count of k in counter by weight amount"
;; Counter: map from object to its count, with the total cached alongside.
;;   counts: map from key to count (get-count reads it, defaulting to 0.0)
;;   total:  cached total; presumably the sum of all counts, kept in step
;;           by inc-count -- TODO confirm against inc-count's body
(defrecord Counter [counts total])
(defn get-count
  "Looks up the count stored for key k in counter's :counts map.
  Yields 0.0 when k has no entry. The postcondition rejects a
  negative result."
  [counter k]
  {:post [(not (neg? %))]}
  (let [lookup-path [:counts k]]
    (get-in counter lookup-path 0.0)))
(defn inc-count
;; Probability distribution: a multinomial with Dirichlet smoothing.
;;   counter:  Counter holding the observed counts of objects
;;   lambda:   smoothing constant
;;   num-keys: number of possible keys, needed to normalize
(defrecord DirichletMultinomial [counter lambda num-keys])
(defn new-dirichlet
  "Builds a DirichletMultinomial with smoothing constant lambda and
  num-keys possible keys, backed by a fresh Counter that has an empty
  counts map and a total of 0."
  [lambda num-keys]
  (let [empty-counter (Counter. {} 0)]
    (DirichletMultinomial. empty-counter lambda num-keys)))
(defn log-prob
; Word information: everything tracked for one word type.
;   word:     the word itself, as a string
;   count:    number of usages seen (tally-usage increments it)
;   feats:    map of feature-type to feature-value
;   contexts: counter of [before-word after-word] usages (for HMM)
(defrecord WordInfo [word count feats contexts])
(defn get-feats
"Features on a word type"
[w]
(defn tally-usage
  "Records one more usage of a word. Increments word-info's :count, then
  passes its :contexts to inc-count with the key [before after] and
  weight 1. Returns the updated word-info."
  [word-info before after]
  (let [counted     (update-in word-info [:count] inc)
        context-key [before after]]
    (update-in counted [:contexts] inc-count context-key 1)))
(defn assoc-if-absent
  "Returns m unchanged when it already contains key k; otherwise returns m
  with k mapped to (f k).

  Presence is tested with contains? rather than by looking at the stored
  value, so an existing entry whose value is nil or false is kept (and f is
  not called for it). A nil m is treated as an empty map."
  [m k f]
  (if (contains? m k)
    m
    (assoc m k (f k))))
(defn tally-sent [vocab sent]
(reduce
(fn [res [before word after]]
(-> res
(assoc-if-absent word new-word-info)
(update-in [word] tally-usage before after)))
vocab