Created
June 25, 2016 07:53
-
-
Save ilovejs/1b864fdcfd7ffacfc83307273d3c6185 to your computer and use it in GitHub Desktop.
References: [1] https://web.stanford.edu/~jurafsky/slp3/ — [2] https://www.youtube.com/watch?v=t3JIk3Jgifs — [3] http://iacs-courses.seas.harvard.edu/courses/am207/blog/lecture-18.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A slightly more realistic version of the 'Healthy' vs. 'Fever' hidden Markov
# model: model parameters, then a randomly generated state/observation
# sequence used later to assess decoding accuracy.
import random

states = ('Healthy', 'Fever')
observations = ('normal', 'cold', 'dizzy')
start_probability = {'Healthy': 0.6, 'Fever': 0.4}
transition_probability = {
    'Healthy' : {'Healthy': 0.8, 'Fever': 0.2},
    'Fever' : {'Healthy': 0.4, 'Fever': 0.6}
}
emission_probability = {
    'Healthy' : {'normal': 0.7, 'cold': 0.2, 'dizzy': 0.1},
    'Fever' : {'normal': 0.1, 'cold': 0.4, 'dizzy': 0.5}
}

# A random HMM realization is generated using the probability matrices defined
# above.  Both the latent (hidden) and visible states are kept so the accuracy
# of the decoding algorithm can be measured afterwards.
N = 100
hidden = []
visible = []

# Draw the initial hidden state from the start distribution.
if random.random() < start_probability[states[0]]:
    hidden.append(states[0])
else:
    hidden.append(states[1])

for i in range(N):  # range (not xrange) works on both Python 2 and 3
    current_state = hidden[i]
    # Transition to the next hidden state.
    if random.random() < transition_probability[current_state][states[0]]:
        hidden.append(states[0])
    else:
        hidden.append(states[1])
    # Sample the observation emitted by the *current* state via inverse-CDF.
    r = random.random()
    cumulative = 0.0
    for observation in observations:
        cumulative += emission_probability[current_state][observation]
        if r < cumulative:
            visible.append(observation)
            break
    else:
        # Float rounding makes 0.7 + 0.2 + 0.1 sum to just under 1.0, so r can
        # (rarely) exceed the final cumulative value and the loop would append
        # nothing, leaving `visible` shorter than `hidden`.  Fall back to the
        # last observation so the two sequences always stay the same length.
        visible.append(observations[-1])

# The loop appends N transitions after the initial state, leaving N+1 hidden
# states; drop the extra trailing one so len(hidden) == len(visible) == N.
hidden.pop()
def print_dptable(V):
    """Pretty-print the Viterbi dynamic-programming table.

    V is a list with one entry per time step; each entry is a dict mapping a
    state name to the best path probability for that state at that time step.
    """
    # Debug aid: show the raw first column before the formatted table.
    # print() calls (not Python-2-only print statements) run on 2 and 3.
    print('----> what is V[0]: ', V[0])
    # Header row: the time-step indices, right-aligned in 7-char columns.
    s = " " + " ".join(("%7d" % i) for i in range(len(V))) + "\n"
    for y in V[0]:
        # State label truncated to 5 characters.
        s += "%.5s: " % y
        # Each probability formatted then truncated to 7 chars for alignment.
        s += " ".join("%.7s" % ("%f" % v[y]) for v in V)
        s += "\n"
    print(s)
def viterbi(obs, states, start_p, trans_p, emit_p):
    """Return the most probable hidden-state path for an observation sequence.

    Parameters:
        obs     -- sequence of observed symbols (must be non-empty)
        states  -- iterable of hidden state names
        start_p -- dict: state -> initial probability
        trans_p -- dict of dicts: trans_p[from_state][to_state]
        emit_p  -- dict of dicts: emit_p[state][observation]

    Returns a tuple (prob, path) where `prob` is the probability of the best
    path and `path` is the list of hidden states along it.
    """
    # V[t][state] = probability of the best path ending in `state` at time t.
    V = [{}]
    # backpoint[state] = best path (list of states) that ends in `state`.
    backpoint = {}

    # Base case (t == 0): start probability times emission of the first symbol.
    for cur_state in states:
        V[0][cur_state] = start_p[cur_state] * emit_p[cur_state][obs[0]]
        backpoint[cur_state] = [cur_state]

    # Recurrence (t > 0): for each state, extend the best predecessor's path.
    for t in range(1, len(obs)):
        V.append({})
        newpath = {}
        for cur_state in states:
            # Choose the predecessor maximizing
            # path-prob * transition-prob * emission-prob,
            # e.g. the most probable of (0.11, 'Healthy') vs (0.89, 'Fever').
            (prob, state) = max(
                (V[t-1][pre_state] * trans_p[pre_state][cur_state] * emit_p[cur_state][obs[t]], pre_state)
                for pre_state in states
            )
            V[t][cur_state] = prob
            newpath[cur_state] = backpoint[state] + [cur_state]
        # Only the best path into each state needs to be remembered.
        backpoint = newpath

    # Termination: pick the best final state.  Index with -1 rather than the
    # loop variable `t`, which is never bound when len(obs) == 1 and would
    # raise NameError.  (The per-iteration print_dptable(V) debug call was
    # removed: it printed the whole table on every time step.)
    (prob, state) = max((V[-1][y], y) for y in states)
    return (prob, backpoint[state])
# Feed the generated Markov model into the decoder.
def example():
    """Run Viterbi decoding on the generated observation sequence."""
    model_args = (
        visible,
        states,
        start_probability,
        transition_probability,
        emission_probability,
    )
    return viterbi(*model_args)
# Decode the generated observations and compare against the true hidden states.
(prob, p_hidden) = example()

# Assess accuracy of the HMM decoding: the fraction of time steps where the
# Viterbi path matches the true hidden state.  zip pairs the two sequences
# safely even if their lengths ever differed.
wrong = sum(1 for true_state, decoded in zip(hidden, p_hidden)
            if true_state != decoded)
# print() calls (not Python-2-only print statements) run on 2 and 3.
print("accuracy: " + str(1 - float(wrong) / N))
print("Observations going in:")
print(visible)
print("States decoded by Viterbi:")
print(p_hidden)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment