Skip to content

Instantly share code, notes, and snippets.

@dbieber
Last active January 2, 2016 18:39
Show Gist options
  • Select an option

  • Save dbieber/8344929 to your computer and use it in GitHub Desktop.

Select an option

Save dbieber/8344929 to your computer and use it in GitHub Desktop.
Creates a markov model and implements viterbi. Given a sequence of local belief states (observations), finds the most likely sequence of states.
def get_probabilities(corpus, alpha="ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
corpus = ''.join([c for c in corpus if c in alpha])
counts = [corpus.count(c) for c in alpha]
start_probs = np.array(counts, dtype=float) / sum(counts)
trans_counts = [[0.1 + corpus.count("%s%s" % (a,b)) for b in alpha] for a in alpha]
trans_probs = np.array([np.array(x, dtype=float)/sum(x) for x in trans_counts])
return (start_probs, trans_probs)
start_probs, trans_probs = get_probabilities("THISSHOULDBEAHUGESTRINGLIKETHISBUTHUNDREDSOFTIMESBIGGER")
class Viterbi():
def __init__(self, start_probs, trans_probs):
self.trans_probs = np.array(trans_probs)
# viterbi [t,i] is [probability of being at state i at time t | observations up to t, most likely previous state]
# -1 indicates no previous state
# t=1 indicates the first observation; t=0 indicates start probabilities
self.viterbi = [[[x,-1] for x in start_probs]]
def observe(self, obs_probs):
# obs_props[i] == P(i|obs)
next_probs = [[0,-1]] * len(self.viterbi[-1])
for i, (prob_at, _) in enumerate(self.viterbi[-1]):
for j, prob_to in enumerate(self.trans_probs[i,:]):
next_prob = prob_at * prob_to * obs_probs[j]
if next_prob > next_probs[j][0]:
next_probs[j] = [next_prob, i]
# normalize
total = sum(x[0] for x in next_probs)
next_probs = [[i/total, j] for (i,j) in next_probs]
self.viterbi.append(next_probs)
def best_path(self):
path = []
# Find the most recent node of the best path
count = -1 + len(self.viterbi)
i = np.argmax([p for (p,prev) in self.viterbi[count]]) # index of class
(prob, prev) = self.viterbi[count][i]
while prev != -1:
count -= 1
path = [clf.classifier.classes_[i]] + path
i = prev
(prob, prev) = self.viterbi[count][i]
return path
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment