Skip to content

Instantly share code, notes, and snippets.

@y-mitsui
Created April 28, 2016 09:12
Show Gist options
  • Select an option

  • Save y-mitsui/9fd73f0362f2f891c9ddcc54a8302af3 to your computer and use it in GitHub Desktop.

Select an option

Save y-mitsui/9fd73f0362f2f891c9ddcc54a8302af3 to your computer and use it in GitHub Desktop.
import numpy.random.mtrand
import numpy as np
class LdaCvb0:
    """Latent Dirichlet Allocation trained with zero-order collapsed
    variational Bayes (CVB0).

    The corpus is given in bag-of-words form: for every document a sequence
    of distinct word ids and a parallel sequence of occurrence counts.  All
    occurrences of one word inside one document share a single topic
    responsibility vector.

    Attributes:
        word_indexes: the word ids passed to the constructor.
        word_counts: the occurrence counts passed to the constructor.
        n_words: vocabulary size (largest word id + 1).
        n_topics: number of topics.
        alpha: symmetric Dirichlet prior on document-topic proportions.
        beta: symmetric Dirichlet prior on topic-word proportions.
        gamma_dik: ``gamma_dik[d][i]`` is the length-``n_topics``
            responsibility vector of the i-th word entry of document d.
        mean_ndk, var_ndk: (n_docs, n_topics) expected document-topic
            counts and their variances.
        mean_nkv, var_nkv: (n_topics, n_words) expected topic-word counts
            and their variances.
    """

    def __init__(self, word_indexes, word_counts, n_topics, alpha=0.1, beta=0.01):
        """Randomly initialise responsibilities and accumulate the counts.

        Args:
            word_indexes: sequence of documents, each a sequence of
                non-negative integer word ids (documents may differ in
                length; lists and NumPy arrays are both accepted).
            word_counts: sequence parallel to ``word_indexes`` holding the
                number of occurrences of each word entry.
            n_topics: number of topics to fit.
            alpha: document-topic Dirichlet prior; also the concentration
                used to draw the initial responsibilities.
            beta: topic-word Dirichlet prior.
        """
        self.word_counts = word_counts
        self.word_indexes = word_indexes
        self.n_topics = n_topics
        self.alpha = alpha
        self.beta = beta

        # Scan every id explicitly: taking the max over ragged documents
        # compares them as lists and can miss the largest id.
        largest_id = -1
        for doc_w in word_indexes:
            for word in doc_w:
                largest_id = max(largest_id, int(word))
        self.n_words = largest_id + 1

        n_docs = len(word_indexes)
        self.mean_ndk = np.zeros([n_docs, n_topics])
        self.var_ndk = np.zeros([n_docs, n_topics])
        self.mean_nkv = np.zeros([n_topics, self.n_words])
        self.var_nkv = np.zeros([n_topics, self.n_words])

        self.gamma_dik = []
        for d, (doc_w, doc_c) in enumerate(zip(word_indexes, word_counts)):
            gamma_ik = []
            # Register the document's list first so count_update can read
            # each responsibility as soon as it has been drawn.
            self.gamma_dik.append(gamma_ik)
            for i, (word, count) in enumerate(zip(doc_w, doc_c)):
                gamma_ik.append(np.random.dirichlet([alpha] * n_topics))
                self.count_update(d, i, word, count)

    def __repr__(self):
        return "LdaCvb0(n_docs=%d, n_words=%d, n_topics=%d)" % (
            len(self.gamma_dik), self.n_words, self.n_topics)

    def count_update(self, d, i, v, scale):
        """Add ``scale`` occurrences of word entry (d, i) to the counts.

        Args:
            d: document index.
            i: position of the word entry inside document ``d``.
            v: word id of that entry.
            scale: signed number of occurrences; pass a negative value to
                remove a previously added contribution.
        """
        gamma = np.asarray(self.gamma_dik[d][i], dtype=float)
        mean = scale * gamma
        var = scale * gamma * (1. - gamma)
        self.mean_ndk[d] += mean
        self.var_ndk[d] += var
        self.mean_nkv[:, v] += mean
        self.var_nkv[:, v] += var

    def infer(self, word_numbers=None, word_counts=None, n_topics=None):
        """Run one CVB0 sweep over the corpus, updating the model in place.

        For every word entry the entry's own contribution is removed from
        the expected counts, its responsibility is recomputed as

            gamma_k  ~  (n_kv + beta) / (n_k + beta * n_words) * (n_dk + alpha)

        normalised over topics, stored, and added back to the counts.

        Args:
            word_numbers: word ids per document.  Must have the same layout
                as the corpus given to the constructor, because the stored
                responsibilities are addressed by (document, position).
                Defaults to the constructor's ``word_indexes``.
            word_counts: occurrence counts parallel to ``word_numbers``.
                Defaults to the constructor's ``word_counts``.
            n_topics: kept for backward compatibility; when given it must
                equal the number of topics the model was built with.

        Raises:
            ValueError: if ``n_topics`` disagrees with ``self.n_topics``.
        """
        if n_topics is not None and n_topics != self.n_topics:
            raise ValueError("n_topics=%r does not match the model's %r topics"
                             % (n_topics, self.n_topics))
        if word_numbers is None:
            word_numbers = self.word_indexes
        if word_counts is None:
            word_counts = self.word_counts

        # Per-topic totals are tracked incrementally instead of re-summing
        # a full row of mean_nkv for every topic of every word entry.
        topic_totals = self.mean_nkv.sum(axis=1)
        smoothing = self.beta * self.n_words
        for d, (doc_w, doc_c) in enumerate(zip(word_numbers, word_counts)):
            for i, (word, count) in enumerate(zip(doc_w, doc_c)):
                # Exclude this entry from the statistics it is scored with.
                self.count_update(d, i, word, -count)
                topic_totals -= count * self.gamma_dik[d][i]

                new_gamma = (self.mean_nkv[:, word] + self.beta) / (topic_totals + smoothing)
                new_gamma *= self.mean_ndk[d] + self.alpha
                new_gamma /= new_gamma.sum()
                self.gamma_dik[d][i] = new_gamma

                # Put the entry back using its updated responsibility.
                self.count_update(d, i, word, count)
                topic_totals += count * new_gamma
# Toy corpus: one row per document.  docs_w holds the distinct word ids of a
# document and docs_c the matching occurrence counts.  The documents differ in
# length, so the arrays are ragged and must be built with dtype=object
# (NumPy >= 1.24 raises ValueError for implicit ragged arrays).
docs_w = np.array([[1, 2],
                   [1, 2],
                   [3, 4],
                   [3, 4],
                   [0],
                   [0]], dtype=object)
docs_c = np.array([[2, 1],
                   [4, 1],
                   [3, 1],
                   [4, 2],
                   [5],
                   [4]], dtype=object)
# Fit a 3-topic model on the toy corpus and show the resulting object.
ctx = LdaCvb0(docs_w, docs_c, 3)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(ctx)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment