Skip to content

Instantly share code, notes, and snippets.

@cigrainger
Created June 22, 2014 13:12
Show Gist options
  • Select an option

  • Save cigrainger/a0341419de91dc84e301 to your computer and use it in GitHub Desktop.

Select an option

Save cigrainger/a0341419de91dc84e301 to your computer and use it in GitHub Desktop.
# Run models to find natural number of topics
# For every candidate topic count, fit an LDA model and record the symmetric
# KL divergence between the singular values of the topic-word matrix and the
# length-weighted, normalised document-topic vector.
kl = []
# Per-document total: the sum of the counts in each document's (id, count)
# pairs.
l = np.array([sum(cnt for _, cnt in doc) for doc in my_corpus])
num = range(1, 150, 1)
for i in num:
    lda = models.ldamodel.LdaModel(corpus=my_corpus,
                                   id2word=dictionary, num_topics=i)
    # Topic-word matrix
    m1 = lda.expElogbeta
    # Only the singular values are needed. Skipping U and V avoids building
    # the full square V matrix (one row/column per column of m1) on every
    # iteration.
    cm1 = np.linalg.svd(m1, compute_uv=False)
    # Document-topic matrix
    lda_topics = lda[my_corpus]
    m2 = matutils.corpus2dense(lda_topics, lda.num_topics).transpose()
    cm2 = l.dot(m2)
    # NOTE: an additive smoothing step (cm2 = cm2 + 0.0001) is intentionally
    # left disabled here.
    cm2norm = np.linalg.norm(l)
    cm2 = cm2 / cm2norm
    div = sym_kl(cm1, cm2)
    kl.append(div)
# Plot kl divergence against number of topics
# Pass the candidate topic counts as x values: plotting `kl` alone would use
# 0-based list positions, shifting every point one topic to the left of the
# value the x-axis label promises.
plt.plot(num, kl)
plt.ylabel('Symmetric KL Divergence')
plt.xlabel('Number of Topics')
plt.savefig('kldiv.png', bbox_inches='tight')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment