Last active
June 16, 2021 12:54
-
-
Save avriiil/93216740e9e758800dba295b6de249a5 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of documents assigned to each topic (cluster), as a NumPy array so
# it can be arg-sorted below.
doc_count = np.array(gsdmm.cluster_doc_count)
print('Number of documents per topic :', doc_count)

# Cluster indices ordered from most to fewest documents, keeping at most the
# 15 largest: argsort() is ascending, so take the last 15 entries and reverse.
top_index = doc_count.argsort()[-15:][::-1]
print('Most important clusters (by number of docs inside):', top_index)
def top_words(cluster_word_distribution, top_cluster, values):
    """Print the highest-count entries of each selected cluster.

    Args:
        cluster_word_distribution: Container indexed by cluster id whose
            items expose ``.items()`` yielding ``(key, count)`` pairs
            (presumably word -> frequency mappings; confirm against the
            model that produced it).
        top_cluster: Iterable of cluster ids to report, in output order.
        values: Maximum number of ``(key, count)`` pairs printed per cluster.

    Returns:
        None. Results are written to standard output only.
    """
    for cluster in top_cluster:
        # Rank the cluster's entries by count, largest first, and keep the
        # leading ``values`` of them. sorted() is stable, so entries with
        # equal counts keep their original relative order.
        sort_dicts = sorted(
            cluster_word_distribution[cluster].items(),
            key=lambda k: k[1],
            reverse=True,
        )[:values]
        print("\nCluster %s : %s" % (cluster, sort_dicts))
# Show the 20 highest-count words for each of the largest clusters.
top_words(gsdmm.cluster_word_distribution, top_index, 20)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment