avriiil · January 30, 2023 15:16
diff --git a/gsdmm-coherence-score.py b/gsdmm-coherence-score.py
 # import library from gensim  
 from gensim.models import CoherenceModel

 # define function to get words in topics
 def get_topics_lists(model, top_clusters, n_words):
     '''
     Collects the leading words of each requested cluster as a list of lists.

     model: gsdmm instance; its cluster_word_distribution is indexed by
         cluster and each entry provides (word, value) pairs via .items()
     top_clusters: iterable of cluster indices, processed in the given order
     n_words: maximum number of words to keep per cluster

     Returns one list of words per entry of top_clusters, each ordered from
     the highest value to the lowest.
     '''
     def leading_words(cluster_id):
         # rank the (word, value) pairs of this cluster by descending value
         ranked = sorted(
             model.cluster_word_distribution[cluster_id].items(),
             key=lambda pair: pair[1],
             reverse=True,
         )
         # keep only the words of the first n_words pairs
         return [word for word, _ in ranked[:n_words]]

     return [leading_words(cluster_id) for cluster_id in top_clusters]

 # collect the 20 leading words of every selected cluster
 topics = get_topics_lists(gsdmm, top_index, 20)

 # build a gensim coherence model over those topics using the 'c_v' measure
 cm_gsdmm = CoherenceModel(
     topics=topics,
     dictionary=dictionary,
     corpus=bow_corpus,
     texts=docs,
     coherence='c_v',
 )

 # compute the coherence value and report it
 coherence_gsdmm = cm_gsdmm.get_coherence()
 print(coherence_gsdmm)
	# import library from gensim
	from gensim.models import CoherenceModel

	# define function to get words in topics
	def get_topics_lists(model, top_clusters, n_words):
	    '''
	    Gets lists of words in topics as a list of lists.

	    model: gsdmm instance; its cluster_word_distribution is indexed by
	        cluster and each entry provides (word, value) pairs via .items()
	    top_clusters: numpy array (or any iterable) containing indices of top_clusters
	    n_words: top n number of words to include

	    Returns a list with one list of words per cluster, in the order of
	    top_clusters, each sorted from the highest value to the lowest.
	    '''
	    # create empty list to contain topics
	    topics = []

	    # iterate over top n clusters
	    for cluster in top_clusters:
	        # sort the (word, value) pairs by descending value, keep the top n
	        top_pairs = sorted(
	            model.cluster_word_distribution[cluster].items(),
	            key=lambda pair: pair[1],
	            reverse=True,
	        )[:n_words]

	        # keep only the words; the values were needed just for ranking
	        topics.append([word for word, _ in top_pairs])

	    return topics

	# collect the 20 leading words of every selected cluster
	topics = get_topics_lists(gsdmm, top_index, 20)

	# build a gensim coherence model over those topics using the 'c_v' measure
	cm_gsdmm = CoherenceModel(
	    topics=topics,
	    dictionary=dictionary,
	    corpus=bow_corpus,
	    texts=docs,
	    coherence='c_v',
	)

	# compute the coherence value and report it
	coherence_gsdmm = cm_gsdmm.get_coherence()
	print(coherence_gsdmm)