Abhayparashar31 · September 11, 2022 21:11
diff --git a/Topic_Modeling.py b/Topic_Modeling.py
 def topic_modeling(data, num_topics=5, passes=10):
    """Train an LDA topic model on ``data`` and print the top words per topic.

    Args:
        data: Iterable of raw text strings. Each one is passed to
            ``word_tokenize``. It is also handed unchanged to
            ``make_biagram``, so it must support being iterated more than
            once (a list works; a one-shot generator does not).
        num_topics: Number of latent topics to fit. Defaults to 5.
        passes: Number of full passes over the corpus during training.
            Defaults to 10.

    Returns:
        The trained ``gensim.models.LdaModel`` so callers can inspect or
        reuse it instead of relying only on the printed output.
    """
    ### Tokens
    tokens = [word_tokenize(text) for text in data]

    ### Make Bigrams
    # NOTE(review): make_biagram is defined outside this snippet; presumably it
    # returns the token lists with frequent word pairs merged -- confirm.
    tokens = make_biagram(data=data, tokens=tokens)

    ### Corpora Dictionary (token <-> integer id mapping)
    dictionary = corpora.Dictionary(tokens)

    ### Creating Document Term Matrix (bag-of-words vector per document)
    doc_term_matrix = [dictionary.doc2bow(doc) for doc in tokens]

    ### Training The LDA Model
    lda_model = gensim.models.LdaModel(
        doc_term_matrix,            ## Document Term Matrix
        num_topics=num_topics,      ## Number of Topics
        id2word=dictionary,         ## Mapping from word ids to words
        passes=passes,              ## Number of passes through the corpus during training (similar to epochs in neural networks)
        chunksize=10,               ## Number of documents to be used in each training chunk
        update_every=1,             ## Number of documents to be iterated through for each update
        alpha='auto',               ## Learn an asymmetric document-topic prior from the corpus
        per_word_topics=True,       ## Also compute the most likely topics for each word
        random_state=42,            ## Fixed seed for reproducible topics
    )

    ### Exploring Common Words For Each Topic With Their Relative Weights
    # Pass num_topics explicitly: print_topics() defaults to 20, which would
    # silently truncate the listing when more topics are requested.
    for idx, topic in lda_model.print_topics(num_topics=num_topics):
        print("Topic: {} \nWords: {}".format(idx, topic))
        print("\n")

    return lda_model

 ### Run the topic-modeling pipeline on `cleaned_reviews` (defined outside this snippet)
 topic_modeling(cleaned_reviews)
	def topic_modeling(data, num_topics=5, passes=10):
		"""Train an LDA topic model on ``data`` and print the top words per topic.

		Args:
			data: Iterable of raw text strings. Each one is passed to
				``word_tokenize``. It is also handed unchanged to
				``make_biagram``, so it must support being iterated more
				than once (a list works; a one-shot generator does not).
			num_topics: Number of latent topics to fit. Defaults to 5.
			passes: Number of full passes over the corpus during training.
				Defaults to 10.

		Returns:
			The trained ``gensim.models.LdaModel`` so callers can inspect or
			reuse it instead of relying only on the printed output.
		"""
		### Tokens
		tokens = [word_tokenize(text) for text in data]

		### Make Bigrams
		# NOTE(review): make_biagram is defined outside this snippet; presumably
		# it returns the token lists with frequent word pairs merged -- confirm.
		tokens = make_biagram(data=data, tokens=tokens)

		### Corpora Dictionary (token <-> integer id mapping)
		dictionary = corpora.Dictionary(tokens)

		### Creating Document Term Matrix (bag-of-words vector per document)
		doc_term_matrix = [dictionary.doc2bow(doc) for doc in tokens]

		### Training The LDA Model
		lda_model = gensim.models.LdaModel(
			doc_term_matrix,  ## Document Term Matrix
			num_topics=num_topics,  ## Number of Topics
			id2word=dictionary,  ## Mapping from word ids to words
			passes=passes,  ## Number of passes through the corpus during training (similar to epochs in neural networks)
			chunksize=10,  ## Number of documents to be used in each training chunk
			update_every=1,  ## Number of documents to be iterated through for each update
			alpha='auto',  ## Learn an asymmetric document-topic prior from the corpus
			per_word_topics=True,  ## Also compute the most likely topics for each word
			random_state=42,  ## Fixed seed for reproducible topics
		)

		### Exploring Common Words For Each Topic With Their Relative Weights
		# Pass num_topics explicitly: print_topics() defaults to 20, which would
		# silently truncate the listing when more topics are requested.
		for idx, topic in lda_model.print_topics(num_topics=num_topics):
			print("Topic: {} \nWords: {}".format(idx, topic))
			print("\n")

		return lda_model

	### Run the topic-modeling pipeline on `cleaned_reviews` (defined outside this snippet)
	topic_modeling(cleaned_reviews)