LDA (Latent Dirichlet Allocation) fitting with Python scikit-learn
# derived from http://scikit-learn.org/stable/auto_examples/applications/topics_extraction_with_nmf_lda.html
# explanations can be found here: https://www.linkedin.com/pulse/dissociating-training-predicting-latent-dirichlet-lucien-tardres

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import pickle

n_features = 50
n_topics = 2

# Training dataset
data_samples = ["I like to eat broccoli and bananas.",
                "I ate a banana and spinach smoothie for breakfast.",
                "Chinchillas and kittens are cute.",
                "My sister adopted a kitten yesterday.",
                "Look at this cute hamster munching on a piece of broccoli."
                ]

# Extract features and vectorize the dataset (bag-of-words term counts)
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=1,
                                max_features=n_features,
                                stop_words='english')
tf = tf_vectorizer.fit_transform(data_samples)

# Save the vocabulary; get_feature_names_out() replaces the older get_feature_names()
dic = tf_vectorizer.get_feature_names_out()

# n_components replaces the n_topics parameter of older scikit-learn releases
lda = LatentDirichletAllocation(n_components=n_topics, max_iter=5,
                                learning_method='online',
                                learning_offset=50.,
                                random_state=0)

# Train LDA
lda.fit(tf)

# Save all data necessary for later prediction:
# vocabulary, topic-word distributions, and the document-topic prior
model = (dic, lda.components_, lda.exp_dirichlet_component_, lda.doc_topic_prior_)

with open('outfile', 'wb') as fp:
    pickle.dump(model, fp)
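The LinkedIn article linked above explains how training and prediction can be dissociated using the pickled tuple. Below is a minimal sketch, not part of the original gist, of how that tuple could be reloaded later to infer topics for an unseen document. The new document text and the loaded_* names are illustrative assumptions, and restoring the fitted attributes onto a fresh estimator by hand relies on scikit-learn's internal attribute layout.

# --- prediction sketch: reload the saved model and transform a new document ---
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Reload the tuple saved by the training script ('outfile')
with open('outfile', 'rb') as fp:
    dic, components, exp_dirichlet_component, doc_topic_prior = pickle.load(fp)

# Rebuild a vectorizer locked to the saved vocabulary so column order matches training
loaded_vectorizer = CountVectorizer(vocabulary=dic, stop_words='english')

# Rebuild the estimator and restore its fitted state by hand (relies on sklearn internals)
loaded_lda = LatentDirichletAllocation(n_components=components.shape[0])
loaded_lda.components_ = components
loaded_lda.exp_dirichlet_component_ = exp_dirichlet_component
loaded_lda.doc_topic_prior_ = doc_topic_prior

# Infer the topic distribution of an unseen document (illustrative text)
new_doc = ["My kitten loves munching on broccoli."]
tf_new = loaded_vectorizer.transform(new_doc)
print(loaded_lda.transform(tf_new))  # one row of topic proportions summing to 1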