bertomartin · November 21, 2016 19:13
diff --git a/from_sklearn.py b/from_sklearn.py
 def from_sklearn(docs, vect, lda, **kwargs):
     """Create pyLDAvis prepared data from a vectorizer and an LDA model.

     Fits ``vect`` on ``docs``, fits ``lda`` on the resulting document-term
     matrix, and forwards the derived quantities to ``prepare``.

     Parameters
     ----------
     docs : pandas Series or other iterable of str
         Raw documents; passed unchanged to ``vect.fit_transform``.
     vect : scikit-learn vectorizer (CountVectorizer, TfidfVectorizer)
         Converts the documents into a sparse document-term matrix.
     lda : sklearn.decomposition.LatentDirichletAllocation
         Topic model. Only ``fit_transform`` and ``components_`` are used,
         so any estimator exposing both is accepted.
     **kwargs
         Extra keyword arguments forwarded to ``prepare``.

     Returns
     -------
     prepared : object returned by ``prepare``
     lda : the fitted topic model passed in
     vect : the fitted vectorizer passed in

     Note the return order is ``(prepared, lda, vect)``.
     """
     # Local import keeps the function self-contained: the surrounding file
     # does not import numpy (or pandas) at module level.
     import numpy as np

     def _row_normalize(matrix):
         # Scale every row so it sums to 1 (rows become distributions).
         matrix = np.asarray(matrix, dtype=float)
         return matrix / matrix.sum(axis=1, keepdims=True)

     dtm = vect.fit_transform(docs)
     doc_topic_dists = _row_normalize(lda.fit_transform(dtm))

     # Newer scikit-learn releases replaced get_feature_names() with
     # get_feature_names_out(); support both.
     if hasattr(vect, "get_feature_names_out"):
         vocab = list(vect.get_feature_names_out())
     else:
         vocab = vect.get_feature_names()

     # Document lengths are per-document term totals (row sums of the
     # document-term matrix), not the character count of the raw string.
     doc_lengths = np.asarray(dtm.sum(axis=1)).ravel().tolist()
     # Column sums give the corpus-wide frequency of each vocabulary term;
     # ravel() handles both the 1xV matrix and the 1-D array result shapes.
     term_frequency = np.asarray(dtm.sum(axis=0)).ravel().tolist()

     prepared = prepare(
         doc_lengths=doc_lengths,
         vocab=vocab,
         term_frequency=term_frequency,
         topic_term_dists=_row_normalize(lda.components_),
         doc_topic_dists=doc_topic_dists,
         **kwargs)

     return prepared, lda, vect

 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.decomposition import LatentDirichletAllocation
 # Module names are case-sensitive: the package is "pyLDAvis".
 from pyLDAvis import prepare


 # Example 1: scikit-learn's LatentDirichletAllocation.
 # `docs` is not defined in this snippet and must already exist.
 vect = CountVectorizer()
 lda = LatentDirichletAllocation()

 # from_sklearn returns the tuple (prepared, lda, vect), so `prepared`
 # holds that whole tuple here.
 prepared = from_sklearn(docs,vect,lda)

 #Using LDA module from https://github.com/ariddell/lda

 import lda

 # Rebinds `lda` from the imported module to an LDA instance.
 lda = lda.LDA()
 prepared = from_sklearn(docs,vect,lda)
	def from_sklearn(docs, vect, lda, **kwargs):
		"""Create pyLDAvis prepared data from a vectorizer and an LDA model.

		Fits ``vect`` on ``docs``, fits ``lda`` on the resulting document-term
		matrix, and forwards the derived quantities to ``prepare``.

		Parameters
		----------
		docs : pandas Series or other iterable of str
			Raw documents; passed unchanged to ``vect.fit_transform``.
		vect : scikit-learn vectorizer (CountVectorizer, TfidfVectorizer)
			Converts the documents into a sparse document-term matrix.
		lda : sklearn.decomposition.LatentDirichletAllocation
			Topic model. Only ``fit_transform`` and ``components_`` are used,
			so any estimator exposing both is accepted.
		**kwargs
			Extra keyword arguments forwarded to ``prepare``.

		Returns
		-------
		prepared : object returned by ``prepare``
		lda : the fitted topic model passed in
		vect : the fitted vectorizer passed in

		Note the return order is ``(prepared, lda, vect)``.
		"""
		# Local import keeps the function self-contained: the surrounding file
		# does not import numpy (or pandas) at module level.
		import numpy as np

		def _row_normalize(matrix):
			# Scale every row so it sums to 1 (rows become distributions).
			matrix = np.asarray(matrix, dtype=float)
			return matrix / matrix.sum(axis=1, keepdims=True)

		dtm = vect.fit_transform(docs)
		doc_topic_dists = _row_normalize(lda.fit_transform(dtm))

		# Newer scikit-learn releases replaced get_feature_names() with
		# get_feature_names_out(); support both.
		if hasattr(vect, "get_feature_names_out"):
			vocab = list(vect.get_feature_names_out())
		else:
			vocab = vect.get_feature_names()

		# Document lengths are per-document term totals (row sums of the
		# document-term matrix), not the character count of the raw string.
		doc_lengths = np.asarray(dtm.sum(axis=1)).ravel().tolist()
		# Column sums give the corpus-wide frequency of each vocabulary term;
		# ravel() handles both the 1xV matrix and the 1-D array result shapes.
		term_frequency = np.asarray(dtm.sum(axis=0)).ravel().tolist()

		prepared = prepare(
			doc_lengths=doc_lengths,
			vocab=vocab,
			term_frequency=term_frequency,
			topic_term_dists=_row_normalize(lda.components_),
			doc_topic_dists=doc_topic_dists,
			**kwargs)

		return prepared, lda, vect

	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.decomposition import LatentDirichletAllocation
	# Module names are case-sensitive: the package is "pyLDAvis".
	from pyLDAvis import prepare


	# Example 1: scikit-learn's LatentDirichletAllocation.
	# `docs` is not defined in this snippet and must already exist.
	vect = CountVectorizer()
	lda = LatentDirichletAllocation()

	# from_sklearn returns the tuple (prepared, lda, vect), so `prepared`
	# holds that whole tuple here.
	prepared = from_sklearn(docs,vect,lda)

	#Using LDA module from https://github.com/ariddell/lda

	import lda

	# Rebinds `lda` from the imported module to an LDA instance.
	lda = lda.LDA()
	prepared = from_sklearn(docs,vect,lda)