nkt1546789 · August 29, 2016 14:45
diff --git a/single_topic_unigram_generator.py b/single_topic_unigram_generator.py
 import numpy as np
 from scipy import sparse

 class SingleTopicUnigramGenerator(object):
     """Sample bag-of-words documents from a single-topic unigram mixture.

     Every document is assigned exactly one topic, and all of its words are
     drawn from that topic's word distribution.

     Parameters
     ----------
     n_topics : int
         Number of topics.
     n_features : int
         Vocabulary size, i.e. the number of columns of the generated matrix.
     alpha : float
         Symmetric Dirichlet concentration for the topic proportions.
     beta : float
         Symmetric Dirichlet concentration for each topic's word distribution.
     """

     def __init__(self, n_topics=3, n_features=1000, alpha=1.0, beta=1.0):
         self.n_topics = n_topics
         self.n_features = n_features
         self.alpha = alpha
         self.beta = beta

     def generate(self, n_docs=200, min_length=100, max_length=100):
         """Draw a synthetic corpus together with each document's topic.

         Parameters
         ----------
         n_docs : int
             Number of documents to generate.
         min_length, max_length : number
             Bounds for the document length. The length is a uniform draw
             between the two bounds truncated to an integer, so it lies in
             ``[min_length, max_length)`` and equals ``min_length`` when
             both bounds are the same.

         Returns
         -------
         W : scipy.sparse.csr_matrix, shape (n_docs, n_features)
             Word counts per document, stored as float64.
         z : numpy.ndarray, shape (n_docs,)
             Index of the topic assigned to each document.
         """
         # Topic proportions are governed by alpha; beta is reserved for the
         # per-topic word distributions drawn just below.
         theta = np.random.dirichlet(np.repeat(self.alpha, self.n_topics))

         # One word distribution per topic: row k of Phi belongs to topic k.
         Phi = np.random.dirichlet(
             np.repeat(self.beta, self.n_features), self.n_topics
         )

         # Each row is a one-hot multinomial draw; argmax recovers the topic.
         z = np.random.multinomial(1, pvals=theta, size=n_docs).argmax(axis=1)

         # Preallocating keeps the (n_docs, n_features) shape even when
         # n_docs is 0, which a list of rows cannot guarantee.
         counts = np.zeros((n_docs, self.n_features), dtype=np.float64)
         for d in range(n_docs):  # range exists on both Python 2 and 3
             length = int(np.random.uniform(min_length, max_length))
             counts[d] = np.random.multinomial(length, pvals=Phi[z[d]])

         return sparse.csr_matrix(counts), z
	import numpy as np
	from scipy import sparse

	class SingleTopicUnigramGenerator(object):
	    """Sample bag-of-words documents from a single-topic unigram mixture.

	    Every document is assigned exactly one topic, and all of its words are
	    drawn from that topic's word distribution.

	    Parameters
	    ----------
	    n_topics : int
	        Number of topics.
	    n_features : int
	        Vocabulary size, i.e. the number of columns of the generated matrix.
	    alpha : float
	        Symmetric Dirichlet concentration for the topic proportions.
	    beta : float
	        Symmetric Dirichlet concentration for each topic's word distribution.
	    """

	    def __init__(self, n_topics=3, n_features=1000, alpha=1.0, beta=1.0):
	        self.n_topics = n_topics
	        self.n_features = n_features
	        self.alpha = alpha
	        self.beta = beta

	    def generate(self, n_docs=200, min_length=100, max_length=100):
	        """Draw a synthetic corpus together with each document's topic.

	        Parameters
	        ----------
	        n_docs : int
	            Number of documents to generate.
	        min_length, max_length : number
	            Bounds for the document length. The length is a uniform draw
	            between the two bounds truncated to an integer, so it lies in
	            ``[min_length, max_length)`` and equals ``min_length`` when
	            both bounds are the same.

	        Returns
	        -------
	        W : scipy.sparse.csr_matrix, shape (n_docs, n_features)
	            Word counts per document, stored as float64.
	        z : numpy.ndarray, shape (n_docs,)
	            Index of the topic assigned to each document.
	        """
	        # Topic proportions are governed by alpha; beta is reserved for the
	        # per-topic word distributions drawn just below.
	        theta = np.random.dirichlet(np.repeat(self.alpha, self.n_topics))

	        # One word distribution per topic: row k of Phi belongs to topic k.
	        Phi = np.random.dirichlet(
	            np.repeat(self.beta, self.n_features), self.n_topics
	        )

	        # Each row is a one-hot multinomial draw; argmax recovers the topic.
	        z = np.random.multinomial(1, pvals=theta, size=n_docs).argmax(axis=1)

	        # Preallocating keeps the (n_docs, n_features) shape even when
	        # n_docs is 0, which a list of rows cannot guarantee.
	        counts = np.zeros((n_docs, self.n_features), dtype=np.float64)
	        for d in range(n_docs):  # range exists on both Python 2 and 3
	            length = int(np.random.uniform(min_length, max_length))
	            counts[d] = np.random.multinomial(length, pvals=Phi[z[d]])

	        return sparse.csr_matrix(counts), z