nkt1546789 · July 3, 2016 04:22
diff --git a/tokenrank.py b/tokenrank.py
 import numpy as np
 from scipy import sparse
 from sklearn import preprocessing
 from graphranker import GraphRanker

 class TokenRank(GraphRanker):
     """Rank tokens on a graph built from windowed co-occurrence.

     Tokens that appear within ``window`` positions of each other in the
     same token list are linked; the resulting boolean adjacency matrix is
     handed to ``GraphRanker.fit``.  The code is written to run on both
     Python 2.7 and Python 3.
     """

     def __init__(self, window=10, **kwds):
         """Remember the window size and forward other options to the base.

         Args:
             window: Maximum positional distance between two tokens for
                 them to count as co-occurring.
             **kwds: Forwarded unchanged to ``GraphRanker.__init__``.
         """
         self.window = window
         super(TokenRank, self).__init__(**kwds)

     def fit(self, token_lists):
         """Build the co-occurrence graph and fit the base ranker on it.

         Sets ``self.dictionary`` (token -> integer id) and
         ``self.id2token`` (integer id -> token).

         Args:
             token_lists: Iterable of token sequences; tokens must be
                 hashable because they are used as dictionary keys.

         Returns:
             Whatever ``GraphRanker.fit`` returns for the adjacency matrix.

         Raises:
             ValueError: If no co-occurrence pair could be collected
                 (for example when ``token_lists`` holds no tokens).
         """
         dictionary = {}
         row = []
         col = []
         for token_list in token_lists:
             # Resolve each token to its integer id once per list instead
             # of once per window pair; ids are assigned in first-seen
             # order.
             ids = [dictionary.setdefault(token, len(dictionary))
                    for token in token_list]
             n = len(ids)
             for pos, i in enumerate(ids):
                 start = max(0, pos - self.window)
                 end = min(pos + self.window + 1, n)
                 # For a non-negative window the range contains ``pos``
                 # itself, so each token also receives a diagonal entry.
                 for pos2 in range(start, end):
                     row.append(i)
                     col.append(ids[pos2])
         if not row:
             # Without any pair there is no graph to build; fail with an
             # explicit message rather than an opaque scipy error.
             raise ValueError(
                 "cannot build co-occurrence matrix: no tokens in token_lists")
         size = len(dictionary)
         data = [1.0] * len(row)
         # The explicit shape keeps the matrix aligned with ``dictionary``;
         # after the ``!= 0`` comparison only the link pattern remains.
         A = sparse.coo_matrix((data, (row, col)), shape=(size, size))
         A = (A + A.T) != 0
         self.dictionary = dictionary
         self.id2token = {value: key for key, value in dictionary.items()}
         return super(TokenRank, self).fit(A)

     def score(self, token):
         """Return the entry of ``self.f`` that belongs to ``token``.

         ``self.f`` is not assigned in this class; presumably it is set by
         ``GraphRanker.fit`` -- confirm against the base class.

         Raises:
             KeyError: If ``token`` was not seen during ``fit``.
         """
         return self.f[self.dictionary[token]]

     def print_tokens(self, topn=10):
         """Print the ``topn`` tokens with the largest values in ``self.f``.

         Each line shows the token and its value separated by one space,
         highest value first.
         """
         for i in np.argsort(self.f)[::-1][:topn]:
             # A single formatted string prints identically on Python 2
             # and Python 3.
             print("%s %s" % (self.id2token[i], self.f[i]))
	import numpy as np
	from scipy import sparse
	from sklearn import preprocessing
	from graphranker import GraphRanker

	class TokenRank(GraphRanker):
	    """Rank tokens on a graph built from windowed co-occurrence.

	    Tokens that appear within ``window`` positions of each other in the
	    same token list are linked; the resulting boolean adjacency matrix is
	    handed to ``GraphRanker.fit``.  The code is written to run on both
	    Python 2.7 and Python 3.
	    """

	    def __init__(self, window=10, **kwds):
	        """Remember the window size and forward other options to the base.

	        Args:
	            window: Maximum positional distance between two tokens for
	                them to count as co-occurring.
	            **kwds: Forwarded unchanged to ``GraphRanker.__init__``.
	        """
	        self.window = window
	        super(TokenRank, self).__init__(**kwds)

	    def fit(self, token_lists):
	        """Build the co-occurrence graph and fit the base ranker on it.

	        Sets ``self.dictionary`` (token -> integer id) and
	        ``self.id2token`` (integer id -> token).

	        Args:
	            token_lists: Iterable of token sequences; tokens must be
	                hashable because they are used as dictionary keys.

	        Returns:
	            Whatever ``GraphRanker.fit`` returns for the adjacency matrix.

	        Raises:
	            ValueError: If no co-occurrence pair could be collected
	                (for example when ``token_lists`` holds no tokens).
	        """
	        dictionary = {}
	        row = []
	        col = []
	        for token_list in token_lists:
	            # Resolve each token to its integer id once per list instead
	            # of once per window pair; ids are assigned in first-seen
	            # order.
	            ids = [dictionary.setdefault(token, len(dictionary))
	                   for token in token_list]
	            n = len(ids)
	            for pos, i in enumerate(ids):
	                start = max(0, pos - self.window)
	                end = min(pos + self.window + 1, n)
	                # For a non-negative window the range contains ``pos``
	                # itself, so each token also receives a diagonal entry.
	                for pos2 in range(start, end):
	                    row.append(i)
	                    col.append(ids[pos2])
	        if not row:
	            # Without any pair there is no graph to build; fail with an
	            # explicit message rather than an opaque scipy error.
	            raise ValueError(
	                "cannot build co-occurrence matrix: no tokens in token_lists")
	        size = len(dictionary)
	        data = [1.0] * len(row)
	        # The explicit shape keeps the matrix aligned with ``dictionary``;
	        # after the ``!= 0`` comparison only the link pattern remains.
	        A = sparse.coo_matrix((data, (row, col)), shape=(size, size))
	        A = (A + A.T) != 0
	        self.dictionary = dictionary
	        self.id2token = {value: key for key, value in dictionary.items()}
	        return super(TokenRank, self).fit(A)

	    def score(self, token):
	        """Return the entry of ``self.f`` that belongs to ``token``.

	        ``self.f`` is not assigned in this class; presumably it is set by
	        ``GraphRanker.fit`` -- confirm against the base class.

	        Raises:
	            KeyError: If ``token`` was not seen during ``fit``.
	        """
	        return self.f[self.dictionary[token]]

	    def print_tokens(self, topn=10):
	        """Print the ``topn`` tokens with the largest values in ``self.f``.

	        Each line shows the token and its value separated by one space,
	        highest value first.
	        """
	        for i in np.argsort(self.f)[::-1][:topn]:
	            # A single formatted string prints identically on Python 2
	            # and Python 3.
	            print("%s %s" % (self.id2token[i], self.f[i]))