Skip to content

Instantly share code, notes, and snippets.

@nkt1546789
Last active July 3, 2016 04:22
Show Gist options
  • Save nkt1546789/a53a145282fd3befb89e5618408f4cff to your computer and use it in GitHub Desktop.
Save nkt1546789/a53a145282fd3befb89e5618408f4cff to your computer and use it in GitHub Desktop.
import numpy as np
from scipy import sparse
from sklearn import preprocessing
from graphranker import GraphRanker
class TokenRank(GraphRanker):
    """Rank tokens via ``GraphRanker`` on a token co-occurrence graph.

    ``fit`` turns tokenised documents into a symmetric boolean adjacency
    matrix (two tokens are linked when they appear within ``window``
    positions of each other) and hands that matrix to ``GraphRanker.fit``.

    NOTE(review): ``score`` and ``print_tokens`` read ``self.f``, which is
    presumably the per-node score vector set by ``GraphRanker.fit`` --
    confirm against the base class.
    """

    def __init__(self, window=10, **kwds):
        """Store the window size and forward ``kwds`` to ``GraphRanker``.

        Args:
            window: Maximum distance (in token positions) between two
                tokens for them to count as co-occurring.
            **kwds: Passed unchanged to ``GraphRanker.__init__``.
        """
        self.window = window
        super(TokenRank, self).__init__(**kwds)

    def fit(self, token_lists):
        """Build the co-occurrence graph and fit the underlying ranker.

        Args:
            token_lists: Iterable of token sequences. Each sequence must
                support ``len()`` and integer indexing; tokens must be
                hashable.

        Returns:
            Whatever ``GraphRanker.fit`` returns for the adjacency matrix.

        Raises:
            ValueError: If ``token_lists`` contains no tokens at all.
        """
        dictionary = {}
        row = []
        col = []
        window = self.window
        for token_list in token_lists:
            n = len(token_list)
            for pos in range(n):
                i = dictionary.setdefault(token_list[pos], len(dictionary))
                # Only look forward: the matrix is symmetrised and binarised
                # below, so pairing with earlier positions would only add
                # duplicate entries.  The range starts at ``pos`` itself, so
                # every token is also linked to itself (self-loop).
                end = min(pos + window + 1, n)
                for pos2 in range(pos, end):
                    j = dictionary.setdefault(
                        token_list[pos2], len(dictionary))
                    row.append(i)
                    col.append(j)

        if not dictionary:
            # Same exception type SciPy raised here before, but with a
            # message that names the actual problem.
            raise ValueError("token_lists contains no tokens")

        size = len(dictionary)
        # Explicit shape keeps the matrix square (vocabulary x vocabulary)
        # instead of relying on inference from the largest index seen.
        A = sparse.coo_matrix(
            (np.ones(len(row)), (row, col)), shape=(size, size))
        # Symmetrise and reduce counts to a boolean adjacency matrix.
        A = (A + A.T) != 0
        self.dictionary = dictionary
        self.id2token = {value: key for key, value in dictionary.items()}
        return super(TokenRank, self).fit(A)

    def score(self, token):
        """Return the entry of ``self.f`` for ``token``.

        Raises:
            KeyError: If ``token`` was not seen during ``fit``.
        """
        return self.f[self.dictionary[token]]

    def print_tokens(self, topn=10):
        """Print the ``topn`` tokens with the largest ``self.f`` values.

        One line per token, formatted as ``"<token> <score>"``, in
        descending score order.
        """
        for i in np.argsort(self.f)[::-1][:topn]:
            # %-formatting of a single string prints identically on
            # Python 2 and Python 3.
            print("%s %s" % (self.id2token[i], self.f[i]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment