Skip to content

Instantly share code, notes, and snippets.

@nkt1546789
Last active July 4, 2016 08:55
Show Gist options
  • Save nkt1546789/7562a66aa7157377fa531f164495a3be to your computer and use it in GitHub Desktop.
Save nkt1546789/7562a66aa7157377fa531f164495a3be to your computer and use it in GitHub Desktop.
An implementation of TextRank with cosine similarity. This code is based on graphranker.py (https://gist.github.com/nkt1546789/f5a8f3c5bb4445d141fe7dd03a84bcd1).
import numpy as np
from scipy import sparse
from sklearn import preprocessing
from graphranker import GraphRanker
class TextRank(GraphRanker):
    """Rank tokenized texts using their pairwise cosine similarity.

    Each text becomes a bag-of-words count vector. The cosine similarities
    between those vectors form the matrix that is handed to
    ``GraphRanker.fit``.
    """

    def fit(self, texts):
        """Build the text-to-text similarity matrix and fit the ranker on it.

        Args:
            texts: Sequence of tokenized texts; each text is an iterable of
                hashable tokens. The sequence is kept on ``self.texts`` and
                indexed by position in ``print_texts``.

        Returns:
            Whatever ``GraphRanker.fit`` returns for the similarity matrix.
        """
        self.texts = texts
        vocabulary = {}
        rows = []
        cols = []
        for i, text in enumerate(texts):
            for token in text:
                rows.append(i)
                # A token gets the next free column the first time it is seen.
                cols.append(vocabulary.setdefault(token, len(vocabulary)))

        # Repeated (row, col) pairs are summed on conversion to CSR, which
        # yields per-text token counts. The shape is given explicitly so that
        # texts without any token still get a row; otherwise the matrix would
        # have fewer rows than there are texts.
        X = sparse.coo_matrix(
            (np.ones(len(rows)), (rows, cols)),
            shape=(len(texts), len(vocabulary)),
        ).tocsr()

        # Unit-length rows turn the dot product below into cosine similarity
        # rather than a raw count overlap that favours long texts.
        X = preprocessing.normalize(X, norm="l2", axis=1)
        S = X.dot(X.T)
        return super(TextRank, self).fit(S)

    def print_texts(self, topn=10):
        """Print the ``topn`` highest-scoring texts, best first.

        Each line holds the score followed by the text's tokens joined
        without a separator.

        Args:
            topn: Maximum number of texts to print.
        """
        # NOTE(review): ``self.f`` is read as one score per text and is
        # presumably set by ``GraphRanker.fit`` -- confirm against graphranker.
        for i in np.argsort(self.f)[::-1][:topn]:
            # A single formatted argument prints the same on Python 2 and 3.
            print("%s %s" % (self.f[i], u"".join(self.texts[i])))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment