Last active
January 20, 2016 09:54
-
-
Save takuti/622ee8c3d786d292ce6d to your computer and use it in GitHub Desktop.
Collaborative Filtering for row items in a given N-by-M matrix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class CF:
    """Collaborative filtering for row items in a given N-by-M matrix.

    Every row of the matrix is treated as one item vector. Pairwise
    similarities between rows are computed by ``run`` and stored in the
    N-by-N attribute ``sims_mat``; ``top_n`` ranks them per row.
    """

    def __init__(self, mat):
        """Keep a reference to ``mat`` and record its dimensions.

        ``mat`` must expose a two-element ``shape`` (e.g. a 2D numpy array);
        the two values are stored as ``self.N`` (rows) and ``self.M``
        (columns).
        """
        self.mat = mat
        self.N, self.M = mat.shape

    def similarity(self, v1, v2, func='cosine'):
        """Compute similarity between v1 and v2.

        v1 and v2 are vectors (1D numpy array).

        func selects the measure:
          * 'cosine'    -- dot(v1, v2) / (|v1| * |v2|); 0. if either vector
                           has zero norm.
          * 'euclidean' -- 1 / (1 + euclidean distance between v1 and v2).

        Element-wise identical vectors always score 1. (this includes two
        all-zero vectors under 'cosine').

        Returns a float. Raises ValueError for an unknown ``func``.
        """
        # Reject unknown measures up front instead of silently returning None.
        if func not in ('cosine', 'euclidean'):
            raise ValueError(
                "unknown similarity function: %r "
                "(expected 'cosine' or 'euclidean')" % (func,))

        if np.array_equal(v1, v2):
            return 1.

        # Work on float copies so integer inputs cannot overflow or wrap
        # around (e.g. unsigned subtraction) in the products below.
        v1 = np.asarray(v1, dtype=float)
        v2 = np.asarray(v2, dtype=float)

        if func == 'cosine':
            v1_norm = np.sqrt(np.dot(v1, v1))
            v2_norm = np.sqrt(np.dot(v2, v2))
            # Cosine is undefined for a zero vector; treat it as "no similarity".
            if v1_norm == 0. or v2_norm == 0.:
                return 0.
            return float(np.dot(v1, v2) / (v1_norm * v2_norm))

        # func == 'euclidean'
        diff = v1 - v2
        return float(1. / (1. + np.sqrt(np.dot(diff, diff))))

    def run(self, func='cosine'):
        """Compute the symmetric N-by-N similarity matrix ``self.sims_mat``.

        func is forwarded to ``similarity`` and defaults to 'cosine'.
        """
        sims = np.zeros((self.N, self.N))
        # Similarity is symmetric, so only the upper triangle (including the
        # diagonal) is computed and then mirrored.
        for i in range(self.N):
            row_i = self.mat[i, :]
            for j in range(i, self.N):
                sims[i, j] = sims[j, i] = self.similarity(
                    row_i, self.mat[j, :], func)
        # Publish only once fully built, so a failure above cannot leave a
        # half-filled matrix that top_n() would mistake for a finished one.
        self.sims_mat = sims

    def top_n(self, n):
        """Return Top-N similar items' indices for each row.

        Runs ``run()`` first if ``sims_mat`` has not been computed yet.

        Returns a tuple ``(tops_idx, tops_sim)`` of 2D arrays with one row
        per item and ``min(n, N)`` columns: ``tops_idx[i]`` holds the indices
        of the rows most similar to row i in descending order of similarity,
        and ``tops_sim[i]`` holds the matching similarity values. Each row is
        part of its own ranking (self-similarity is 1.). The result is 2D
        even when the matrix has a single row.
        """
        if not hasattr(self, 'sims_mat'):
            self.run()

        # Sort each row ascending, flip to descending, keep the first n columns.
        tops_idx = np.argsort(self.sims_mat, axis=1)[:, ::-1][:, :n]
        # Gather the similarity values at the selected indices, row by row.
        row_ids = np.arange(self.N)[:, np.newaxis]
        tops_sim = self.sims_mat[row_ids, tops_idx]
        return tops_idx, tops_sim
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment