Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save stevenRush/8570b349c4a6c1f3a601d488d100eb34 to your computer and use it in GitHub Desktop.
Save stevenRush/8570b349c4a6c1f3a601d488d100eb34 to your computer and use it in GitHub Desktop.
from __future__ import division
import scipy.sparse as sp
from scipy.sparse.linalg import norm
def cosine_distance(vec1, vec2):
    """Return the cosine similarity of two sparse vectors.

    Despite the name, the value computed is a similarity (element-wise
    product summed, divided by the product of the norms), not
    ``1 - similarity``.

    Args:
        vec1: SciPy sparse matrix; presumably a single row or column
            vector (the norm used is taken over all stored entries).
        vec2: SciPy sparse matrix with the same shape as ``vec1``.

    Returns:
        The cosine of the angle between the vectors, or ``0.0`` when
        either vector has zero norm and the cosine is undefined.
    """
    denominator = norm(vec1) * norm(vec2)
    # A zero-norm vector would otherwise produce a 0/0 division (nan plus a
    # RuntimeWarning); treat it as having no similarity to anything.
    if denominator == 0:
        return 0.0
    return vec1.multiply(vec2).sum() / denominator
def jaccard_index(vec1, vec2):
    """Return the Jaccard index of the positive supports of two sparse vectors.

    Each vector is reduced to a 0/1 indicator of its strictly positive
    entries; the result is the size of the intersection of those indicator
    sets divided by the size of their union.

    Args:
        vec1: SciPy sparse matrix.
        vec2: SciPy sparse matrix with the same shape as ``vec1``.

    Returns:
        ``|A & B| / |A | B|`` for the positive-entry sets A and B, or
        ``0.0`` when neither vector has a positive entry (empty union).
    """
    bool_vec1 = (vec1 > 0).astype(int)
    bool_vec2 = (vec2 > 0).astype(int)
    intersect = bool_vec1.multiply(bool_vec2).sum()
    # Inclusion-exclusion: |A | B| = |A| + |B| - |A & B|.
    union = bool_vec1.sum() + bool_vec2.sum() - intersect
    # Both supports empty: the ratio would be 0/0 (nan); report no overlap.
    if union == 0:
        return 0.0
    return intersect / union
def generalized_jaccard(vec1, vec2):
    """Return the generalized (weighted) Jaccard similarity of two sparse vectors.

    Computed as the sum of element-wise minima divided by the sum of
    element-wise maxima.

    Args:
        vec1: SciPy sparse matrix supporting ``minimum``/``maximum``;
            presumably with non-negative entries -- confirm with callers.
        vec2: SciPy sparse matrix with the same shape as ``vec1``.

    Returns:
        ``sum(min(vec1, vec2)) / sum(max(vec1, vec2))``, or ``0.0`` when
        the sum of maxima is zero (e.g. both vectors are all zeros).
    """
    max_total = vec1.maximum(vec2).sum()
    # Guard the 0/0 case, which would otherwise yield nan with a warning.
    if max_total == 0:
        return 0.0
    return vec1.minimum(vec2).sum() / max_total
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment