Skip to content

Instantly share code, notes, and snippets.

@ksindi
Last active December 14, 2017 01:39
Show Gist options
  • Save ksindi/9429a1c5d2259b105b7a8c4be395defe to your computer and use it in GitHub Desktop.
Save ksindi/9429a1c5d2259b105b7a8c4be395defe to your computer and use it in GitHub Desktop.
"""
Notes:
- Takes about 2 ms for u of shape (100,) and M of shape (10000, 100) on my i7 laptop.
- It is about 2x faster without the vector normalization, so it may make sense to pre-normalize the vectors.
"""
import numpy as np
import numba
# CPU is Numba's default target; the explicit ``target`` keyword is an
# internal option that Numba has deprecated, so it is not passed here.
@numba.jit(nopython=True, parallel=True)
def fast_cosine_matrix(u, M):
    """Return array of cosine similarity between u and rows in matrix M.

    Parameters
    ----------
    u : 1-D array
        Query vector.
    M : 2-D array
        Matrix whose rows are compared against ``u``. Only the first
        ``u.shape[0]`` columns of each row are read.

    Returns
    -------
    scores : 1-D float64 array of length ``M.shape[0]``
        ``scores[i]`` is the cosine similarity between ``u`` and ``M[i]``.

    Raises
    ------
    ValueError
        If ``M`` has fewer columns than ``u`` has elements.

    Notes
    -----
    - Positions where either ``u[j]`` or ``M[i, j]`` is NaN are skipped for
      that row, in the dot product and in both norms.
    - If either norm is zero (including the case where every position was
      skipped), the score for that row is set to 1.0.
    """
    n_rows = M.shape[0]
    m = u.shape[0]

    # nopython mode does not bounds-check indexing by default, so a row
    # shorter than u would otherwise be read past its end silently.
    if M.shape[1] < m:
        raise ValueError("M must have at least as many columns as u has elements")

    scores = np.empty(n_rows)
    for i in numba.prange(n_rows):
        v = M[i]
        # Accumulators are (re)initialised per row, so each prange
        # iteration is independent of the others.
        udotv = 0.0
        u_norm = 0.0
        v_norm = 0.0
        for j in range(m):
            if np.isnan(u[j]) or np.isnan(v[j]):
                continue
            udotv += u[j] * v[j]
            # u's norm is recomputed per row because the set of skipped
            # positions depends on where this row has NaNs.
            u_norm += u[j] * u[j]
            v_norm += v[j] * v[j]
        u_norm = np.sqrt(u_norm)
        v_norm = np.sqrt(v_norm)
        if (u_norm == 0) or (v_norm == 0):
            # Avoid dividing by zero for degenerate vectors.
            ratio = 1.0
        else:
            ratio = udotv / (u_norm * v_norm)
        scores[i] = ratio
    return scores
# Demo: score one random query vector against a matrix of random rows.
# The returned scores are discarded.
u = np.random.rand(100)
M = np.random.rand(100000, u.shape[0])
fast_cosine_matrix(u, M)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment