Last active
December 14, 2017 01:39
-
-
Save ksindi/9429a1c5d2259b105b7a8c4be395defe to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Notes: | |
- Is about 2ms for (100, (10000, 100)) shape inputs on my i7 laptop | |
- It's 2x faster without doing vector normalize (might make sense to pre-normalize the vectors) | |
""" | |
import numpy as np | |
import numba | |
@numba.jit(nopython=True, parallel=True)
def fast_cosine_matrix(u, M):
    """Return the cosine similarity between ``u`` and every row of ``M``.

    Positions where either ``u`` or the current row holds NaN are skipped for
    that row: they contribute neither to the dot product nor to either norm.

    Args:
        u: 1-D array of length ``d``.
        M: 2-D array of shape ``(n, d)``.

    Returns:
        1-D array of length ``n`` (NumPy's default float dtype); entry ``i``
        is the cosine similarity between ``u`` and ``M[i]``. When either norm
        is zero after skipping NaN positions, the entry is set to 1.0.

    Raises:
        ValueError: if the number of columns of ``M`` differs from the
            length of ``u``.
    """
    dim = u.shape[0]
    # Numba does not bounds-check array indexing by default, so a shape
    # mismatch would silently read the wrong memory instead of failing.
    # The check lives outside the prange loop, where raising is safe.
    if M.shape[1] != dim:
        raise ValueError("M must have as many columns as u has elements")

    n_rows = M.shape[0]
    scores = np.empty(n_rows)
    for i in numba.prange(n_rows):
        v = M[i]
        # Float accumulators keep each variable at a single type for Numba.
        udotv = 0.0
        u_sq = 0.0
        v_sq = 0.0
        for j in range(dim):
            uj = u[j]
            vj = v[j]
            if np.isnan(uj) or np.isnan(vj):
                continue
            udotv += uj * vj
            u_sq += uj * uj
            v_sq += vj * vj
        # The norm of u is recomputed per row because the set of skipped
        # (NaN) positions depends on the row as well as on u.
        if u_sq == 0.0 or v_sq == 0.0:
            scores[i] = 1.0
        else:
            scores[i] = udotv / (np.sqrt(u_sq) * np.sqrt(v_sq))
    return scores
# Demo: score one random query vector against a random matrix of row vectors.
u = np.random.random_sample(size=(100,))
M = np.random.random_sample(size=(100000, 100))
# The first call also triggers Numba's JIT compilation of the function.
fast_cosine_matrix(u, M)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment