Skip to content

Instantly share code, notes, and snippets.

@allanbatista
Created December 16, 2019 22:11
Show Gist options
  • Save allanbatista/147285e1d08da56b9915997c3c3e1e0a to your computer and use it in GitHub Desktop.
Save allanbatista/147285e1d08da56b9915997c3c3e1e0a to your computer and use it in GitHub Desktop.
Compute cosine similarity in batches over large matrices.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
def __batch_cosine_similarity_internal__(x_pred, x_true, batch_size=1024):
    """Find the most cosine-similar row of ``x_true`` for each row of ``x_pred``.

    ``x_true`` is consumed in slices of ``batch_size`` rows, so only one
    ``(rows of x_pred, batch_size)`` similarity matrix is held at a time
    instead of the full pairwise matrix.

    Args:
        x_pred: query vectors, one per row; passed whole to
            ``cosine_similarity`` on every iteration.
        x_true: candidate vectors, one per row; must support ``len`` and
            row slicing.
        batch_size: number of ``x_true`` rows scored per iteration.

    Returns:
        A tuple ``(idx, val)`` of 1-D arrays with one entry per row of
        ``x_pred``: ``idx`` is the row index into ``x_true`` of the best
        match and ``val`` is its cosine similarity.  Both arrays are empty
        when ``x_true`` has no rows.
    """
    best_idx = np.array([], dtype=np.intp)
    best_val = np.array([])
    for start in range(0, len(x_true), batch_size):
        scores = cosine_similarity(x_pred, x_true[start:start + batch_size])
        # argmax is relative to the slice; shift it to index the full x_true.
        batch_idx = scores.argmax(axis=1) + start
        batch_val = scores.max(axis=1)
        if start == 0:
            best_idx, best_val = batch_idx, batch_val
            continue
        # Strict '>' keeps the earlier batch on ties, i.e. the lowest index
        # among equally similar candidates wins.
        improved = batch_val > best_val
        best_idx = np.where(improved, batch_idx, best_idx)
        best_val = np.where(improved, batch_val, best_val)
    return best_idx, best_val
def batch_cosine_similarity(x_pred, x_true, batch_size=1024):
    """Return the best cosine-similarity match in ``x_true`` for each row of ``x_pred``.

    ``x_pred`` is walked in slices of ``batch_size`` rows; each slice is
    handed to ``__batch_cosine_similarity_internal__``, which in turn walks
    ``x_true`` with the same ``batch_size``.

    Args:
        x_pred: predicted vectors, one per row; must support ``len`` and
            row slicing.
        x_true: reference ("true") vectors, one per row.
        batch_size: number of rows processed per iteration; must be >= 1.

    Returns:
        A tuple ``(indexes, similarities)`` of two lists with one entry per
        row of ``x_pred``: the index into ``x_true`` of the most similar row
        and the corresponding similarity value.  Both lists are empty when
        ``x_pred`` has no rows.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    # A negative step would make range() empty and silently yield no results.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    results_indexies = []
    results_probas = []
    for start in range(0, len(x_pred), batch_size):
        indexies, probas = __batch_cosine_similarity_internal__(
            x_pred[start:start + batch_size], x_true, batch_size=batch_size
        )
        results_indexies.extend(indexies.tolist())
        results_probas.extend(probas.tolist())
    return results_indexies, results_probas
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment