Created
December 16, 2019 22:11
-
-
Save allanbatista/147285e1d08da56b9915997c3c3e1e0a to your computer and use it in GitHub Desktop.
Compute cosine similarity in batches for large matrices.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.metrics.pairwise import cosine_similarity | |
def __batch_cosine_similarity_internal__(x_pred, x_true, batch_size=1024):
    """Find, for every row of ``x_pred``, its most similar row in ``x_true``.

    ``x_true`` is processed in slices of ``batch_size`` rows so that only a
    ``len(x_pred) x batch_size`` similarity matrix is held at any one time.

    Args:
        x_pred: query rows; passed as the first argument to
            ``cosine_similarity``.
        x_true: candidate rows; must support ``len()`` and slicing.
        batch_size: number of ``x_true`` rows compared per iteration.

    Returns:
        A tuple ``(idx, val)`` of 1-D arrays with one entry per row of
        ``x_pred``: ``idx`` is the position in ``x_true`` of the best match
        and ``val`` is the corresponding cosine similarity. When ``x_true``
        is empty, two empty arrays are returned.
    """
    best_idx = None
    best_val = None

    for start in range(0, len(x_true), batch_size):
        distances = cosine_similarity(x_pred, x_true[start:start + batch_size])

        # argmax is relative to the slice; shift it back to x_true coordinates.
        chunk_idx = distances.argmax(1) + start
        chunk_val = np.amax(distances, axis=1)

        if best_val is None:
            best_idx, best_val = chunk_idx, chunk_val
            continue

        # Strict '>' keeps the earlier index on ties, which matches the
        # first-occurrence behaviour of argmax over the whole of x_true.
        improved = chunk_val > best_val
        best_idx = np.where(improved, chunk_idx, best_idx)
        best_val = np.where(improved, chunk_val, best_val)

    if best_val is None:
        # x_true was empty, so there was nothing to compare against.
        return np.array([]), np.array([])

    return best_idx, best_val
def batch_cosine_similarity(x_pred, x_true, batch_size=1024):
    """Return the best cosine-similarity match in ``x_true`` for each ``x_pred`` row.

    ``x_pred`` is walked in slices of ``batch_size`` rows; each slice is
    handed to ``__batch_cosine_similarity_internal__`` together with the
    full ``x_true`` and the same ``batch_size``.

    Args:
        x_pred: the predicted rows to look up.
        x_true: the rows considered true, searched for the best match.
        batch_size: number of rows processed per iteration.

    Returns:
        A tuple ``(indices, similarities)`` of plain Python lists, one entry
        per row of ``x_pred``: the index of the best-matching ``x_true`` row
        and its similarity value.
    """
    best_indices = []
    best_scores = []

    for start in range(0, len(x_pred), batch_size):
        stop = start + batch_size
        chunk_indices, chunk_scores = __batch_cosine_similarity_internal__(
            x_pred[start:stop], x_true, batch_size=batch_size
        )
        best_indices.extend(chunk_indices.tolist())
        best_scores.extend(chunk_scores.tolist())

    return best_indices, best_scores
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment