Skip to content

Instantly share code, notes, and snippets.

@do-me
Last active October 22, 2024 09:13
Show Gist options
  • Save do-me/49d0c907593afd5b93dad07dbceb6d0a to your computer and use it in GitHub Desktop.
Save do-me/49d0c907593afd5b93dad07dbceb6d0a to your computer and use it in GitHub Desktop.
Quick cosine similarity with numpy & query with pandas
from numpy.linalg import norm


def cos_sim(a, b):
    """Return the cosine similarity between ``a`` and ``b``.

    Computed as the dot product of the inputs divided by the product of
    their norms.

    Adapted from https://huggingface.co/jinaai/jina-embeddings-v2-base-en

    NOTE(review): assumes both arguments are 1-D numpy vectors of equal
    length -- confirm against what ``model.encode`` returns. A zero-norm
    input divides by zero here.
    """
    return (a @ b.T) / (norm(a) * norm(b))


# Embed the search phrase once, then score every stored embedding against it.
query = "social democracy"
quer_emb = model.encode(query)

# Row-by-row scoring: one Python-level call to cos_sim per DataFrame row.
df["cos_sim"] = df["embeddings"].apply(lambda emb: cos_sim(emb, quer_emb))

# Most similar rows first.
df = df.sort_values("cos_sim", ascending=False)
##################################################################################################
# Vectorized variant -- reported by the author as 2x faster for 350k rows.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

query = "social democracy"
quer_emb = model.encode(query)

# Stack the per-row embeddings into one array so that all rows are scored
# in a single call.
# NOTE(review): assumes every entry of df["embeddings"] has the same shape.
embeddings_matrix = np.stack(df["embeddings"].to_numpy())

# The query is reshaped to a single row; column 0 of the result therefore
# holds the similarity of each stored embedding to the query.
query_row = quer_emb.reshape(1, -1)
similarities = cosine_similarity(embeddings_matrix, query_row)
df["cos_sim"] = similarities[:, 0]

# Most similar rows first.
df = df.sort_values(by="cos_sim", ascending=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment