Created
August 2, 2025 03:18
-
-
Save tndoan/73ca79e45ae4487a6b6c6a21fbceb28e to your computer and use it in GitHub Desktop.
A vectorized Python implementation, using NumPy, that computes the cosine similarity between every pair of vectors in a list.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def cosine_similarity_matrix(vectors):
    """
    Compute the cosine similarity matrix between all pairs of vectors.

    Parameters:
        vectors (List or np.ndarray): A 2D array of shape (n_vectors, vector_dim).
            An empty sequence (e.g. ``[]``) is accepted and yields an empty
            (0, 0) result.

    Returns:
        np.ndarray: A 2D array of shape (n_vectors, n_vectors) with cosine
            similarity scores. Rows/columns that correspond to all-zero
            vectors are filled with 0. Real-valued results are clipped to
            the closed interval [-1, 1].

    Raises:
        ValueError: If ``vectors`` is not two-dimensional (and not empty).
    """
    matrix = np.asarray(vectors)

    # An empty list converts to a 1-D array of size 0; there are no pairs
    # to compare, so return an empty matrix instead of failing on axis=1.
    if matrix.size == 0 and matrix.ndim < 2:
        return np.zeros((0, 0))

    if matrix.ndim != 2:
        raise ValueError(
            "vectors must be a 2D array of shape (n_vectors, vector_dim); "
            f"got an array with {matrix.ndim} dimension(s)"
        )

    # Row-wise Euclidean norms, kept as a column so they broadcast per row.
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)

    # A zero vector has norm 0; dividing by 1 instead leaves the row all
    # zeros, so its similarity to every vector comes out as 0.
    norms[norms == 0] = 1.0
    normalized = matrix / norms

    # Cosine similarity = dot product of the unit-length rows.
    similarity_matrix = np.dot(normalized, normalized.T)

    # Floating-point rounding can push values marginally past +/-1 (e.g.
    # 1.0000000000000002 on the diagonal), which breaks callers that feed
    # the result to arccos. Complex results are left untouched.
    if not np.iscomplexobj(similarity_matrix):
        similarity_matrix = np.clip(similarity_matrix, -1.0, 1.0)

    return similarity_matrix
# Demo: two orthogonal unit vectors plus their (unnormalized) sum, so the
# printed matrix shows similarities of 0 and of roughly 0.707.
vectors = [[1, 0, 0], [0, 1, 0], [1, 1, 0]]
cosine_scores = cosine_similarity_matrix(vectors)
print(cosine_scores)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment