Skip to content

Instantly share code, notes, and snippets.

@ialexpovad
Last active December 27, 2022 07:29
Show Gist options
  • Select an option

  • Save ialexpovad/d8d15b03d59ca1bbd822ab66d5c6cc0f to your computer and use it in GitHub Desktop.

Select an option

Save ialexpovad/d8d15b03d59ca1bbd822ab66d5c6cc0f to your computer and use it in GitHub Desktop.
Сalculate squared euclidean distance matrices in Python.
# Several ways to calculate squared Euclidean Distance (Frobenius norm) matrices in Python
# Import modules first:
## import numpy module
import numpy as np
## import numpy linear algebra module
import numpy.linalg as la
## import scipy spatial module
import scipy.spatial as spt
def compute_ED_npNorm(M):
"""
Method1: Using numpy.linalg.norm to compute the squared Euclidean Distance
as norm which compute square roots, it is comparatively costly, it should be avoided if possible.
"""
# determine demensions of data matrix M
m,n = M.shape
# initialize the squared Euclidean Distanse matrix E
E = np.zeros((n,n))
# iteration on upper triangle of matrix
for i in range(n):
for j in range(i + 1, n):
E[i,j] = la.norm(M[:,i] - M[:,j])**2
E[j,i] = E[i,j]
return E
def compute_ED_npDot(M):
"""
Method 2: Using numpy.dot function to compute the squared Euclidean Distance
aiming at efficiency.
"""
# determine demensions of data matrix M
m,n = M.shape
# initialize the squared Euclidean Distanse matrix E
E = np.zeros((n,n))
# iteration on upper triangle of matrix
for i in range(n):
for j in range(i + 1, n):
diff = M[:,i] - M[:,j]
E[i,j] = np.dot(diff, diff)
E[j,i] = E[i,j]
return E
def compute_ED_GramMatrix(M):
"""
Method 3: By pre-computing the Gram matrix G and calling dot for G = M.T*M instead of calling dot
for each pair if vectors separately by using G_ij = M_i.T*M_j, then E_ij = G_ii - 2*G_ij + G_jj
"""
# determine demensions of data matrix M
m,n = M.shape
# initialize the squared Euclidean Distanse matrix E
E = np.zeros((n,n))
# compute Gram matrix G
G = np.dot(M.T, M)
for i in range(n):
for j in range(i + 1, n):
# make use of |x-y|^2 = x'x + y'y - 2x'y
E[i,j] = G[i,i] - 2*G[i,j] + G[j,j]
E[j,i] = E[i,j]
return E
def compute_ED_avoidUseLoops(M):
"""
Method 4: Avoid using for loops
Assume E = H + K - 2*G, where G is Gram matrix as in method num. 3.
We find H_ij = G_ii and K_ij = G_jj H is n*n matrix whose each column correspond
to the diagonal of G. K = H.T, then D = H + H.T - 2*G
"""
# determine demensions of data matrix M
m,n = M.shape
# compute Gram matrix G
G = np.dot(M.T, M)
# compute matrix H
H = np.tile(np.diag(G), (n,1))
return H + H.T - 2*G
def compute_ED_usingSciPy(M):
"""
Method 5: Using SciPy build-in function
using submodule distance under module spatial
"""
V = spt.distance.pdist(M.T, 'sqeuclidean')
return spt.distance.squareform(V)
if __name__ == "__main__":
# Random values of array in a given shape.
M = np.random.rand(100,100)
import time
# region Method 1
start = time.time()
M1 = compute_ED_npNorm(M)
_M1 = np.linalg.norm(M1, ord = 'fro')
end = time.time()
print(end - start)
# endregion
# region Method 2
start = time.time()
M2 = compute_ED_npDot(M)
_M2 = np.linalg.norm(M2, ord = 'fro')
end = time.time()
print(end - start)
# endregion
# region Method 3
start = time.time()
M3 = compute_ED_GramMatrix(M)
_M3 = np.linalg.norm(M3, ord = 'fro')
end = time.time()
print(end - start)
# endregion
# region Method 4
start = time.time()
M4 = compute_ED_avoidUseLoops(M)
_M4 = np.linalg.norm(M4, ord = 'fro')
end = time.time()
print(end - start)
# endregion
# region Method 5
start = time.time()
M5 = compute_ED_npNorm(M)
_M5 = np.linalg.norm(M5, ord = 'fro')
end = time.time()
print(end - start)
# endregion
print(" Method 1: ", round(_M1, 3), '\n',
"Method 2: ", round(_M2, 3),'\n',
"Method 3: ", round(_M3, 3),'\n',
"Method 4: ", round(_M4, 3),'\n',
"Method 5: ", round(_M5, 3))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment