Naive implementation of Locally Linear Embeddings
import numpy as np
import heapq
from dataclasses import dataclass


@dataclass
class NearestNeighbours:
    """
    Stores the result of a nearest-neighbours search.
    `n_indices` is the list of indices of the vectors nearest to the provided vector.
    `this_index` is the index of a datapoint within `proximity_bound` of the query, or -1 if none was found.
    """
    n_indices: list[int] | None = None
    this_index: int = -1

def naive_knn(k: int, x: np.ndarray, datapoints: list[np.ndarray], proximity_bound=1e-4, allow_index_result=False) -> NearestNeighbours:
    """
    Naive O(n log k) computation of the k nearest neighbours of a given point:
    all n distances are computed, then the k smallest are selected with a heap.
    If `allow_index_result` is true, the `this_index` field will be the index of the
    vector in `datapoints` that lies within `proximity_bound` of `x`, if one exists.
    """
    dists = []
    for (j, dp) in enumerate(datapoints):
        diff = dp - x
        dist = np.linalg.norm(diff, ord=2)
        if dist <= proximity_bound:
            if allow_index_result:
                return NearestNeighbours(this_index=j)
            # Exclude (near-)duplicates of the query from its own neighbour list.
            continue
        dists.append((dist, j))
    k_argmin = heapq.nsmallest(k, dists)
    return NearestNeighbours(n_indices=[i for (_s, i) in k_argmin])
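
# A quick illustrative check of `naive_knn` (hypothetical values): among three points in
# the plane, the two nearest neighbours of a query near the origin are indices 0 and 1.
# >>> pts = [np.array([0.0, 0.0]), np.array([1.0, 0.0]), np.array([0.0, 2.0])]
# >>> naive_knn(2, np.array([0.1, 0.0]), pts).n_indices
# [0, 1]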

def reconstruct(x: np.ndarray, neighbours: list[np.ndarray], epsilon: float = 0.01) -> np.ndarray:
    """
    Find the weights, constrained to sum to one, that best reconstruct `x`
    as a linear combination of its neighbours.
    """
    n = len(neighbours)
    X = np.column_stack(neighbours)       # d x n matrix of neighbours
    ones_T = np.ones([1, n])
    B = x.reshape(-1, 1) @ ones_T - X     # d x n matrix of differences x - neighbour_j
    G = B.transpose() @ B                 # n x n local Gram matrix
    try:
        Gm1 = np.linalg.inv(G)
    except np.linalg.LinAlgError:
        # G is singular when n > d or the neighbours are degenerate; regularise it.
        Gm1 = np.linalg.inv(G + epsilon * np.identity(n))
    t = Gm1 @ ones_T.transpose()          # n x 1, solves G t = 1
    V = (ones_T @ t).item()               # normaliser so that the weights sum to one
    w = t / V
    return w
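
# A quick illustrative check of `reconstruct` (hypothetical values): the midpoint of two
# neighbours is reconstructed with weights of one half each. The 2x2 Gram matrix here is
# exactly singular, so the epsilon-regularised branch is exercised.
# >>> reconstruct(np.array([0.5, 0.0]), [np.array([0.0, 0.0]), np.array([1.0, 0.0])]).ravel()
# array([0.5, 0.5])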

class StandardLLE:
    def __init__(self, datapoints: list[np.ndarray], embedding_dim: int, k: int):
        """
        Locally linear embeddings à la Roweis & Saul.
        """
        # Store dimensions
        self.embedding_dim = embedding_dim
        n = len(datapoints)
        self.d = len(datapoints[0])
        self.k = k
        self.W = np.zeros([n, n])
        # Construct the weighted k-NN graph of local reconstruction weights.
        for i in range(n):
            knn_i = naive_knn(k, datapoints[i], datapoints).n_indices
            neighs = [datapoints[j] for j in knn_i]
            w = reconstruct(datapoints[i], neighs)
            for j in range(k):
                self.W[i, knn_i[j]] = w[j].item()
        # The embedding is given by the bottom eigenvectors of M = (I - W)^T (I - W),
        # skipping the trivial constant eigenvector (eigenvalue ~ 0).
        L = np.identity(n) - self.W
        M = L.transpose() @ L
        eigenvalues, eigenvectors = np.linalg.eigh(M)  # M is symmetric positive semi-definite
        order = np.argsort(eigenvalues)
        filtered = [eigenvectors[:, i] for i in order if eigenvalues[i] > 1e-9]
        YT = np.column_stack(filtered[:embedding_dim])  # n x embedding_dim
        self.embeddings = YT.transpose()                # embedding_dim x n; column j embeds datapoints[j]
        self.sample_points = datapoints

    def __call__(self, point: np.ndarray):
        knn_res = naive_knn(self.k, point, self.sample_points, allow_index_result=True)
        if knn_res.this_index >= 0:
            # The query coincides with a sample point; return its embedding directly.
            return self.embeddings[:, knn_res.this_index]
        else:
            # Out-of-sample point: compute reconstruction weights from its neighbours in the
            # input space and apply the same weights to the neighbours' embeddings.
            neighs = [self.sample_points[j] for j in knn_res.n_indices]
            Y = np.column_stack([self.embeddings[:, j] for j in knn_res.n_indices])
            w = reconstruct(point, neighs)
            return (Y @ w).ravel()
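
# Example usage: an illustrative sketch (the data, seed and parameters below are made up).
# Points are sampled near a random 2-D plane embedded in R^5 and mapped down to 2 dimensions;
# the model is then queried with a training point and with a new, unseen point.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    U = rng.normal(size=(5, 2))                     # basis of a random 2-D plane in R^5
    coords = rng.uniform(-1.0, 1.0, size=(200, 2))  # latent 2-D coordinates
    data = [U @ c + 0.01 * rng.normal(size=5) for c in coords]
    lle = StandardLLE(data, embedding_dim=2, k=4)
    print(lle(data[0]))                             # embedding of a training point
    print(lle(U @ np.array([0.3, -0.2])))           # embedding of an out-of-sample point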