Last active
February 9, 2025 14:02
-
-
Save nikhilr612/80d071793419210a8290f8dae3c7e5d4 to your computer and use it in GitHub Desktop.
Naive implementation of Locally Linear Embeddings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import heapq | |
from dataclasses import dataclass | |
@dataclass | |
class NearestNeighbours: | |
""" | |
Stores the result of a nearest-neighbours search. | |
`n_indices` is the list of indices of the vectors nearest to the provided vector. | |
""" | |
n_indices: list[int] = None | |
this_index: int = -1 | |
def naive_knn(k:int, x: np.ndarray, datapoints: list[np.ndarray], proximity_bound = 1e-4, allow_index_result = False) -> NearestNeighbours:
    """
    Compute the `k` nearest neighbours of `x` among `datapoints`.

    Runs in O(n log k): one linear pass over the data plus a bounded heap
    selection via `heapq.nsmallest`.

    Points whose Euclidean distance to `x` is at most `proximity_bound` are
    excluded from the candidate list, which keeps `x` itself out of its own
    neighbour set. If `allow_index_result` is True, the search instead
    short-circuits and returns a `NearestNeighbours` whose `this_index` field
    is the index of the first datapoint lying within `proximity_bound` of `x`.
    """
    dists = []
    for j, dp in enumerate(datapoints):
        dist = np.linalg.norm(dp - x, ord=2)
        if dist <= proximity_bound:
            if allow_index_result:
                # (Near-)coincident point found: report its index directly.
                return NearestNeighbours(this_index=j)
            # Never list a coincident point as its own neighbour.
            continue
        dists.append((dist, j))
    # Bounded selection of the k closest candidates: O(n log k).
    k_argmin = heapq.nsmallest(k, dists)
    return NearestNeighbours(n_indices=[i for _dist, i in k_argmin])
def reconstruct(x: np.ndarray, neighbours: list[np.ndarray], epsilon: float = 0.01) -> np.ndarray:
    """
    Find the affine combination of `neighbours` that best approximates `x`.

    Returns the weight vector `w` of shape (n, 1), with entries summing to 1,
    minimising ||x - sum_j w_j * neighbour_j||^2. Computed from the local
    Gram matrix G = B^T B, where the columns of B are the displacements
    (x - neighbour_j). If G is singular, it is regularised as
    G + epsilon * I before inversion.
    """
    n = len(neighbours)
    X = np.column_stack(neighbours)        # d x n
    ones = np.ones((n, 1))                 # n x 1
    # Broadcast x against every neighbour column. np.reshape makes this work
    # for 1-D vectors (the shape this file passes in) as well as column vectors;
    # the original `x @ ones.T` raised for 1-D x.
    B = np.reshape(x, (-1, 1)) - X         # d x n
    G = B.T @ B                            # n x n local Gram matrix
    try:
        Gm1 = np.linalg.inv(G)
    except np.linalg.LinAlgError:
        # G is singular (e.g. more neighbours than ambient dimensions);
        # Tikhonov-regularise and retry.
        Gm1 = np.linalg.inv(G + epsilon * np.identity(n))
    t = Gm1 @ ones                         # n x 1, unnormalised weights
    V = (ones.T @ t).item()                # scalar normaliser
    return t / V                           # weights sum to 1
class StandardLLE:
    """
    Locally Linear Embeddings ala Roweis & Saul.

    Fits the embedding eagerly in `__init__`; call the instance with a new
    point to map it into the embedded space.
    """

    def __init__(self, datapoints: list[np.ndarray], embedding_dim: int, k: int):
        """
        Fit an LLE embedding of `datapoints` into `embedding_dim` dimensions
        using `k` nearest neighbours per point.
        """
        self.embedding_dim = embedding_dim
        n = len(datapoints)
        self.d = len(datapoints[0])
        self.k = k
        self.W = np.zeros((n, n))
        # Construct the k-NN reconstruction-weight matrix W row by row.
        for i in range(n):
            knn_i = naive_knn(k, datapoints[i], datapoints).n_indices
            neighs = [datapoints[j] for j in knn_i]
            w = reconstruct(datapoints[i], neighs)
            for j in range(k):
                self.W[i, knn_i[j]] = w[j].item()
        # Embedding coordinates are the bottom eigenvectors of M = (I-W)^T (I-W).
        L = np.identity(n) - self.W
        M = L.T @ L
        # M is symmetric PSD, so use eigh: real output, eigenvalues ascending.
        # (np.linalg.eig gives unordered, possibly complex results here.)
        eigenvalues, eigenvectors = np.linalg.eigh(M)
        # Discard the near-zero bottom eigenvalue(s) — the constant eigenvector
        # carries no embedding information. A tolerance is needed: an exact
        # `!= 0` float test essentially never filters it out. Eigenvectors are
        # the *columns* of the returned matrix, not its rows.
        kept = [i for i in range(n) if eigenvalues[i] > 1e-9]
        YT = eigenvectors[:, kept[:embedding_dim]]   # n x embedding_dim
        self.embeddings = YT.T                       # embedding_dim x n
        self.sample_points = datapoints

    def __call__(self, point: np.ndarray):
        """
        Embed `point`: return its stored embedding if it coincides with a
        sample point, otherwise reconstruct it from its neighbours' embeddings.
        """
        knn_res = naive_knn(self.k, point, self.sample_points, allow_index_result=True)
        if knn_res.this_index >= 0:
            # Known sample point: its embedding is the matching column.
            return self.embeddings[:, knn_res.this_index]
        neighs = [self.sample_points[j] for j in knn_res.n_indices]
        Y = self.embeddings[:, knn_res.n_indices]    # embedding_dim x k
        w = reconstruct(point, neighs)               # k x 1
        # Combine neighbour embeddings with the same local weights; flatten
        # so both branches return a 1-D vector of length embedding_dim.
        return (Y @ w).ravel()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment