Last active
February 9, 2025 14:02
-
-
Save nikhilr612/80d071793419210a8290f8dae3c7e5d4 to your computer and use it in GitHub Desktop.
Naive implementation of Locally Linear Embeddings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import heapq | |
from dataclasses import dataclass | |
@dataclass | |
class NearestNeighbours: | |
""" | |
Stores the result of a nearest-neighbours search. | |
`n_indices` is the list of indices of the vectors nearest to the provided vector. | |
""" | |
n_indices: list[int] = None | |
this_index: int = -1 | |
def naive_knn(k:int, x: np.ndarray, datapoints: list[np.ndarray], proximity_bound = 1e-4, allow_index_result = False) -> NearestNeighbours:
    """
    Compute the `k` nearest neighbours of `x` among `datapoints`.

    Runs in O(n log k): one linear pass over the data plus a bounded heap
    selection via `heapq.nsmallest`.

    Points whose Euclidean distance to `x` is at most `proximity_bound` are
    excluded from the candidate list, which keeps `x` itself out of its own
    neighbour set. If `allow_index_result` is True, the search instead
    short-circuits and returns a `NearestNeighbours` whose `this_index` field
    is the index of the first datapoint lying within `proximity_bound` of `x`.
    """
    dists = []
    for j, dp in enumerate(datapoints):
        dist = np.linalg.norm(dp - x, ord=2)
        if dist <= proximity_bound:
            if allow_index_result:
                # (Near-)coincident point found: report its index directly.
                return NearestNeighbours(this_index=j)
            # Never list a coincident point as its own neighbour.
            continue
        dists.append((dist, j))
    # Bounded selection of the k closest candidates: O(n log k).
    k_argmin = heapq.nsmallest(k, dists)
    return NearestNeighbours(n_indices=[i for _dist, i in k_argmin])
def reconstruct(x: np.ndarray, neighbours: list[np.ndarray], epsilon: float = 0.01) -> np.ndarray:
    """
    Find the affine combination of `neighbours` that best approximates `x`.

    Returns the weight vector `w` of shape (n, 1), with entries summing to 1,
    minimising ||x - sum_j w_j * neighbour_j||^2. Computed from the local
    Gram matrix G = B^T B, where the columns of B are the displacements
    (x - neighbour_j). If G is singular, it is regularised as
    G + epsilon * I before inversion.
    """
    n = len(neighbours)
    X = np.column_stack(neighbours)        # d x n
    ones = np.ones((n, 1))                 # n x 1
    # Broadcast x against every neighbour column. np.reshape makes this work
    # for 1-D vectors (the shape this file passes in) as well as column vectors;
    # the original `x @ ones.T` raised for 1-D x.
    B = np.reshape(x, (-1, 1)) - X         # d x n
    G = B.T @ B                            # n x n local Gram matrix
    try:
        Gm1 = np.linalg.inv(G)
    except np.linalg.LinAlgError:
        # G is singular (e.g. more neighbours than ambient dimensions);
        # Tikhonov-regularise and retry.
        Gm1 = np.linalg.inv(G + epsilon * np.identity(n))
    t = Gm1 @ ones                         # n x 1, unnormalised weights
    V = (ones.T @ t).item()                # scalar normaliser
    return t / V                           # weights sum to 1
class StandardLLE:
    """
    Locally Linear Embeddings ala Roweis & Saul.

    Fits the embedding eagerly in `__init__`; call the instance with a new
    point to map it into the embedded space.
    """

    def __init__(self, datapoints: list[np.ndarray], embedding_dim: int, k: int):
        """
        Fit an LLE embedding of `datapoints` into `embedding_dim` dimensions
        using `k` nearest neighbours per point.
        """
        self.embedding_dim = embedding_dim
        n = len(datapoints)
        self.d = len(datapoints[0])
        self.k = k
        self.W = np.zeros((n, n))
        # Construct the k-NN reconstruction-weight matrix W row by row.
        for i in range(n):
            knn_i = naive_knn(k, datapoints[i], datapoints).n_indices
            neighs = [datapoints[j] for j in knn_i]
            w = reconstruct(datapoints[i], neighs)
            for j in range(k):
                self.W[i, knn_i[j]] = w[j].item()
        # Embedding coordinates are the bottom eigenvectors of M = (I-W)^T (I-W).
        L = np.identity(n) - self.W
        M = L.T @ L
        # M is symmetric PSD, so use eigh: real output, eigenvalues ascending.
        # (np.linalg.eig gives unordered, possibly complex results here.)
        eigenvalues, eigenvectors = np.linalg.eigh(M)
        # Discard the near-zero bottom eigenvalue(s) — the constant eigenvector
        # carries no embedding information. A tolerance is needed: an exact
        # `!= 0` float test essentially never filters it out. Eigenvectors are
        # the *columns* of the returned matrix, not its rows.
        kept = [i for i in range(n) if eigenvalues[i] > 1e-9]
        YT = eigenvectors[:, kept[:embedding_dim]]   # n x embedding_dim
        self.embeddings = YT.T                       # embedding_dim x n
        self.sample_points = datapoints

    def __call__(self, point: np.ndarray):
        """
        Embed `point`: return its stored embedding if it coincides with a
        sample point, otherwise reconstruct it from its neighbours' embeddings.
        """
        knn_res = naive_knn(self.k, point, self.sample_points, allow_index_result=True)
        if knn_res.this_index >= 0:
            # Known sample point: its embedding is the matching column.
            return self.embeddings[:, knn_res.this_index]
        neighs = [self.sample_points[j] for j in knn_res.n_indices]
        Y = self.embeddings[:, knn_res.n_indices]    # embedding_dim x k
        w = reconstruct(point, neighs)               # k x 1
        # Combine neighbour embeddings with the same local weights; flatten
        # so both branches return a 1-D vector of length embedding_dim.
        return (Y @ w).ravel()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment