Last active
May 31, 2022 01:03
-
-
Save j-adamczyk/9312f41892badf9571ad4fd44718b5af to your computer and use it in GitHub Desktop.
K-Means clustering with the faiss library
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import faiss | |
import numpy as np | |
class FaissKMeans:
    """K-Means clustering backed by ``faiss.Kmeans`` with a scikit-learn-like API.

    Attributes are named after their scikit-learn counterparts:

    * ``cluster_centers_`` -- centroids taken from the trained faiss object.
    * ``inertia_`` -- last entry of the objective history reported by faiss.
    """

    def __init__(self, n_clusters=8, n_init=10, max_iter=300):
        """Store the hyper-parameters; nothing is trained until ``fit``.

        Args:
            n_clusters: Number of centroids (passed to faiss as ``k``).
            n_init: Number of restarts (passed to faiss as ``nredo``).
            max_iter: Iterations per run (passed to faiss as ``niter``).
        """
        self.n_clusters = n_clusters
        self.n_init = n_init
        self.max_iter = max_iter
        # Populated by fit(); None means "not fitted yet".
        self.kmeans = None
        self.cluster_centers_ = None
        self.inertia_ = None

    def fit(self, X, y=None):
        """Train the faiss k-means model on ``X``.

        Args:
            X: 2-D array-like of samples; converted to a C-contiguous
                float32 array before being handed to faiss.
            y: Ignored. Accepted (and optional) only so the signature is
                compatible with scikit-learn style callers.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not two-dimensional.
        """
        # Convert first so plain lists work and X.shape is always available.
        data = np.ascontiguousarray(X, dtype=np.float32)
        if data.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional, got array with ndim={data.ndim}"
            )

        self.kmeans = faiss.Kmeans(
            d=data.shape[1],
            k=self.n_clusters,
            niter=self.max_iter,
            nredo=self.n_init,
        )
        self.kmeans.train(data)
        self.cluster_centers_ = self.kmeans.centroids
        # NOTE(review): faiss may subsample the training set (see its
        # max_points_per_centroid setting), in which case this objective is
        # not computed over all of X -- confirm before comparing inertia
        # across different n_clusters values.
        self.inertia_ = self.kmeans.obj[-1]
        return self

    def predict(self, X):
        """Return the index of the nearest centroid for every row of ``X``.

        Args:
            X: 2-D array-like of samples; converted to a C-contiguous
                float32 array before searching.

        Returns:
            The index array produced by a 1-nearest-neighbour search on the
            trained faiss index (one column per requested neighbour, i.e. a
            single column here).

        Raises:
            AttributeError: If called before ``fit``.
        """
        if self.kmeans is None:
            # Same exception type the unfitted model raised before, but with
            # a message that explains what went wrong.
            raise AttributeError(
                "FaissKMeans is not fitted yet; call fit() before predict()"
            )
        queries = np.ascontiguousarray(X, dtype=np.float32)
        _distances, labels = self.kmeans.index.search(queries, 1)
        return labels
@NightMachinary sure, that should be simple, I'll write about it in 2-3 weeks
Could you please check that the inertia is reported correctly for kmeans clustering with faiss? For me it goes up with the number of clusters, which is suspicious... Thanks!
@spokatee how would an increase in the reported inertia affect the model?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can you add a GPU version as well?