Last active
November 6, 2018 14:54
-
-
Save nkt1546789/8e6c46aa4c3b55f13d32 to your computer and use it in GitHub Desktop.
An implementation of farthest-first traversal, FFT (D. Hochbaum and D. Shmoys, 1985) on Python (with demo). FFT can be used to initialization for k-means clustering.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def fft(X,D,k): | |
""" | |
X: input vectors (n_samples by dimensionality) | |
D: distance matrix (n_samples by n_samples) | |
k: number of centroids | |
out: indices of centroids | |
""" | |
n=X.shape[0] | |
visited=[] | |
i=np.int32(np.random.uniform(n)) | |
visited.append(i) | |
while len(visited)<k: | |
dist=np.mean([D[i] for i in visited],0) | |
for i in np.argsort(dist)[::-1]: | |
if i not in visited: | |
visited.append(i) | |
break | |
return np.array(visited) | |
if __name__ == '__main__': | |
import matplotlib.pyplot as plt | |
n=300 | |
e=0.1 | |
mu1=np.array([-2.,0.]) | |
mu2=np.array([2.,0.]) | |
mu3=np.array([0.,2.]) | |
mu=np.array([mu1,mu2,mu3]) | |
x1=np.random.multivariate_normal(mu1,e*np.identity(2),n/3) | |
x2=np.random.multivariate_normal(mu2,e*np.identity(2),n/3) | |
x3=np.random.multivariate_normal(mu3,e*np.identity(2),n/3) | |
X=np.r_[x1,x2,x3] | |
y=np.concatenate([np.repeat(0,int(n/3)),np.repeat(1,int(n/3)),np.repeat(2,int(n/3))]) | |
X2=np.c_[np.sum(X**2,1)] | |
D=X2+X2.T-2*X.dot(X.T) | |
centroid_idx=fft(X,D,3) | |
centroids=X[centroid_idx] | |
colors=plt.cm.Paired(np.linspace(0, 1, len(np.unique(y)))) | |
plt.scatter(X[:,0],X[:,1],color=colors[y]) | |
plt.scatter(centroids[:,0],centroids[:,1],color="black",s=50) | |
plt.show() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for the code!
In Python 3.6 and Numpy 1.14.2 the division returns float. So you have to convert them into integers explicitly.