Skip to content

Instantly share code, notes, and snippets.

@GaelVaroquaux
Created September 25, 2011 18:33
Show Gist options
  • Save GaelVaroquaux/1240940 to your computer and use it in GitHub Desktop.
Save GaelVaroquaux/1240940 to your computer and use it in GitHub Desktop.
With a random, shapeless affinity matrix, spectral clustering does not work: the spectrum of the Laplacian is flat.
#!/usr/bin/env python
# gvaroquaux (adapted from tdh.net gmail com)
# 31.August.2011
# Try out the spectral clustering module of scikit-learn (imported as sklearn).
import numpy as np
from scipy import linalg
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.cluster import SpectralClustering
############################################################################
def print_clusters(labels, k):
    """Print, for each cluster id, the indices of the samples assigned to it.

    Parameters
    ----------
    labels : array-like
        Cluster label of every sample. It is converted to an array so that
        the comparison ``labels == i`` is element-wise even for a plain list.
    k : int
        Number of clusters to report; ids ``0 .. k-1`` are printed in order,
        one line per cluster (an empty cluster prints ``[]``).
    """
    labels = np.asarray(labels)
    for i in range(k):
        # np.nonzero returns one index array per axis; keep the first axis.
        members = np.nonzero(labels == i)[0]
        # A single pre-formatted string prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('cluster %d:  %s' % (i, members))
############################################################################
def sample_data(num, k=2, dstyle='uniform', sigma=5):
    """Draw random 2D points and return their pairwise affinity matrix.

    Parameters
    ----------
    num : int
        Number of 2D data points to generate.
    k : int, optional
        Number of Gaussian blobs; only used when ``dstyle == 'gaussian'``.
    dstyle : {'uniform', 'gaussian'}, optional
        Data distribution style. ``'uniform'`` spreads the points uniformly
        over the 20 x 20 square; ``'gaussian'`` draws them from ``k``
        Gaussians whose centers are picked uniformly in that square.
    sigma : float, optional
        Standard deviation of each Gaussian blob, applied to x and y
        independently. Ignored for ``'uniform'``.

    Returns
    -------
    afmat : ndarray
        ``exp(-d**2 / std(d)**2) + 1e-6`` where ``d`` is the result of
        ``euclidean_distances`` on the generated points (the distance between
        every pair of points).

    Raises
    ------
    ValueError
        If ``dstyle`` is not ``'uniform'`` or ``'gaussian'``. Previously an
        unknown style left every point at the origin, which produced an
        all-NaN affinity matrix.
    """
    if dstyle not in ('uniform', 'gaussian'):
        raise ValueError(
            "dstyle must be 'uniform' or 'gaussian', got %r" % (dstyle,))

    xmax = 20
    ymax = 20

    # Randomly generate the data, one row per point.
    data = np.zeros((num, 2))
    if dstyle == 'uniform':
        data[:, 0] = np.random.rand(num) * xmax
        data[:, 1] = np.random.rand(num) * ymax
    else:
        # Centers of the Gaussians: x scaled by xmax, y by ymax.
        seeds = np.random.rand(k, 2) * (xmax, ymax)
        # Floor division keeps the slice bounds integral on Python 3 as well.
        # The "+ 1" guarantees that k chunks cover all num points; the last
        # chunk(s) are simply clipped to num and may be shorter.
        step = num // k + 1
        ss = 0
        for i in range(k):
            # Cluster "i" occupies rows ss .. end-1.
            end = min(ss + step, num)
            # x and y are drawn separately around the cluster center.
            data[ss:end, 0] = np.random.normal(seeds[i, 0], sigma, end - ss)
            data[ss:end, 1] = np.random.normal(seeds[i, 1], sigma, end - ss)
            ss = end

    # Distance between every pair of points.
    dist = euclidean_distances(data)
    # Turn distances into affinities with a Gaussian kernel whose width is
    # the standard deviation of the distances; the small offset keeps every
    # entry strictly positive.
    afmat = np.exp(-dist ** 2 / (dist.std() ** 2)) + 1e-6
    return afmat
##############################################################################
if __name__ == '__main__':
    # Demo: cluster structureless random data and show the resulting labels
    # together with the spectrum of the normalized graph Laplacian.
    num = 400
    k = 2
    dstyle = 'uniform'  # alternative: 'gaussian'
    sigma = 5.0
    # sdata is the affinity matrix returned by sample_data, not raw points.
    sdata = sample_data(num, k, dstyle, sigma)

    # NOTE(review): the `k` / `mode` keywords and fitting directly on an
    # affinity matrix match the scikit-learn release this script was written
    # against; confirm them against the installed version before running.
    sp_clt = SpectralClustering(k=k, mode='amg')
    sp_clt.fit(sdata)

    # Now, show the cluster memberships.
    print_clusters(sp_clt.labels_, k)

    # NOTE(review): this import path is tied to the targeted scikit-learn
    # release as well; verify that it still exists in the installed version.
    from sklearn.utils.graph import graph_laplacian
    laplacian = graph_laplacian(sdata, normed=True)
    # Pre-formatting the message keeps the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print('Laplacian spetrum: %s' % (linalg.eigvalsh(laplacian),))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment