GaelVaroquaux · September 25, 2011 18:33
diff --git a/spectral_clustering.py b/spectral_clustering.py
 #!/usr/bin/env python
 # gvaroquaux (adapted from tdh.net gmail com)
 # 31.August.2011
 # try clustering module in scikits.learn
 import numpy as np
 from scipy import linalg

 from sklearn.metrics.pairwise import euclidean_distances
 from sklearn.cluster import SpectralClustering

 ############################################################################
 def print_clusters(labels, k):
    """Print, one line per cluster, the indices of the samples it contains.

    Parameters
    ----------
    labels : array-like of int
        Cluster label assigned to each sample.
    k : int
        Number of clusters; labels ``0 .. k-1`` are reported in order.
    """
    # Coerce to an array so that ``labels == idx`` is always an element-wise
    # comparison, even when a plain list is passed in.
    labels = np.asarray(labels)
    for idx in range(k):
        members = np.nonzero(labels == idx)[0]
        # A single pre-formatted argument prints identically whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('cluster %d:  %s' % (idx, members))


 ############################################################################
 def sample_data(num, k=2, dstyle='uniform', sigma=5):
    """Generate random 2D points and return their pairwise affinity matrix.

    Parameters
    ----------
    num : int
        Number of 2D data points to draw.
    k : int, optional
        Number of Gaussian blobs; only used when ``dstyle == 'gaussian'``.
    dstyle : {'uniform', 'gaussian'}, optional
        'uniform' draws the points uniformly over [0, 20) x [0, 20);
        'gaussian' draws them from ``k`` Gaussians whose centers are
        themselves drawn uniformly from that square.
    sigma : float, optional
        Standard deviation of each Gaussian blob (unused for 'uniform').

    Returns
    -------
    afmat : ndarray
        ``exp(-d**2 / std(d)**2) + 1e-6`` where ``d`` holds the pairwise
        Euclidean distances between the generated points.

    Raises
    ------
    ValueError
        If ``dstyle`` is not recognised, or if ``k < 1`` for 'gaussian'.
    """
    if dstyle not in ('uniform', 'gaussian'):
        # An unknown style used to leave every point at the origin, which
        # gives a zero distance spread and therefore a NaN affinity matrix.
        raise ValueError("dstyle must be 'uniform' or 'gaussian', got %r"
                         % (dstyle,))
    if dstyle == 'gaussian' and k < 1:
        raise ValueError("k must be >= 1 for gaussian data, got %r" % (k,))

    xmax = 20
    ymax = 20

    # randomly generate data
    data = np.zeros((num, 2))
    if dstyle == 'uniform':
        data[:, 0] = np.random.rand(num) * xmax
        data[:, 1] = np.random.rand(num) * ymax
    else:
        # the centers of the Gaussians, one row per blob
        seeds = np.random.rand(k, 2) * [xmax, ymax]

        # Spread the points over the blobs as evenly as possible: the first
        # ``extra`` blobs receive one more point than the others, so no blob
        # is left empty while num >= k.
        base, extra = divmod(num, k)
        start = 0
        for i in range(k):
            stop = start + base + (1 if i < extra else 0)
            count = stop - start

            # generate x, y separately
            data[start:stop, 0] = np.random.normal(seeds[i, 0], sigma, count)
            data[start:stop, 1] = np.random.normal(seeds[i, 1], sigma, count)
            start = stop

    # distance between every pair of points
    dist = euclidean_distances(data)

    # Gaussian kernel scaled by the spread of the distances; the small offset
    # keeps every affinity strictly positive.
    afmat = np.exp(-dist ** 2 / (dist.std() ** 2)) + 1e-6
    return afmat

 ##############################################################################

 if __name__ == '__main__':
    # Demo settings: 400 uniformly scattered points split into 2 clusters
    # (use 'gaussian' as the style to draw from Gaussian blobs instead).
    n_points, n_clusters = 400, 2
    style = 'uniform'
    blob_sigma = 5.0

    # Build the affinity matrix and cluster it.
    affinity = sample_data(n_points, n_clusters, style, blob_sigma)
    clusterer = SpectralClustering(k=n_clusters, mode='amg')
    clusterer.fit(affinity)

    # Report the members of each cluster ...
    print_clusters(clusterer.labels_, n_clusters)

    # ... followed by the eigenvalues of the normalised graph Laplacian of
    # the affinity matrix.
    from sklearn.utils.graph import graph_laplacian
    spectrum = linalg.eigvalsh(graph_laplacian(affinity, normed=True))
    print('Laplacian spetrum: %s' % spectrum)
	#!/usr/bin/env python
	# gvaroquaux (adapted from tdh.net gmail com)
	# 31.August.2011
	# try clustering module in scikits.learn
	import numpy as np
	from scipy import linalg

	from sklearn.metrics.pairwise import euclidean_distances
	from sklearn.cluster import SpectralClustering

	############################################################################
	def print_clusters(labels, k):
	    """Print, one line per cluster, the indices of the samples it contains.

	    Parameters
	    ----------
	    labels : array-like of int
	        Cluster label assigned to each sample.
	    k : int
	        Number of clusters; labels ``0 .. k-1`` are reported in order.
	    """
	    # Coerce to an array so that ``labels == idx`` is always an element-wise
	    # comparison, even when a plain list is passed in.
	    labels = np.asarray(labels)
	    for idx in range(k):
	        members = np.nonzero(labels == idx)[0]
	        # A single pre-formatted argument prints identically whether
	        # ``print`` is the Python 2 statement or the Python 3 function.
	        print('cluster %d:  %s' % (idx, members))


	############################################################################
	def sample_data(num, k=2, dstyle='uniform', sigma=5):
	    """Generate random 2D points and return their pairwise affinity matrix.

	    Parameters
	    ----------
	    num : int
	        Number of 2D data points to draw.
	    k : int, optional
	        Number of Gaussian blobs; only used when ``dstyle == 'gaussian'``.
	    dstyle : {'uniform', 'gaussian'}, optional
	        'uniform' draws the points uniformly over [0, 20) x [0, 20);
	        'gaussian' draws them from ``k`` Gaussians whose centers are
	        themselves drawn uniformly from that square.
	    sigma : float, optional
	        Standard deviation of each Gaussian blob (unused for 'uniform').

	    Returns
	    -------
	    afmat : ndarray
	        ``exp(-d**2 / std(d)**2) + 1e-6`` where ``d`` holds the pairwise
	        Euclidean distances between the generated points.

	    Raises
	    ------
	    ValueError
	        If ``dstyle`` is not recognised, or if ``k < 1`` for 'gaussian'.
	    """
	    if dstyle not in ('uniform', 'gaussian'):
	        # An unknown style used to leave every point at the origin, which
	        # gives a zero distance spread and therefore a NaN affinity matrix.
	        raise ValueError("dstyle must be 'uniform' or 'gaussian', got %r"
	                         % (dstyle,))
	    if dstyle == 'gaussian' and k < 1:
	        raise ValueError("k must be >= 1 for gaussian data, got %r" % (k,))

	    xmax = 20
	    ymax = 20

	    # randomly generate data
	    data = np.zeros((num, 2))
	    if dstyle == 'uniform':
	        data[:, 0] = np.random.rand(num) * xmax
	        data[:, 1] = np.random.rand(num) * ymax
	    else:
	        # the centers of the Gaussians, one row per blob
	        seeds = np.random.rand(k, 2) * [xmax, ymax]

	        # Spread the points over the blobs as evenly as possible: the first
	        # ``extra`` blobs receive one more point than the others, so no blob
	        # is left empty while num >= k.
	        base, extra = divmod(num, k)
	        start = 0
	        for i in range(k):
	            stop = start + base + (1 if i < extra else 0)
	            count = stop - start

	            # generate x, y separately
	            data[start:stop, 0] = np.random.normal(seeds[i, 0], sigma, count)
	            data[start:stop, 1] = np.random.normal(seeds[i, 1], sigma, count)
	            start = stop

	    # distance between every pair of points
	    dist = euclidean_distances(data)

	    # Gaussian kernel scaled by the spread of the distances (the ``**``
	    # operators are restored here); the small offset keeps every affinity
	    # strictly positive.
	    afmat = np.exp(-dist ** 2 / (dist.std() ** 2)) + 1e-6
	    return afmat

	##############################################################################

	if __name__ == '__main__':
	    # Demo settings: 400 uniformly scattered points split into 2 clusters
	    # (use 'gaussian' as the style to draw from Gaussian blobs instead).
	    num = 400
	    k = 2
	    dstype = 'uniform'
	    sigma = 5.0

	    # Build the affinity matrix and cluster it.
	    sdata = sample_data(num, k, dstype, sigma)
	    sp_clt = SpectralClustering(k=k, mode='amg')
	    sp_clt.fit(sdata)

	    # now, visualize: first the members of each cluster ...
	    print_clusters(sp_clt.labels_, k)

	    # ... then the eigenvalues of the normalised graph Laplacian of the
	    # affinity matrix.
	    from sklearn.utils.graph import graph_laplacian
	    laplacian = graph_laplacian(sdata, normed=True)
	    print('Laplacian spetrum: %s' % linalg.eigvalsh(laplacian))