# Graph utility functions in Python, from https://github.com/mdeff/ntds_2016
# ### python_ncut_lib.py
# Copyright (C) 2010 R. Cameron Craddock ([email protected])
#
# This script is a part of the pyClusterROI python toolbox for the spatially constrained clustering of fMRI
# data. It provides the library functions for performing normalized cut clustering according to:
#
# Shi, J., & Malik, J. (2000). Normalized cuts and image segmentation. IEEE Transactions on Pattern Analysis
# and Machine Intelligence, 22(8), 888-905. doi: 10.1109/34.868688.
#
# Yu, S. X., & Shi, J. (2003). Multiclass spectral clustering. Proceedings of the Ninth IEEE International
# Conference on Computer Vision, vol. 1, 313-319. IEEE. doi: 10.1109/ICCV.2003.1238361.
#
# This code is a port of the NcutClustering_7 matlab toolbox available here: http://www.cis.upenn.edu/~jshi/software/
#
# For more information refer to:
#
# Craddock, R. C., James, G. A., Holtzheimer, P. E., Hu, X. P., & Mayberg, H. S. (2011). A whole
# brain fMRI atlas generated via spatially constrained spectral clustering. Human brain mapping,
# doi: 10.1002/hbm.21333.
#
# Documentation, updated source code and other information can be found at the NITRC web page:
# http://www.nitrc.org/projects/cluster_roi/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
####
# this script requires NumPy (numpy.scipy.org) and SciPy (www.scipy.org) to be
# installed in a directory that is accessible through PYTHONPATH
import sys
from numpy import array, reshape, shape, matrix, ones, zeros, sqrt, sort, arange
from numpy import nonzero, fromfile, tile, append, prod, double, argsort, sign
from numpy import kron, multiply, divide, abs, reshape, asarray
from numpy.random import rand
from scipy.sparse import csc_matrix, spdiags
from scipy.sparse.linalg import eigsh
from scipy.linalg import norm, svd, LinAlgError
# exception handler for singular value decomposition
class SVDError(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
# (eigen_val, eigen_vec) = ncut( W, nbEigenValues ):
#
# This function performs the first step of normalized cut spectral clustering. The normalized Laplacian
# is calculated from the similarity matrix W, and its top nbEigenValues eigenvectors are computed. The number of
# eigenvectors corresponds to the maximum number of classes (K) that will be produced by the clustering algorithm.
#
# W: symmetric #feature x #feature sparse matrix representing the similarity between voxels,
# traditionally this matrix should be positive semidefinite, but a trick is implemented to
# allow negative matrix entries
# nbEigenValues: number of eigenvectors that should be calculated, this determines the maximum number of
# clusters (K) that can be derived from the result
# eigen_val: (output) eigenvalues from the eigen decomposition of the Laplacian of W
# eigen_vec: (output) eigenvectors from the eigen decomposition of the Laplacian of W
#
def ncut(W, nbEigenValues):
# parameters
offset = .5
maxiterations = 100
eigsErrorTolerence = 1e-6
truncMin = 1e-6
eps = 2.2204e-16
m = shape(W)[1]
    # make sure that W is symmetric; this is a computationally expensive operation, so only enable it for debugging
    # if (W - W.transpose()).sum() != 0:
    #     print("W should be symmetric!")
    #     exit(0)
# degrees and regularization
    # S. Yu, "Understanding Popout through Repulsion", CVPR 2001
    # Allows negative values as well as improving the invertibility
    # of d for small numbers; this likely also improves the stability
    # of the eigendecomposition.
d = abs(W).sum(0)
dr = 0.5 * (d - W.sum(0))
d = d + offset * 2
dr = dr + offset
    # calculation of the normalized Laplacian
W = W + spdiags(dr, [0], m, m, "csc")
Dinvsqrt = spdiags((1.0 / sqrt(d + eps)), [0], m, m, "csc")
P = Dinvsqrt * (W * Dinvsqrt);
# perform the eigen decomposition
#eigen_val, eigen_vec = eigsh(P, nbEigenValues, maxiter=maxiterations, tol=eigsErrorTolerence, which='LA')
eigen_val, eigen_vec = eigsh(P, nbEigenValues, tol=eigsErrorTolerence, which='LA')
# sort the eigen_vals so that the first
# is the largest
i = argsort(-eigen_val)
eigen_val = eigen_val[i]
eigen_vec = eigen_vec[:, i]
# normalize the returned eigenvectors
eigen_vec = Dinvsqrt * matrix(eigen_vec)
norm_ones = norm(ones((m, 1)))
for i in range(0, shape(eigen_vec)[1]):
eigen_vec[:, i] = (eigen_vec[:, i] / norm(eigen_vec[:, i])) * norm_ones
if eigen_vec[0, i] != 0:
eigen_vec[:, i] = -1 * eigen_vec[:, i] * sign(eigen_vec[0, i])
return (eigen_val, eigen_vec)
# eigenvec_discrete=discretisation( eigen_vec ):
#
# This function performs the second step of normalized cut clustering which assigns features to clusters
# based on the eigenvectors of the Laplacian of a similarity matrix. There are a few different ways to
# perform this task. Shi and Malik (2000) iteratively bisect the features based on the positive and
# negative loadings of the eigenvectors. Ng, Jordan and Weiss (2001) proposed to perform K-means clustering
# on the rows of the eigenvectors. The method implemented here was proposed by Yu and Shi (2003); it finds
# a discrete solution by iteratively rotating a binarised set of vectors until they are maximally similar to
# the eigenvectors (the full citation is at the top of this file). An advantage
# of this method over K-means is that it is _more_ deterministic, i.e. you should get very similar results
# every time you run the algorithm on the same data.
#
# The number of clusters that the features are grouped into is determined by the number of eigenvectors
# (number of columns) in the input array eigen_vec. A caveat of this method is that the number of resulting
# clusters is bounded above by the number of eigenvectors, but it may be smaller.
#
# eigen_vec: Eigenvectors of the normalized Laplacian calculated from the similarity matrix
# for the corresponding clustering problem
# eigen_vec_discrete: (output) discretised eigenvectors, i.e. vectors of 0 and 1 which indicate
# whether or not a feature belongs to the cluster defined by the eigenvector.
# E.g. a one in the 10th row of the 4th eigenvector (column) means that feature
# 10 belongs to cluster #4.
#
def discretisation(eigen_vec):
eps = 2.2204e-16
# normalize the eigenvectors
[n, k] = shape(eigen_vec)
vm = kron(ones((1, k)), sqrt(multiply(eigen_vec, eigen_vec).sum(1)))
eigen_vec = divide(eigen_vec, vm)
svd_restarts = 0
exitLoop = 0
### if there is an exception we try to randomize and rerun SVD again
### do this 30 times
while (svd_restarts < 30) and (exitLoop == 0):
# initialize algorithm with a random ordering of eigenvectors
c = zeros((n, 1))
R = matrix(zeros((k, k)))
R[:, 0] = eigen_vec[int(rand(1) * (n)), :].transpose()
for j in range(1, k):
c = c + abs(eigen_vec * R[:, j - 1])
R[:, j] = eigen_vec[c.argmin(), :].transpose()
lastObjectiveValue = 0
nbIterationsDiscretisation = 0
nbIterationsDiscretisationMax = 20
        # iteratively rotate the discretised eigenvectors until they
        # are maximally similar to the input eigenvectors; this
        # converges when the difference between the current solution
        # and the previous solution is less than eps, or when we
        # have reached the maximum number of iterations
while exitLoop == 0:
nbIterationsDiscretisation = nbIterationsDiscretisation + 1
# rotate the original eigen_vectors
tDiscrete = eigen_vec * R
# discretise the result by setting the max of each row=1 and
# other values to 0
j = reshape(asarray(tDiscrete.argmax(1)), n)
eigenvec_discrete = csc_matrix((ones(len(j)), (range(0, n), array(j))), shape=(n, k))
# calculate a rotation to bring the discrete eigenvectors cluster to the
# original eigenvectors
tSVD = eigenvec_discrete.transpose() * eigen_vec
# catch a SVD convergence error and restart
            try:
                U, S, Vh = svd(tSVD)
            except LinAlgError:
                # count the restart, then go back to the beginning of the
                # outer loop and retry with a new random initialization
                svd_restarts += 1
                print("SVD did not converge, randomizing and trying again", file=sys.stderr)
                break
# test for convergence
NcutValue = 2 * (n - S.sum())
if ((abs(NcutValue - lastObjectiveValue) < eps ) or
( nbIterationsDiscretisation > nbIterationsDiscretisationMax )):
exitLoop = 1
else:
# otherwise calculate rotation and continue
lastObjectiveValue = NcutValue
R = matrix(Vh).transpose() * matrix(U).transpose()
if exitLoop == 0:
raise SVDError("SVD did not converge after 30 retries")
else:
return (eigenvec_discrete)
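####
# Minimal usage sketch (illustrative addition, not part of the original toolbox):
# run the two NCut steps above on a tiny synthetic similarity matrix with two
# obvious blocks. The 20x20 matrix W below is an assumption made purely for
# demonstration; call _example_ncut_pipeline() to try it.
def _example_ncut_pipeline():
    import numpy as np
    from scipy.sparse import csc_matrix
    n_half = 10
    S = np.zeros((2 * n_half, 2 * n_half))
    S[:n_half, :n_half] = 1.0                 # strong similarities inside block 1
    S[n_half:, n_half:] = 1.0                 # strong similarities inside block 2
    S[0, n_half] = S[n_half, 0] = 0.01        # one weak cross link keeps the graph connected
    np.fill_diagonal(S, 0.0)
    W = csc_matrix(S)                         # ncut() expects a sparse symmetric matrix
    eigen_val, eigen_vec = ncut(W, 2)         # step 1: leading eigenvectors of the normalized Laplacian
    discrete = discretisation(eigen_vec)      # step 2: rotate them into a discrete cluster indicator
    labels = np.asarray(discrete.todense()).argmax(axis=1)
    print(labels)                             # the two blocks should land in different clusters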
import numpy as np
import scipy.sparse
from collections import defaultdict
from scipy import stats
import ncut
import sklearn.metrics.pairwise
import time
import matplotlib.pyplot as plt
def reindex_W_with_classes(W,C):
"""
Function that reindexes W according to communities/classes
Usage:
    [reindexed_W,reindexed_C] = reindex_W_with_classes(W,C)
Notations:
n = nb_data
nc = nb_communities
Input variables:
W = Adjacency matrix. Size = n x n.
C = Classes used for reindexing W. Size = n x 1. Values in [0,1,...,nc-1].
Output variables:
reindexed_W = reindexed adjacency matrix. Size = n x n.
reindexed_C = reindexed classes C. Size = n x 1. Values in [0,1,...,nc-1].
"""
n = C.shape[0] # nb of vertices
nc = len(np.unique(C)) # nb of communities
reindexing_mapping = np.zeros([n]) # mapping for reindexing W
reindexed_C = np.zeros([n]) # reindexed C
tot = 0
for k in range(nc):
cluster = (np.where(C==k))[0]
length_cluster = len(cluster)
x = np.array(range(tot,tot+length_cluster))
reindexing_mapping[cluster] = x
reindexed_C[x] = k
tot += length_cluster
idx_row,idx_col,val = scipy.sparse.find(W)
idx_row = reindexing_mapping[idx_row]
idx_col = reindexing_mapping[idx_col]
reindexed_W = scipy.sparse.csr_matrix((val, (idx_row, idx_col)), shape=(n, n))
return reindexed_W,reindexed_C
def graph_laplacian(W, normalized=True):
"""
Graph Laplacian Operator
    Usages:
    L = graph_laplacian(W); # compute normalized graph Laplacian
    L = graph_laplacian(W,False); # compute unnormalized graph Laplacian
Notations:
n = nb_data
Input variables:
W = Adjacency matrix. Size = n x n.
Output variables:
L = Graph Laplacian. Size = n x n.
"""
# Degree vector
d = W.sum(axis=0)
# Laplacian matrix
if not normalized:
D = scipy.sparse.diags(d.A.squeeze(), 0)
L = D - W
else:
d += np.spacing(np.array(0, W.dtype)) # d += epsilon
d = 1.0 / np.sqrt(d)
D = scipy.sparse.diags(d.A.squeeze(), 0)
I = scipy.sparse.identity(d.size, dtype=W.dtype)
L = I - D * W * D
return L
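# Illustrative check (added sketch, toy graph assumed): the normalized Laplacian of a
# connected graph annihilates the vector of square-root degrees. Call
# _example_graph_laplacian() to run it.
def _example_graph_laplacian():
    import numpy as np
    import scipy.sparse
    n = 6
    rows = np.arange(n)
    cols = (rows + 1) % n
    W = scipy.sparse.csr_matrix((np.ones(n), (rows, cols)), shape=(n, n))
    W = W + W.T                                    # symmetric ring graph, every degree = 2
    L = graph_laplacian(W)                         # normalized Laplacian I - D^-1/2 W D^-1/2
    deg = np.asarray(W.sum(axis=0)).squeeze()
    print(np.allclose(L.dot(np.sqrt(deg)), 0.0))   # True: sqrt(deg) spans the null space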
def graph_gradient(W):
"""
Graph Gradient Operator
    Usage:
    G = graph_gradient(W); # compute the graph gradient operator
Notations:
n = number of nodes
m = number of edges
Input variables:
W = Adjacency matrix. Size = n x n.
Output variables:
G = Graph Gradient Operator. Size = m x n.
"""
W = W.todense()
n = W.shape[0] # number of nodes
Wtri = np.triu(W,1) # Extract upper triangular part of W
r,c = np.where(Wtri>0.0) # scipy.sparse.find
v = Wtri[r,c]
ne = len(r)
Dr = np.arange(0,ne); Dr = np.concatenate([Dr,Dr])
Dc = np.zeros([2*ne], dtype='int32')
Dc[:ne] = r
Dc[ne:2*ne] = c
Dv = np.zeros([2*ne])
Dv[:ne] = np.sqrt(v)
Dv[ne:2*ne] = -np.sqrt(v)
G = scipy.sparse.csr_matrix((Dv, (Dr, Dc)), shape=(ne, n), dtype='float32')
return G
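# Illustrative check (added sketch, tiny weighted graph assumed): for an undirected
# graph the gradient operator satisfies G^T G = D - W, the unnormalized Laplacian.
# Call _example_graph_gradient() to run it.
def _example_graph_gradient():
    import numpy as np
    import scipy.sparse
    W = scipy.sparse.csr_matrix(np.array([[0., 1., 2.],
                                          [1., 0., 0.],
                                          [2., 0., 0.]]))
    G = graph_gradient(W)                               # m x n edge-gradient operator
    L_unnorm = graph_laplacian(W, normalized=False)     # D - W
    print(np.allclose((G.T * G).toarray(), L_unnorm.toarray()))   # True (up to float32 rounding)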
def sortEVD(lamb, U):
idx = lamb.argsort() # increasing order
return lamb[idx], U[:,idx]
def nldr_visualization(W):
"""
Visualization technique:
Belkin-Niyogi, Laplacian eigenmaps for dimensionality reduction and data representation, 2003
Usage:
X,Y,Z = nldr_visualization(W)
Notations:
n = nb_data
Input variables:
W = Adjacency matrix. Size = n x n.
Output variables:
X = 1st data coordinates in low-dim manifold. Size n x 1.
Y = 2nd data coordinates in low-dim manifold. Size n x 1.
Z = 3rd data coordinates in low-dim manifold. Size n x 1.
"""
# Compute normalized graph Laplacian
L = graph_laplacian(W)
# Regularization for ill-posed graphs
L = L + 1e-6* scipy.sparse.identity(L.shape[0], dtype=W.dtype)
# Compute the first three Laplacian Eigenmaps
lamb, U = scipy.sparse.linalg.eigsh(L, k=4, which='SM')
# Sort eigenvalue from smallest to largest values
lamb, U = sortEVD(lamb, U)
# Coordinates of graph vertices in the low-dim embedding manifold
X = U[:,1]
Y = U[:,2]
Z = U[:,3]
return X,Y,Z
def compute_purity(C_computed,C_grndtruth,R):
"""
Clustering accuracy can be defined with the purity measure, defined here:
Yang-Hao-Dikmen-Chen-Oja, Clustering by nonnegative matrix factorization
using graph random walk, 2012.
    Usages:
    accuracy = compute_purity(C_computed,C_grndtruth,R)
Notations:
n = nb_data
Input variables:
C_computed = Computed clusters. Size = n x 1. Values in [0,1,...,R-1].
C_grndtruth = Ground truth clusters. Size = n x 1. Values in [0,1,...,R-1].
R = Number of clusters.
Output variables:
accuracy = Clustering accuracy of computed clusters.
"""
N = C_grndtruth.size
nb_of_dominant_points_in_class = np.zeros((R, 1))
w = defaultdict(list)
z = defaultdict(list)
for k in range(R):
for i in range(N):
if C_computed[i]==k:
w[k].append(C_grndtruth[i])
if len(w[k])>0:
val,nb = stats.mode(w[k])
z[k] = int(nb.squeeze())
else:
z[k] = 0
sum_dominant = 0
for k in range(R):
sum_dominant = sum_dominant + z[k]
purity = float(sum_dominant) / float(N)* 100.0
return purity
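# Worked example (toy labels assumed, added for illustration): computed cluster 0 holds
# two points of ground-truth class 0, computed cluster 1 holds three points of class 1
# and one of class 0, so purity = (2 + 3) / 6 * 100 ~= 83.3.
def _example_compute_purity():
    import numpy as np
    C_computed = np.array([0, 0, 1, 1, 1, 1])
    C_grndtruth = np.array([0, 0, 0, 1, 1, 1])
    print(compute_purity(C_computed, C_grndtruth, 2))   # ~83.33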
def compute_ncut(W, Cgt, R):
"""
Graph spectral clustering technique NCut:
Yu-Shi, Multiclass spectral clustering, 2003
Code available here: http://www.cis.upenn.edu/~jshi/software
Usages:
C,acc = compute_ncut(W,Cgt,R)
Notations:
n = nb_data
Input variables:
W = Adjacency matrix. Size = n x n.
R = Number of clusters.
Cgt = Ground truth clusters. Size = n x 1. Values in [0,1,...,R-1].
Output variables:
C = NCut solution. Size = n x 1. Values in [0,1,...,R-1].
acc = Accuracy of NCut solution.
"""
# Apply ncut
eigen_val, eigen_vec = ncut.ncut( W, R )
# Discretize to get cluster id
eigenvec_discrete = ncut.discretisation( eigen_vec )
res = eigenvec_discrete.dot(np.arange(1, R + 1))
C = np.array(res-1,dtype=np.int64)
# Compute accuracy
computed_solution = C
ground_truth = Cgt
acc = compute_purity( computed_solution,ground_truth, R)
return C, acc
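# Illustrative sketch (assumes the NCut code above is importable as the `ncut` module,
# as the `import ncut` at the top of this file expects): feed a hand-made two-block
# adjacency matrix and check that NCut recovers the blocks.
def _example_compute_ncut():
    import numpy as np
    import scipy.sparse
    A = np.zeros((16, 16))
    A[:8, :8] = 1.0
    A[8:, 8:] = 1.0
    A[0, 8] = A[8, 0] = 0.1            # weak link between the two communities
    np.fill_diagonal(A, 0.0)
    W = scipy.sparse.csr_matrix(A)
    Cgt = np.array([0] * 8 + [1] * 8)  # ground-truth communities
    C, acc = compute_ncut(W, Cgt, 2)
    print(C, acc)                      # purity should be (close to) 100 on this toy graph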
def construct_knn_graph(X,k,dist):
"""
Usages:
(i) W = construct_knn_graph(X,k,'euclidean')
(ii) W = construct_knn_graph(X,k,'cosine')
(iii) W = construct_knn_graph(X,k,'cosine_binary')
Notations:
n = nb_data
d = data_dimensionality
Input variables:
X = Data matrix. Size = n x d.
    k = Number of nearest neighbors.
dist = Type of distances: 'euclidean' or 'cosine'
Output variables:
W = Adjacency matrix. Size = n x n.
Pre-process data (if needed)
----------------------------
    >>> n = nb_data
    >>> d = data_dimensionality
    Center the data, i.e. x_i <- x_i - mean({x_i}), size(X) = n x d
    >>> X = X - np.mean(X, axis=0)
    Normalize the scale of each feature, size(X) = n x d
    >>> X = X / (np.sqrt(np.sum(X**2, axis=0)) + 1e-10)
    Projection onto the sphere, i.e. ||x_i||_2 = 1, size(X) = n x d
    >>> X = (X.T / (np.sqrt(np.sum(X**2, axis=1)) + 1e-10)).T
    Normalize the data values to [0,1]:
    >>> X = (X - X.min()) / (X.max() - X.min())
    or to [-1,1]:
    >>> X = 2 * ((X - X.min()) / (X.max() - X.min()) - 0.5)
"""
n = X.shape[0]
######################################
# Construct a k-NN graph with L2/Euclidean distance
######################################
if dist == 'euclidean':
print('k-NN graph with euclidean distance')
# Compute L2/Euclidean distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
# Sort Distance matrix
idx = np.argsort(D)[:,:k] # indices of k nearest neighbors
D.sort() # sort D from smallest to largest values
D = D[:,:k]
# Compute Weight matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
W = np.exp(- D**2 / sigma2)
# Make W sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = W.reshape(n*k)
W = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make W symmetric
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
# No self-connections
#W.setdiag(0)
######################################
# Construct a k-NN graph with Zelnik-Perona technique
# "Self-Tuning Spectral Clustering", 2005
######################################
if dist == 'euclidean_zelnik_perona':
print('k-NN graph with Zelnik-Perona technique')
# Compute L2/Euclidean distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
# Sort Distance matrix
idx = np.argsort(D)[:,:k] # indices of k nearest neighbors
D.sort() # sort D from smallest to largest values
D = D[:,:k]
# Compute Weight matrix
sigma = D[:,-1]
D = np.divide( (D**2).reshape(n*k) , np.multiply(sigma[np.arange(0, n).repeat(k)],sigma[idx.reshape(n*k)]) )
W = np.exp(-D)
# Make W sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = W
W = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make W symmetric
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
# No self-connections
#W.setdiag(0)
######################################
# Construct a k-NN graph with Cosine distance
######################################
if dist == 'cosine':
print('k-NN graph with cosine distance')
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
        # Sort D in descending order
idx = np.argsort(D)[:,::-1][:,:k] # indices of k nearest neighbors
D.sort(axis=1)
D[:] = D[:,::-1]
# Cosine distance
Dcos = np.abs(np.arccos(D-1e-10))
D = Dcos
D = D[:,:k]
# Compute Weight matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
W = np.exp(- D**2 / sigma2)
# Make W sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = W.reshape(n*k)
W = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make W symmetric
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
# No self-connections
#W.setdiag(0)
######################################
# Construct a k-NN graph with Cosine distance
######################################
if dist == 'cosine_binary':
print('k-NN graph with cosine distance with binary weights')
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
        # Sort D in descending order
idx = np.argsort(D)[:,::-1][:,:k] # indices of k nearest neighbors
D.sort(axis=1)
D[:] = D[:,::-1]
# Cosine distance
Dcos = np.abs(np.arccos(D-1e-10))
D = Dcos
D = D[:,:k]
# Compute Weight matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
W = np.exp(- D**2 / sigma2)
# Make W sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = np.ones([n*k])
W = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make W symmetric
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
# No self-connections
#W.setdiag(0)
#print(W.shape)
#print(W.nnz)
return W
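# Quick usage sketch (added, toy point cloud assumed): build a Euclidean k-NN graph on
# random 2-D points and check that the returned adjacency matrix is sparse and symmetric.
def _example_construct_knn_graph():
    import numpy as np
    np.random.seed(0)
    X = np.random.rand(100, 2)                   # 100 random 2-D points (assumption)
    W = construct_knn_graph(X, 10, 'euclidean')  # sparse n x n weighted adjacency
    print(W.shape, W.nnz)
    print(abs(W - W.T).max())                    # 0.0: the symmetrization step worked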
def sortPCA(lamb, U):
idx = lamb.argsort()[::-1] # decreasing order
return lamb[idx], U[:,idx]
def compute_pca(X,nb_pca):
"""
Usages:
[PC,PD,EnPD] = compute_pca(X,nb_pca)
Notations:
n = nb_data
d = data_dimensionality
Input variables:
X = Data matrix. Size = n x d.
nb_pca = Number of principal components.
Output variables:
PC = Principal components. Size = n x nb_pca.
PD = Principal directions. Size = d x nb_pca.
    EnPD = Energy/variance of the principal directions. Size = nb_pca x 1.
"""
Xzc = X - np.mean(X,axis=0) # zero-centered data
n,d = X.shape
if n>d:
CovX = (Xzc.T).dot(Xzc)
dCovX = CovX.shape[0]
# Compute the eigensolution
#CovX = scipy.sparse.csr_matrix(CovX)
if nb_pca<dCovX:
lamb, U = scipy.sparse.linalg.eigsh(CovX, k=nb_pca, which='LM') # U = d x nb_pca
lamb, U = sortPCA(lamb, U)
#U = U[:,::-1]
#lamb = lamb[::-1]
else: # nb_pca==dCovX
lamb, U = np.linalg.eig(CovX)
lamb, U = sortPCA(lamb, U)
PD = U # Principal directions
PC = Xzc.dot(U) # Principal components/Projected data on PDs
EnPD = lamb # Energy of PDs
else:
#Xzc = scipy.sparse.csr_matrix(Xzc)
U,S,V = scipy.sparse.linalg.svds(Xzc, k=nb_pca, which='LM')
U = U[:,::-1]
S = S[::-1]
V = V[::-1,:]
PD = V # Principal directions
PC = U.dot(np.diag(S)) # Principal components/Projected data on PDs
EnPD = S**2 # Energy of PDs
return PC,PD,EnPD
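# Quick usage sketch (added, random data assumed): project 20-dimensional data onto its
# first three principal components and verify the energies come out in decreasing order.
def _example_compute_pca():
    import numpy as np
    np.random.seed(1)
    X = np.random.randn(200, 20)
    PC, PD, EnPD = compute_pca(X, 3)
    print(PC.shape, PD.shape, EnPD.shape)   # (200, 3) (20, 3) (3,)
    print(np.all(np.diff(EnPD) <= 0))       # True: energies sorted from largest to smallest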
def compute_kernel_kmeans_EM(nc,Ker,Theta,nb_trials):
"""
Usages:
    [C_kmeans,En_kmeans] = compute_kernel_kmeans_EM(nc,Ker,Theta,nb_trials)
Note: Code based on Michael Chen ([email protected])'s code
Notations:
n = nb_data
Input variables:
    nc = nb_clusters
Ker = Kernel matrix. Size = n x n.
Theta = Weight for each data term. Size = n x 1.
nb_trials = Number of kmeans runs.
Output variables:
C_kmeans = Computed kmeans clusters. Size = n x 1.
En_kmeans = Energy of the kmeans partition.
"""
start = time.time()
n = Ker.shape[0]
Theta = np.diag(np.ones(n)) # Equal weight for each data
Ones = np.ones((1,n))
En_kmeans = 1e10; # Init energy value
# Run K-Means "nb_trials" times and keep the solution that best minimizes
# the K-Means energy.
for k in range(nb_trials):
# Initialization
C = np.random.randint(nc,size=n) # random initialization
Cold = np.ones([n])
diffC = np.linalg.norm(C-Cold)/np.linalg.norm(Cold)
# Loop
k = 0
while (k<30) & (diffC>1e-2):
# Update iteration
k += 1
#print(k)
# Distance Matrix D
row = np.array(range(n))
col = C
data = np.ones(n)
F = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, nc)).todense()
O = np.diag( np.array( 1./ (Ones.dot(Theta).dot(F) + 1e-6) ).squeeze() )
T = Ker.dot(Theta.dot(F.dot(O)))
D = - 2* T + np.repeat( np.diag(O.dot((F.T).dot(Theta.dot(T))))[None,:] ,n,axis=0)
# Extract clusters
C = np.array(np.argmin(D,1)).squeeze()
# L2 difference between two successive cluster configurations
diffC = np.linalg.norm(C-Cold)/np.linalg.norm(Cold)
Cold = C
# K-Means energy
En = np.multiply( (np.repeat(np.diag(Ker)[:,None],nc,axis=1) + D) , F)
En = np.sum(En)/n
# Check energy and keep the smallest one
if En < En_kmeans:
En_kmeans = En
C_kmeans = C
# Computational time
#print('Computational time for Kernel K-Means with EM approach (sec): ',time.time() - start);
return C_kmeans, En_kmeans
def compute_kernel_kmeans_spectral(nc,Ker,Theta,nb_trials):
"""
Usages:
    [C_kmeans,En_kmeans] = compute_kernel_kmeans_spectral(nc,Ker,Theta,nb_trials)
Notations:
n = nb_data
Input variables:
    nc = nb_clusters
Ker = Kernel matrix. Size = n x n.
Theta = Weight for each data term. Size = n x 1.
nb_trials = Number of standard kmeans runs.
Output variables:
C_kmeans = Computed kmeans clusters. Size = n x 1.
En_kmeans = Energy of the kmeans partition.
"""
start = time.time()
n = Ker.shape[0]
Theta = np.diag(Theta) # Weight for each data
Ones = np.ones((1,n))
# Eigenvalue Decomposition (EVD)
A = (pow(Theta,0.5)).dot( Ker.dot(pow(Theta,0.5)) )
lamb, U = scipy.sparse.linalg.eigsh(A, k=nc, which='LM')
U = U[:,::-1] # largest = index 0
lamb = lamb[::-1]
# Pre-process embedding coordinates Y
Y = U - np.mean(U,axis=0) # zero-centered data
Y = ( Y.T / np.sqrt(np.sum(Y**2,axis=1)+1e-10) ).T # L2 normalization of rows of Y
# Run Standard/Linear K-Means on embedding coordinates Y
Ker = construct_kernel(Y,'linear')
C, En = compute_kernel_kmeans_EM(nc,Ker,Theta,10)
# Outputs
C_kmeans = C
En_kmeans = En
# Computational time
#print('Computational time for Kernel K-Means with Spectral approach (sec):',time.time() - start);
return C_kmeans, En_kmeans
def plant_seeds(C, n, R, seeds_per_class):
"""
Incremental Reseeding Algorithm for Clustering
Xavier Bresson, Huiyi Hu, Thomas Laurent, Arthur Szlam, James von Brecht
arXiv:1406.3837
"""
# Init
F = np.zeros((n,R))
Bal = np.zeros(R)
# Construct indicators of seeds
for k in range(R):
idx_in_class_k = [ i for i,item in enumerate(C) if C[i]==k ]
size_class_k = len(idx_in_class_k)
if size_class_k > n:
size_class_k = int(n)
seed_idx_k = np.random.permutation(range(size_class_k))
seed_idx_k = seed_idx_k[0:int(seeds_per_class)]
seed_idx_k = [ idx_in_class_k[item] for i,item in enumerate(seed_idx_k) ]
F[seed_idx_k,k] = 1
return F
def compute_pcut(W,Cgt,R,speed=5.0,max_nb_iter=50,display=False):
# Compute Random Walk Transition Matrix
D = scipy.sparse.csc_matrix.sum(W,axis=1) # sum along rows
Dinv = 1/D
Dinv = Dinv.squeeze()
n = D.shape[0]
invD = scipy.sparse.spdiags(Dinv,0,n,n)
RW = W * invD
# Incremental Clustering
speed = float(speed)
maxiter = int(max_nb_iter)
delta_seeds = np.array(speed*10**-4*n/R)
nb_of_seeds = np.array(1) # initial nb of seeds
seeds_per_class = nb_of_seeds
eps = np.array(10**-6)
seed_idx = np.array(np.random.permutation(range(n)),dtype=np.int64)
seed_idx = seed_idx[0:nb_of_seeds*R]
C = np.random.randint(R,size=n)
purity_tab = np.zeros(maxiter)
# Loop
for iter in range(maxiter):
# Plant Seeds
F = plant_seeds(C,n,R,seeds_per_class)
# Random Walk
k = 0
while (np.min(F)<eps) & (k<50):
F = RW.dot(F)
k += 1
# Thresholding
C = np.argmax(F,axis=1)
# Update seeds
nb_of_seeds = nb_of_seeds + delta_seeds
seeds_per_class = round(nb_of_seeds)
# Purity
if display & (not iter%10):
print('iter=', iter, ', accuracy=', compute_purity(C,Cgt,R))
return C, compute_purity(C,Cgt,R)
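# Illustrative sketch (added, toy data assumed): cluster two well-separated Gaussian
# blobs with the incremental-reseeding PCut routine above, using the k-NN graph builder
# defined earlier in this file.
def _example_compute_pcut():
    import numpy as np
    np.random.seed(0)
    X = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 6.0])
    Cgt = np.array([0] * 50 + [1] * 50)
    W = construct_knn_graph(X, 10, 'euclidean')
    C, acc = compute_pcut(W, Cgt, 2, speed=5.0, max_nb_iter=50)
    print(acc)   # purity is typically high for blobs this well separated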
#def construct_kernel(X,type_kernel):
def construct_kernel(X,type_kernel,parameters=None):
"""
Usages:
(i) Ker = construct_kernel(X,'linear'); # Ker = <Xi,Xj>
(ii) Ker = construct_kernel(X,'polynomial',[a,b,c]); # Ker = ( a* <Xi,Xj> + b )^c
(iii) Ker = construct_kernel(X,'gaussian'); # Ker = exp( -|Xi-Xj|_2^2 / a ), automatic a
(iv) Ker = construct_kernel(X,'gaussian',a); # Ker = exp( -|Xi-Xj|_2^2 / a ), given a
(v) Ker = construct_kernel(X,'sigmoid',[a,b]); # Ker = tanh( a* <Xi,Xj> + b )
(vi) Ker = construct_kernel(X,'kNN_gaussian',k);
(vii) Ker = construct_kernel(X,'kNN_cosine',k);
(viii) Ker = construct_kernel(X,'kNN_cosine_binary',k);
Notations:
n = nb_data
d = data_dimensionality
k = nb_nearest_neighbors
Input variables:
X = Data matrix. Size = n x d.
type_kernel = Type of kernels: 'linear', 'polynomial', 'gaussian', 'sigmoid', 'kNN'.
Output variables:
Ker = Kernel matrix. Size = n x n.
"""
start = time.time()
n = X.shape[0]
####################################################
# Linear Kernel = <Xi,Xj>
# Ker = <Xi,Xj>
####################################################
if type_kernel=='linear':
print('Construct Linear Kernel')
if isinstance(X, np.ndarray)==False:
X = X.toarray()
# Compute D = <Xi,Xj> for all pairwise data points size(D) = n x n
Ker = X.dot(X.T)
####################################################
# Gaussian Kernel
# Ker = exp( -|Xi-Xj|_2^2 / a ), automatic a
####################################################
if type_kernel=='gaussian':
print('Construct Gaussian Kernel')
# Compute L2/Euclidean distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
#Xzc = X - np.mean(X,axis=0) # zero-centered data
D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
Ddist = 1.0* D
# Parameters
        if parameters==None:
            # use roughly 1% of the points (at least 2) to set the kernel scale
            k = max(2, round(n/100.))
#print('k=',k)
# Sort Distance matrix
idx = np.argsort(D)[:,:k] # indices of k nearest neighbors
D.sort() # sort D from smallest to largest values
D = D[:,:k]
# Compute Ker matrix
sigma2 = np.mean(D[:,-1])**2 # kernel scale
#print('sigma2c',sigma2)
else:
sigma = parameters
sigma2 = sigma**2
#print('sigma2c',sigma2)
Ker = np.exp(- Ddist**2 / sigma2)
        # Make Ker symmetric (elementwise maximum of Ker and Ker.T, as in the sparse branches)
        bigger = Ker.T > Ker
        Ker = Ker - Ker*bigger + Ker.T*bigger
####################################################
# Polynomial Kernel
# Ker = ( a* <Xi,Xj> + b )^c
####################################################
if type_kernel=='polynomial':
print('Construct Polynomial Kernel')
# Parameters
a = parameters[0]
b = parameters[1]
c = parameters[2]
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
# Polynomial Kernel
Ker = pow( np.abs( a* D + b ) , c )
        # Make Ker symmetric (elementwise maximum of Ker and Ker.T, as in the sparse branches)
        bigger = Ker.T > Ker
        Ker = Ker - Ker*bigger + Ker.T*bigger
####################################################
# kNN Gaussian Kernel
####################################################
if type_kernel=='kNN_gaussian':
print('Construct kNN Gaussian Kernel')
# Parameters
k = parameters
# Compute L2/Euclidean distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
#Xzc = X - np.mean(X,axis=0) # zero-centered data
D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
# Sort Distance matrix
idx = np.argsort(D)[:,:k] # indices of k nearest neighbors
D.sort() # sort D from smallest to largest values
D = D[:,:k]
# Compute Ker matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
Ker_val = np.exp(- D**2 / sigma2)
# Make Ker sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = Ker_val.reshape(n*k)
Ker = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make Ker symmetric
bigger = Ker.T > Ker
Ker = Ker - Ker.multiply(bigger) + Ker.T.multiply(bigger)
Ker = Ker.todense()
####################################################
# kNN Cosine Kernel
####################################################
if type_kernel=='kNN_cosine':
print('Construct kNN Cosine Kernel')
# Parameters
k = parameters
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
        # Sort D in descending order
idx = np.argsort(D)[:,::-1][:,:k] # indices of k nearest neighbors
D.sort(axis=1)
D[:] = D[:,::-1]
# Cosine distance
Dcos = np.abs(np.arccos(D-1e-10))
D = Dcos
D = D[:,:k]
# Compute Ker matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
Ker_val = np.exp(- D**2 / sigma2)
# Make Ker sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = Ker_val.reshape(n*k)
Ker = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make Ker symmetric
bigger = Ker.T > Ker
Ker = Ker - Ker.multiply(bigger) + Ker.T.multiply(bigger)
Ker = Ker.todense()
####################################################
# kNN Cosine Binary Kernel
####################################################
if type_kernel=='kNN_cosine_binary':
print('Construct kNN Cosine Binary Kernel')
# Parameters
k = parameters
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
        # Sort D in descending order
idx = np.argsort(D)[:,::-1][:,:k] # indices of k nearest neighbors
D.sort(axis=1)
D[:] = D[:,::-1]
# Cosine distance
Dcos = np.abs(np.arccos(D-1e-10))
D = Dcos
D = D[:,:k]
# Compute Ker matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
Ker_val = np.exp(- D**2 / sigma2)
# Make Ker sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = np.ones([n*k])
Ker = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make Ker symmetric
bigger = Ker.T > Ker
Ker = Ker - Ker.multiply(bigger) + Ker.T.multiply(bigger)
Ker = Ker.todense()
#print('Computational time for Kernel construction (sec):',time.time() - start)
return Ker
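# Quick usage sketch (added, toy data and sigma value assumed): build a linear and a
# Gaussian kernel on the same points and sanity-check their shapes and diagonals.
def _example_construct_kernel():
    import numpy as np
    np.random.seed(2)
    X = np.random.rand(50, 3)
    K_lin = construct_kernel(X, 'linear')           # K_lin[i,j] = <x_i, x_j>
    K_rbf = construct_kernel(X, 'gaussian', 0.5)    # bandwidth sigma = 0.5 (assumed value)
    print(K_lin.shape, K_rbf.shape)                 # both (50, 50)
    print(np.allclose(np.diag(K_rbf), 1.0))         # True: exp(0) on the diagonal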
def compute_SVM(Xtrain,Cgt_train,l_train,type_svm,param_svm=None,Xtest=None,Cgt_test=None,plot=None,type_graph=None,param_graph=None):
"""
Usage:
    [alpha,f_test,C_test,acc_test] = compute_SVM(Xtrain,Cgt_train,l_train,type_svm,param_svm,Xtest,Cgt_test,plot,type_graph,param_graph)
Notations:
n_train = nb of training data
n_test = nb of test data
d = data_dimensionality
k = nb_nearest_neighbors
Input variables:
    Xtrain = Training data matrix. Size = n_train x d.
    Cgt_train = Ground truth clusters of the training dataset. Size = n_train x 1.
    l_train = Label vector. Size = n_train x 1. Values = +/-1 (0 for unlabeled points).
    type_svm = Type of SVM: 'soft_linear' or 'gaussian_kernel'.
    param_svm = SVM parameters: [gamma] or [gamma,sigma], with gamma the SVM regularization parameter.
    Xtest = Test data matrix. Size = n_test x d.
    Cgt_test = Ground truth clusters of the test dataset. Size = n_test x 1.
    plot = None, or (fig_id,size_vertex_plot) to plot the decision functions.
    type_graph = Type of graph: None, 'euclidean', 'euclidean_zelnik_perona', 'cosine', 'cosine_binary'.
    param_graph = Graph parameters: [kNN,gammaG], with gammaG the graph regularization parameter.
Output variables:
alpha = Coefficient vector of classification function. Size = n_train x 1.
f_test = Continuous decision function. Size = n_test x 1.
C_test = Binary decision function. Size = n_test x 1.
acc_test = Accuracy of SVM classification on test dataset.
"""
# Parameters
n = Xtrain.shape[0]
nc = len(np.unique(Cgt_train))
gamma = param_svm[0]
    if not (plot==None):  # consistent with the plot==None checks below
fig_id = plot[0]
size_vertex_plot = plot[1]
# Type of SVM technique
if type_graph==None:
if type_svm=='soft_linear':
print('Run Linear SVM')
if type_svm=='gaussian_kernel':
print('Run Kernel SVM')
if not (type_graph==None):
if type_svm=='gaussian_kernel':
print('Run Graph SVM with Gaussian Kernel')
# Compute kernel
if type_svm=='soft_linear':
Ker = construct_kernel(Xtrain,'linear')
if type_svm=='gaussian_kernel':
if len(param_svm)<2: Ker = construct_kernel(Xtrain,'gaussian') # TODO
sigma = param_svm[1]
Ker = construct_kernel(Xtrain,'gaussian',sigma)
# Compute graph
if not (type_graph==None):
kNN = param_graph[0]
gammaG = param_graph[1]
W = construct_knn_graph(Xtrain,kNN,type_graph)
Lap = graph_laplacian(W).todense()
# Compute test kernel for classification error
if type_svm=='soft_linear':
KXtest = Xtrain.dot(Xtest.T)
if type_svm=='gaussian_kernel':
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain,Xtest, metric='euclidean', n_jobs=1)
sigma2 = sigma**2
KXtest = np.exp(- Ddist**2 / sigma2)
# Compute Indicator function of labels
H = np.zeros([n])
H[np.abs(l_train)>0.0] = 1
H = np.diag(H)
# Compute L, Q
L = np.diag(l_train)
l = l_train
T = np.eye(n)
if not (type_graph==None):
T += gammaG* Lap.dot(Ker)
Tinv = np.linalg.inv(T)
Q = L.dot(H.dot(Ker.dot(Tinv.dot(H.dot(L)))))
# Time steps
sigma = 1./ np.linalg.norm(L,2)
tau = 1./ np.linalg.norm(Q,2)
    # For conjugate gradient
Acg = tau* Q + np.eye(n)
# Initialization
x = np.zeros([n])
y = 0.0
xold = x
# Plot
if not (plot==None):
fig = plt.figure(fig_id)
fig.canvas.draw()
plt.show(block=False)
# Loop
k = 0
diffx = 1e6
while (diffx>1e-3) & (k<1000):
# Update iteration
k += 1
# Update x
        # Approximate solution with conjugate gradient
b = x + tau - tau* l* y
x,_ = scipy.sparse.linalg.cg(Acg, b, x0=x, tol=1e-3, maxiter=50)
x[x<0.0] = 0 # Projection on [0,gamma]
x[x>gamma] = gamma
# Update y
y = y + sigma* l.T.dot(x)
# Stopping condition
diffx = np.linalg.norm(x-xold)
xold = x
# Plot
if not(k%50):
# Lagrangian multipliers
alpha = x
# a vector
a = Tinv.dot(H.dot(L.dot(alpha)))
# b offset
idx_unlabeled_data = np.where( np.abs(l)<1./2 )
alpha_labels = alpha; alpha_labels[idx_unlabeled_data] = 0
idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
nb_sv = len(Isv.nonzero()[0])
if nb_sv > 1:
b = (Isv.T).dot( l - Ker.dot(a) )/ nb_sv
else:
b = 0
# Continuous decision function
f_test = a.T.dot(KXtest) + b
# Binary decision function
C_test = np.array( 1./2* ( 1+ np.sign(f_test) ) , dtype=np.int64) # decision function in {0,1}
acc_test = compute_purity(C_test,Cgt_test,nc)
# Plot
if not (plot==None):
plt.subplot(121)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot*np.ones(n), c=f_test)
plt.title('Continuous decision function')
plt.subplot(122)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot*np.ones(n), c=C_test)
plt.title('Decision function, iter=' + str(k) + ', acc=' + str(round(acc_test,3)) )
plt.tight_layout()
plt.show()
fig.canvas.draw()
time.sleep(0.01)
# Final operations
# Lagrangian multipliers
alpha = x
# a vector
a = Tinv.dot(H.dot(L.dot(alpha)))
# b offset
idx_unlabeled_data = np.where( np.abs(l)<1./2 )
alpha_labels = alpha; alpha_labels[idx_unlabeled_data] = 0
idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
nb_sv = len(Isv.nonzero()[0])
if nb_sv > 1:
b = (Isv.T).dot( l - Ker.dot(a) )/ nb_sv
else:
b = 0
#print('b=',b)
# Continuous decision function
f_test = a.T.dot(KXtest) + b
# Binary decision function
C_test = np.array( 1./2* ( 1+ np.sign(f_test) ) , dtype=np.int64) # decision function in {0,1}
acc_test = compute_purity(C_test,Cgt_test,nc)
# Plot
if not (plot==None):
plt.subplot(121)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot*np.ones(n), c=f_test)
plt.title('Continuous decision function')
plt.subplot(122)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot*np.ones(n), c=C_test)
plt.title('Decision function, iter=' + str(k) + ', acc=' + str(round(acc_test,3)) )
plt.tight_layout()
plt.show()
fig.canvas.draw()
time.sleep(0.01)
return alpha, f_test, C_test, acc_test
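# Hedged usage sketch (added; toy data and parameter values are assumptions, and the
# solver above relies on the old `tol` keyword of scipy.sparse.linalg.cg, so this runs
# with the 2016-era SciPy this gist targets): a fully labeled two-blob problem with a
# Gaussian kernel and no graph regularization term.
def _example_compute_SVM():
    import numpy as np
    np.random.seed(0)
    Xtrain = np.vstack([np.random.randn(40, 2), np.random.randn(40, 2) + 4.0])
    Cgt_train = np.array([0] * 40 + [1] * 40)
    l_train = 2.0 * Cgt_train - 1.0            # labels in {-1,+1}; a 0 would mean "unlabeled"
    Xtest = np.vstack([np.random.randn(40, 2), np.random.randn(40, 2) + 4.0])
    Cgt_test = np.array([0] * 40 + [1] * 40)
    alpha, f_test, C_test, acc_test = compute_SVM(
        Xtrain, Cgt_train, l_train, 'gaussian_kernel',
        param_svm=[10.0, 1.0],                 # [gamma, sigma], assumed values
        Xtest=Xtest, Cgt_test=Cgt_test, plot=None, type_graph=None)
    print(acc_test)                            # test purity; should be high for separated blobs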
def shrink(x,mu):
"""Soft shrinkage operator"""
s = np.sqrt(x**2)
ss = s - mu
ss = ss* ( ss>0.0 )
s = s + ( s<mu )
ss = ss/ s
res = ss* x
return res
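# Worked example (values chosen for illustration): with mu = 1 the soft shrinkage
# operator moves each entry toward zero by 1 and clips at zero, keeping the sign.
def _example_shrink():
    import numpy as np
    print(shrink(np.array([-3.0, 0.5, 0.0, 2.0]), 1.0))   # -> [-2.  0.  0.  1.]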