# Graph utility functions in Python, from https://github.com/mdeff/ntds_2016
# ### python_ncut_lib.py
# Copyright (C) 2010 R. Cameron Craddock ([email protected])
#
# This script is a part of the pyClusterROI python toolbox for the spatially constrained clustering of fMRI
# data. It provides the library functions for performing normalized cut clustering according to:
#
# Shi, J., & Malik, J. (2000). Normalized cuts and image segmentation. IEEE Transactions on Pattern Analysis
# and Machine Intelligence, 22(8), 888-905. doi: 10.1109/34.868688.
#
# Yu, S. X., & Shi, J. (2003). Multiclass spectral clustering. Proceedings of the Ninth IEEE International
# Conference on Computer Vision, vol. 1, 313-319. IEEE. doi: 10.1109/ICCV.2003.1238361.
#
# This code is a port of the NcutClustering_7 matlab toolbox available here: http://www.cis.upenn.edu/~jshi/software/
#
# For more information refer to:
#
# Craddock, R. C., James, G. A., Holtzheimer, P. E., Hu, X. P., & Mayberg, H. S. (2011). A whole
# brain fMRI atlas generated via spatially constrained spectral clustering. Human brain mapping,
# doi: 10.1002/hbm.21333.
#
# Documentation, updated source code and other information can be found at the NITRC web page:
# http://www.nitrc.org/projects/cluster_roi/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
####
# this script requires NumPy (numpy.scipy.org) and SciPy (www.scipy.org) to be
# installed in a directory that is accessible through PYTHONPATH
import sys
from numpy import array, reshape, shape, matrix, ones, zeros, sqrt, sort, arange
from numpy import nonzero, fromfile, tile, append, prod, double, argsort, sign
from numpy import kron, multiply, divide, abs, reshape, asarray
from numpy.random import rand
from scipy.sparse import csc_matrix, spdiags
from scipy.sparse.linalg import eigsh
from scipy.linalg import norm, svd, LinAlgError
# exception handler for singular value decomposition
class SVDError(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
# (eigen_val, eigen_vec) = ncut( W, nbEigenValues ):
#
# This function performs the first step of normalized cut spectral clustering. The normalized Laplacian
# is calculated from the similarity matrix W, and its top nbEigenValues eigenvectors are computed. The number of
# eigenvectors corresponds to the maximum number of classes (K) that will be produced by the clustering algorithm.
#
# W: symmetric #feature x #feature sparse matrix representing the similarity between voxels,
# traditionally this matrix should be positive semidefinite, but a trick is implemented to
# allow negative matrix entries
# nbEigenValues: number of eigenvectors that should be calculated, this determines the maximum number of
# clusters (K) that can be derived from the result
# eigen_val: (output) eigenvalues from the eigen decomposition of the Laplacian of W
# eigen_vec: (output) eigenvectors from the eigen decomposition of the Laplacian of W
#
def ncut(W, nbEigenValues):
# parameters
offset = .5
maxiterations = 100
eigsErrorTolerence = 1e-6
truncMin = 1e-6
eps = 2.2204e-16
m = shape(W)[1]
    # make sure that W is symmetric; this is a computationally expensive operation, so only enable it for debugging
    # if (W - W.transpose()).sum() != 0:
    #     print("W should be symmetric!")
    #     exit(0)
# degrees and regularization
    # S. Yu, "Understanding Popout through Repulsion", CVPR 2001
    # Allows negative values as well as improving the invertibility
    # of d for small numbers; this likely also improves the stability
    # of the eigendecomposition.
d = abs(W).sum(0)
dr = 0.5 * (d - W.sum(0))
d = d + offset * 2
dr = dr + offset
    # calculation of the normalized Laplacian
W = W + spdiags(dr, [0], m, m, "csc")
Dinvsqrt = spdiags((1.0 / sqrt(d + eps)), [0], m, m, "csc")
P = Dinvsqrt * (W * Dinvsqrt);
# perform the eigen decomposition
#eigen_val, eigen_vec = eigsh(P, nbEigenValues, maxiter=maxiterations, tol=eigsErrorTolerence, which='LA')
eigen_val, eigen_vec = eigsh(P, nbEigenValues, tol=eigsErrorTolerence, which='LA')
# sort the eigen_vals so that the first
# is the largest
i = argsort(-eigen_val)
eigen_val = eigen_val[i]
eigen_vec = eigen_vec[:, i]
# normalize the returned eigenvectors
eigen_vec = Dinvsqrt * matrix(eigen_vec)
norm_ones = norm(ones((m, 1)))
for i in range(0, shape(eigen_vec)[1]):
eigen_vec[:, i] = (eigen_vec[:, i] / norm(eigen_vec[:, i])) * norm_ones
if eigen_vec[0, i] != 0:
eigen_vec[:, i] = -1 * eigen_vec[:, i] * sign(eigen_vec[0, i])
return (eigen_val, eigen_vec)
# eigenvec_discrete=discretisation( eigen_vec ):
#
# This function performs the second step of normalized cut clustering which assigns features to clusters
# based on the eigenvectors of the Laplacian of a similarity matrix. There are a few different ways to
# perform this task. Shi and Malik (2000) iteratively bisect the features based on the positive and
# negative loadings of the eigenvectors. Ng, Jordan and Weiss (2001) proposed to perform K-means clustering
# on the rows of the eigenvectors. The method implemented here was proposed by Yu and Shi (2003); it finds
# a discrete solution by iteratively rotating a binarised set of vectors until they are maximally similar to
# the eigenvectors (the full citation is at the top of this file). An advantage
# of this method over K-means is that it is _more_ deterministic, i.e. you should get very similar results
# every time you run the algorithm on the same data.
#
# The number of clusters that the features are grouped into is determined by the number of eigenvectors
# (number of columns) in the input array eigen_vec. A caveat of this method is that the number of resulting
# clusters is bounded above by the number of eigenvectors, but it may be smaller.
#
# eigen_vec: Eigenvectors of the normalized Laplacian calculated from the similarity matrix
# for the corresponding clustering problem
# eigen_vec_discrete: (output) discretised eigenvectors, i.e. vectors of 0 and 1 which indicate
# whether or not a feature belongs to the cluster defined by the eigenvector.
# E.g. a one in the 10th row of the 4th eigenvector (column) means that feature
# 10 belongs to cluster #4.
#
def discretisation(eigen_vec):
eps = 2.2204e-16
# normalize the eigenvectors
[n, k] = shape(eigen_vec)
vm = kron(ones((1, k)), sqrt(multiply(eigen_vec, eigen_vec).sum(1)))
eigen_vec = divide(eigen_vec, vm)
svd_restarts = 0
exitLoop = 0
### if there is an exception we try to randomize and rerun SVD again
### do this 30 times
while (svd_restarts < 30) and (exitLoop == 0):
# initialize algorithm with a random ordering of eigenvectors
c = zeros((n, 1))
R = matrix(zeros((k, k)))
R[:, 0] = eigen_vec[int(rand(1) * (n)), :].transpose()
for j in range(1, k):
c = c + abs(eigen_vec * R[:, j - 1])
R[:, j] = eigen_vec[c.argmin(), :].transpose()
lastObjectiveValue = 0
nbIterationsDiscretisation = 0
nbIterationsDiscretisationMax = 20
        # iteratively rotate the discretised eigenvectors until they
        # are maximally similar to the input eigenvectors; this
        # converges when the difference between the current solution
        # and the previous solution is less than eps, or when we
        # have reached the maximum number of iterations
while exitLoop == 0:
nbIterationsDiscretisation = nbIterationsDiscretisation + 1
# rotate the original eigen_vectors
tDiscrete = eigen_vec * R
# discretise the result by setting the max of each row=1 and
# other values to 0
j = reshape(asarray(tDiscrete.argmax(1)), n)
eigenvec_discrete = csc_matrix((ones(len(j)), (range(0, n), array(j))), shape=(n, k))
# calculate a rotation to bring the discrete eigenvectors cluster to the
# original eigenvectors
tSVD = eigenvec_discrete.transpose() * eigen_vec
# catch a SVD convergence error and restart
            try:
                U, S, Vh = svd(tSVD)
            except LinAlgError:
                # count the restart, then go back to the beginning of the
                # outer loop and retry with a new random initialization
                svd_restarts += 1
                print("SVD did not converge, randomizing and trying again", file=sys.stderr)
                break
# test for convergence
NcutValue = 2 * (n - S.sum())
if ((abs(NcutValue - lastObjectiveValue) < eps ) or
( nbIterationsDiscretisation > nbIterationsDiscretisationMax )):
exitLoop = 1
else:
# otherwise calculate rotation and continue
lastObjectiveValue = NcutValue
R = matrix(Vh).transpose() * matrix(U).transpose()
if exitLoop == 0:
raise SVDError("SVD did not converge after 30 retries")
else:
return (eigenvec_discrete)
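####
# Minimal usage sketch (illustrative addition, not part of the original toolbox):
# run the two NCut steps above on a tiny synthetic similarity matrix with two
# obvious blocks. The 20x20 matrix W below is an assumption made purely for
# demonstration; call _example_ncut_pipeline() to try it.
def _example_ncut_pipeline():
    import numpy as np
    from scipy.sparse import csc_matrix
    n_half = 10
    S = np.zeros((2 * n_half, 2 * n_half))
    S[:n_half, :n_half] = 1.0                 # strong similarities inside block 1
    S[n_half:, n_half:] = 1.0                 # strong similarities inside block 2
    S[0, n_half] = S[n_half, 0] = 0.01        # one weak cross link keeps the graph connected
    np.fill_diagonal(S, 0.0)
    W = csc_matrix(S)                         # ncut() expects a sparse symmetric matrix
    eigen_val, eigen_vec = ncut(W, 2)         # step 1: leading eigenvectors of the normalized Laplacian
    discrete = discretisation(eigen_vec)      # step 2: rotate them into a discrete cluster indicator
    labels = np.asarray(discrete.todense()).argmax(axis=1)
    print(labels)                             # the two blocks should land in different clusters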
import numpy as np
import scipy.sparse
from collections import defaultdict
from scipy import stats
import ncut
import sklearn.metrics.pairwise
import time
import matplotlib.pyplot as plt
def reindex_W_with_classes(W,C):
"""
Function that reindexes W according to communities/classes
Usage:
    [reindexed_W,reindexed_C] = reindex_W_with_classes(W,C)
Notations:
n = nb_data
nc = nb_communities
Input variables:
W = Adjacency matrix. Size = n x n.
C = Classes used for reindexing W. Size = n x 1. Values in [0,1,...,nc-1].
Output variables:
reindexed_W = reindexed adjacency matrix. Size = n x n.
reindexed_C = reindexed classes C. Size = n x 1. Values in [0,1,...,nc-1].
"""
n = C.shape[0] # nb of vertices
nc = len(np.unique(C)) # nb of communities
reindexing_mapping = np.zeros([n]) # mapping for reindexing W
reindexed_C = np.zeros([n]) # reindexed C
tot = 0
for k in range(nc):
cluster = (np.where(C==k))[0]
length_cluster = len(cluster)
x = np.array(range(tot,tot+length_cluster))
reindexing_mapping[cluster] = x
reindexed_C[x] = k
tot += length_cluster
idx_row,idx_col,val = scipy.sparse.find(W)
idx_row = reindexing_mapping[idx_row]
idx_col = reindexing_mapping[idx_col]
reindexed_W = scipy.sparse.csr_matrix((val, (idx_row, idx_col)), shape=(n, n))
return reindexed_W,reindexed_C
def graph_laplacian(W, normalized=True):
"""
Graph Laplacian Operator
    Usages:
    L = graph_laplacian(W); # compute normalized graph Laplacian
    L = graph_laplacian(W,False); # compute unnormalized graph Laplacian
Notations:
n = nb_data
Input variables:
W = Adjacency matrix. Size = n x n.
Output variables:
L = Graph Laplacian. Size = n x n.
"""
# Degree vector
d = W.sum(axis=0)
# Laplacian matrix
if not normalized:
D = scipy.sparse.diags(d.A.squeeze(), 0)
L = D - W
else:
d += np.spacing(np.array(0, W.dtype)) # d += epsilon
d = 1.0 / np.sqrt(d)
D = scipy.sparse.diags(d.A.squeeze(), 0)
I = scipy.sparse.identity(d.size, dtype=W.dtype)
L = I - D * W * D
return L
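# Illustrative check (added sketch, toy graph assumed): the normalized Laplacian of a
# connected graph annihilates the vector of square-root degrees. Call
# _example_graph_laplacian() to run it.
def _example_graph_laplacian():
    import numpy as np
    import scipy.sparse
    n = 6
    rows = np.arange(n)
    cols = (rows + 1) % n
    W = scipy.sparse.csr_matrix((np.ones(n), (rows, cols)), shape=(n, n))
    W = W + W.T                                    # symmetric ring graph, every degree = 2
    L = graph_laplacian(W)                         # normalized Laplacian I - D^-1/2 W D^-1/2
    deg = np.asarray(W.sum(axis=0)).squeeze()
    print(np.allclose(L.dot(np.sqrt(deg)), 0.0))   # True: sqrt(deg) spans the null space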
def graph_gradient(W):
"""
Graph Gradient Operator
    Usage:
    G = graph_gradient(W); # compute the graph gradient operator
Notations:
n = number of nodes
m = number of edges
Input variables:
W = Adjacency matrix. Size = n x n.
Output variables:
G = Graph Gradient Operator. Size = m x n.
"""
W = W.todense()
n = W.shape[0] # number of nodes
Wtri = np.triu(W,1) # Extract upper triangular part of W
r,c = np.where(Wtri>0.0) # scipy.sparse.find
v = Wtri[r,c]
ne = len(r)
Dr = np.arange(0,ne); Dr = np.concatenate([Dr,Dr])
Dc = np.zeros([2*ne], dtype='int32')
Dc[:ne] = r
Dc[ne:2*ne] = c
Dv = np.zeros([2*ne])
Dv[:ne] = np.sqrt(v)
Dv[ne:2*ne] = -np.sqrt(v)
G = scipy.sparse.csr_matrix((Dv, (Dr, Dc)), shape=(ne, n), dtype='float32')
return G
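# Illustrative check (added sketch, tiny weighted graph assumed): for an undirected
# graph the gradient operator satisfies G^T G = D - W, the unnormalized Laplacian.
# Call _example_graph_gradient() to run it.
def _example_graph_gradient():
    import numpy as np
    import scipy.sparse
    W = scipy.sparse.csr_matrix(np.array([[0., 1., 2.],
                                          [1., 0., 0.],
                                          [2., 0., 0.]]))
    G = graph_gradient(W)                               # m x n edge-gradient operator
    L_unnorm = graph_laplacian(W, normalized=False)     # D - W
    print(np.allclose((G.T * G).toarray(), L_unnorm.toarray()))   # True (up to float32 rounding)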
def sortEVD(lamb, U):
idx = lamb.argsort() # increasing order
return lamb[idx], U[:,idx]
def nldr_visualization(W):
"""
Visualization technique:
Belkin-Niyogi, Laplacian eigenmaps for dimensionality reduction and data representation, 2003
Usage:
X,Y,Z = nldr_visualization(W)
Notations:
n = nb_data
Input variables:
W = Adjacency matrix. Size = n x n.
Output variables:
X = 1st data coordinates in low-dim manifold. Size n x 1.
Y = 2nd data coordinates in low-dim manifold. Size n x 1.
Z = 3rd data coordinates in low-dim manifold. Size n x 1.
"""
# Compute normalized graph Laplacian
L = graph_laplacian(W)
# Regularization for ill-posed graphs
L = L + 1e-6* scipy.sparse.identity(L.shape[0], dtype=W.dtype)
# Compute the first three Laplacian Eigenmaps
lamb, U = scipy.sparse.linalg.eigsh(L, k=4, which='SM')
# Sort eigenvalue from smallest to largest values
lamb, U = sortEVD(lamb, U)
# Coordinates of graph vertices in the low-dim embedding manifold
X = U[:,1]
Y = U[:,2]
Z = U[:,3]
return X,Y,Z
def compute_purity(C_computed,C_grndtruth,R):
"""
Clustering accuracy can be defined with the purity measure, defined here:
Yang-Hao-Dikmen-Chen-Oja, Clustering by nonnegative matrix factorization
using graph random walk, 2012.
    Usages:
    accuracy = compute_purity(C_computed,C_grndtruth,R)
Notations:
n = nb_data
Input variables:
C_computed = Computed clusters. Size = n x 1. Values in [0,1,...,R-1].
C_grndtruth = Ground truth clusters. Size = n x 1. Values in [0,1,...,R-1].
R = Number of clusters.
Output variables:
accuracy = Clustering accuracy of computed clusters.
"""
N = C_grndtruth.size
nb_of_dominant_points_in_class = np.zeros((R, 1))
w = defaultdict(list)
z = defaultdict(list)
for k in range(R):
for i in range(N):
if C_computed[i]==k:
w[k].append(C_grndtruth[i])
if len(w[k])>0:
val,nb = stats.mode(w[k])
z[k] = int(nb.squeeze())
else:
z[k] = 0
sum_dominant = 0
for k in range(R):
sum_dominant = sum_dominant + z[k]
purity = float(sum_dominant) / float(N)* 100.0
return purity
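# Worked example (toy labels assumed, added for illustration): computed cluster 0 holds
# two points of ground-truth class 0, computed cluster 1 holds three points of class 1
# and one of class 0, so purity = (2 + 3) / 6 * 100 ~= 83.3.
def _example_compute_purity():
    import numpy as np
    C_computed = np.array([0, 0, 1, 1, 1, 1])
    C_grndtruth = np.array([0, 0, 0, 1, 1, 1])
    print(compute_purity(C_computed, C_grndtruth, 2))   # ~83.33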
def compute_ncut(W, Cgt, R):
"""
Graph spectral clustering technique NCut:
Yu-Shi, Multiclass spectral clustering, 2003
Code available here: http://www.cis.upenn.edu/~jshi/software
Usages:
C,acc = compute_ncut(W,Cgt,R)
Notations:
n = nb_data
Input variables:
W = Adjacency matrix. Size = n x n.
R = Number of clusters.
Cgt = Ground truth clusters. Size = n x 1. Values in [0,1,...,R-1].
Output variables:
C = NCut solution. Size = n x 1. Values in [0,1,...,R-1].
acc = Accuracy of NCut solution.
"""
# Apply ncut
eigen_val, eigen_vec = ncut.ncut( W, R )
# Discretize to get cluster id
eigenvec_discrete = ncut.discretisation( eigen_vec )
res = eigenvec_discrete.dot(np.arange(1, R + 1))
C = np.array(res-1,dtype=np.int64)
# Compute accuracy
computed_solution = C
ground_truth = Cgt
acc = compute_purity( computed_solution,ground_truth, R)
return C, acc
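# Illustrative sketch (assumes the NCut code above is importable as the `ncut` module,
# as the `import ncut` at the top of this file expects): feed a hand-made two-block
# adjacency matrix and check that NCut recovers the blocks.
def _example_compute_ncut():
    import numpy as np
    import scipy.sparse
    A = np.zeros((16, 16))
    A[:8, :8] = 1.0
    A[8:, 8:] = 1.0
    A[0, 8] = A[8, 0] = 0.1            # weak link between the two communities
    np.fill_diagonal(A, 0.0)
    W = scipy.sparse.csr_matrix(A)
    Cgt = np.array([0] * 8 + [1] * 8)  # ground-truth communities
    C, acc = compute_ncut(W, Cgt, 2)
    print(C, acc)                      # purity should be (close to) 100 on this toy graph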
def construct_knn_graph(X,k,dist):
"""
Usages:
(i) W = construct_knn_graph(X,k,'euclidean')
(ii) W = construct_knn_graph(X,k,'cosine')
(iii) W = construct_knn_graph(X,k,'cosine_binary')
Notations:
n = nb_data
d = data_dimensionality
Input variables:
X = Data matrix. Size = n x d.
    k = Number of nearest neighbors.
dist = Type of distances: 'euclidean' or 'cosine'
Output variables:
W = Adjacency matrix. Size = n x n.
Pre-process data (if needed)
----------------------------
    >>> n = nb_data
    >>> d = data_dimensionality
    Center the data, i.e. x_i <- x_i - mean({x_i}), size(X) = n x d
    >>> X = X - np.mean(X, axis=0)
    Normalize the scale of each feature, size(X) = n x d
    >>> X = X / (np.sqrt(np.sum(X**2, axis=0)) + 1e-10)
    Projection onto the sphere, i.e. ||x_i||_2 = 1, size(X) = n x d
    >>> X = (X.T / (np.sqrt(np.sum(X**2, axis=1)) + 1e-10)).T
    Normalize the data values to [0,1]:
    >>> X = (X - X.min()) / (X.max() - X.min())
    or to [-1,1]:
    >>> X = 2 * ((X - X.min()) / (X.max() - X.min()) - 0.5)
"""
n = X.shape[0]
######################################
# Construct a k-NN graph with L2/Euclidean distance
######################################
if dist == 'euclidean':
print('k-NN graph with euclidean distance')
# Compute L2/Euclidean distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
# Sort Distance matrix
idx = np.argsort(D)[:,:k] # indices of k nearest neighbors
D.sort() # sort D from smallest to largest values
D = D[:,:k]
# Compute Weight matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
W = np.exp(- D**2 / sigma2)
# Make W sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = W.reshape(n*k)
W = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make W symmetric
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
# No self-connections
#W.setdiag(0)
######################################
# Construct a k-NN graph with Zelnik-Perona technique
# "Self-Tuning Spectral Clustering", 2005
######################################
if dist == 'euclidean_zelnik_perona':
print('k-NN graph with Zelnik-Perona technique')
# Compute L2/Euclidean distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
# Sort Distance matrix
idx = np.argsort(D)[:,:k] # indices of k nearest neighbors
D.sort() # sort D from smallest to largest values
D = D[:,:k]
# Compute Weight matrix
sigma = D[:,-1]
D = np.divide( (D**2).reshape(n*k) , np.multiply(sigma[np.arange(0, n).repeat(k)],sigma[idx.reshape(n*k)]) )
W = np.exp(-D)
# Make W sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = W
W = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make W symmetric
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
# No self-connections
#W.setdiag(0)
######################################
# Construct a k-NN graph with Cosine distance
######################################
if dist == 'cosine':
print('k-NN graph with cosine distance')
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
        # Sort D in descending order
idx = np.argsort(D)[:,::-1][:,:k] # indices of k nearest neighbors
D.sort(axis=1)
D[:] = D[:,::-1]
# Cosine distance
Dcos = np.abs(np.arccos(D-1e-10))
D = Dcos
D = D[:,:k]
# Compute Weight matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
W = np.exp(- D**2 / sigma2)
# Make W sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = W.reshape(n*k)
W = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make W symmetric
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
# No self-connections
#W.setdiag(0)
######################################
# Construct a k-NN graph with Cosine distance
######################################
if dist == 'cosine_binary':
print('k-NN graph with cosine distance with binary weights')
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
        # Sort D in descending order
idx = np.argsort(D)[:,::-1][:,:k] # indices of k nearest neighbors
D.sort(axis=1)
D[:] = D[:,::-1]
# Cosine distance
Dcos = np.abs(np.arccos(D-1e-10))
D = Dcos
D = D[:,:k]
# Compute Weight matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
W = np.exp(- D**2 / sigma2)
# Make W sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = np.ones([n*k])
W = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make W symmetric
bigger = W.T > W
W = W - W.multiply(bigger) + W.T.multiply(bigger)
# No self-connections
#W.setdiag(0)
#print(W.shape)
#print(W.nnz)
return W
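# Quick usage sketch (added, toy point cloud assumed): build a Euclidean k-NN graph on
# random 2-D points and check that the returned adjacency matrix is sparse and symmetric.
def _example_construct_knn_graph():
    import numpy as np
    np.random.seed(0)
    X = np.random.rand(100, 2)                   # 100 random 2-D points (assumption)
    W = construct_knn_graph(X, 10, 'euclidean')  # sparse n x n weighted adjacency
    print(W.shape, W.nnz)
    print(abs(W - W.T).max())                    # 0.0: the symmetrization step worked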
def sortPCA(lamb, U):
idx = lamb.argsort()[::-1] # decreasing order
return lamb[idx], U[:,idx]
def compute_pca(X,nb_pca):
"""
Usages:
[PC,PD,EnPD] = compute_pca(X,nb_pca)
Notations:
n = nb_data
d = data_dimensionality
Input variables:
X = Data matrix. Size = n x d.
nb_pca = Number of principal components.
Output variables:
PC = Principal components. Size = n x nb_pca.
PD = Principal directions. Size = d x nb_pca.
    EnPD = Energy/variance of the principal directions. Size = nb_pca x 1.
"""
Xzc = X - np.mean(X,axis=0) # zero-centered data
n,d = X.shape
if n>d:
CovX = (Xzc.T).dot(Xzc)
dCovX = CovX.shape[0]
# Compute the eigensolution
#CovX = scipy.sparse.csr_matrix(CovX)
if nb_pca<dCovX:
lamb, U = scipy.sparse.linalg.eigsh(CovX, k=nb_pca, which='LM') # U = d x nb_pca
lamb, U = sortPCA(lamb, U)
#U = U[:,::-1]
#lamb = lamb[::-1]
else: # nb_pca==dCovX
lamb, U = np.linalg.eig(CovX)
lamb, U = sortPCA(lamb, U)
PD = U # Principal directions
PC = Xzc.dot(U) # Principal components/Projected data on PDs
EnPD = lamb # Energy of PDs
else:
#Xzc = scipy.sparse.csr_matrix(Xzc)
U,S,V = scipy.sparse.linalg.svds(Xzc, k=nb_pca, which='LM')
U = U[:,::-1]
S = S[::-1]
V = V[::-1,:]
PD = V # Principal directions
PC = U.dot(np.diag(S)) # Principal components/Projected data on PDs
EnPD = S**2 # Energy of PDs
return PC,PD,EnPD
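# Quick usage sketch (added, random data assumed): project 20-dimensional data onto its
# first three principal components and verify the energies come out in decreasing order.
def _example_compute_pca():
    import numpy as np
    np.random.seed(1)
    X = np.random.randn(200, 20)
    PC, PD, EnPD = compute_pca(X, 3)
    print(PC.shape, PD.shape, EnPD.shape)   # (200, 3) (20, 3) (3,)
    print(np.all(np.diff(EnPD) <= 0))       # True: energies sorted from largest to smallest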
def compute_kernel_kmeans_EM(nc,Ker,Theta,nb_trials):
"""
Usages:
    [C_kmeans,En_kmeans] = compute_kernel_kmeans_EM(nc,Ker,Theta,nb_trials)
Note: Code based on Michael Chen ([email protected])'s code
Notations:
n = nb_data
Input variables:
    nc = nb_clusters
Ker = Kernel matrix. Size = n x n.
Theta = Weight for each data term. Size = n x 1.
nb_trials = Number of kmeans runs.
Output variables:
C_kmeans = Computed kmeans clusters. Size = n x 1.
En_kmeans = Energy of the kmeans partition.
"""
start = time.time()
n = Ker.shape[0]
Theta = np.diag(np.ones(n)) # Equal weight for each data
Ones = np.ones((1,n))
En_kmeans = 1e10; # Init energy value
# Run K-Means "nb_trials" times and keep the solution that best minimizes
# the K-Means energy.
for k in range(nb_trials):
# Initialization
C = np.random.randint(nc,size=n) # random initialization
Cold = np.ones([n])
diffC = np.linalg.norm(C-Cold)/np.linalg.norm(Cold)
# Loop
k = 0
while (k<30) & (diffC>1e-2):
# Update iteration
k += 1
#print(k)
# Distance Matrix D
row = np.array(range(n))
col = C
data = np.ones(n)
F = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, nc)).todense()
O = np.diag( np.array( 1./ (Ones.dot(Theta).dot(F) + 1e-6) ).squeeze() )
T = Ker.dot(Theta.dot(F.dot(O)))
D = - 2* T + np.repeat( np.diag(O.dot((F.T).dot(Theta.dot(T))))[None,:] ,n,axis=0)
# Extract clusters
C = np.array(np.argmin(D,1)).squeeze()
# L2 difference between two successive cluster configurations
diffC = np.linalg.norm(C-Cold)/np.linalg.norm(Cold)
Cold = C
# K-Means energy
En = np.multiply( (np.repeat(np.diag(Ker)[:,None],nc,axis=1) + D) , F)
En = np.sum(En)/n
# Check energy and keep the smallest one
if En < En_kmeans:
En_kmeans = En
C_kmeans = C
# Computational time
#print('Computational time for Kernel K-Means with EM approach (sec): ',time.time() - start);
return C_kmeans, En_kmeans
def compute_kernel_kmeans_spectral(nc,Ker,Theta,nb_trials):
"""
Usages:
    [C_kmeans,En_kmeans] = compute_kernel_kmeans_spectral(nc,Ker,Theta,nb_trials)
Notations:
n = nb_data
Input variables:
    nc = nb_clusters
Ker = Kernel matrix. Size = n x n.
Theta = Weight for each data term. Size = n x 1.
nb_trials = Number of standard kmeans runs.
Output variables:
C_kmeans = Computed kmeans clusters. Size = n x 1.
En_kmeans = Energy of the kmeans partition.
"""
start = time.time()
n = Ker.shape[0]
Theta = np.diag(Theta) # Weight for each data
Ones = np.ones((1,n))
# Eigenvalue Decomposition (EVD)
A = (pow(Theta,0.5)).dot( Ker.dot(pow(Theta,0.5)) )
lamb, U = scipy.sparse.linalg.eigsh(A, k=nc, which='LM')
U = U[:,::-1] # largest = index 0
lamb = lamb[::-1]
# Pre-process embedding coordinates Y
Y = U - np.mean(U,axis=0) # zero-centered data
Y = ( Y.T / np.sqrt(np.sum(Y**2,axis=1)+1e-10) ).T # L2 normalization of rows of Y
# Run Standard/Linear K-Means on embedding coordinates Y
Ker = construct_kernel(Y,'linear')
C, En = compute_kernel_kmeans_EM(nc,Ker,Theta,10)
# Outputs
C_kmeans = C
En_kmeans = En
# Computational time
#print('Computational time for Kernel K-Means with Spectral approach (sec):',time.time() - start);
return C_kmeans, En_kmeans
def plant_seeds(C, n, R, seeds_per_class):
"""
Incremental Reseeding Algorithm for Clustering
Xavier Bresson, Huiyi Hu, Thomas Laurent, Arthur Szlam, James von Brecht
arXiv:1406.3837
"""
# Init
F = np.zeros((n,R))
Bal = np.zeros(R)
# Construct indicators of seeds
for k in range(R):
idx_in_class_k = [ i for i,item in enumerate(C) if C[i]==k ]
size_class_k = len(idx_in_class_k)
if size_class_k > n:
size_class_k = int(n)
seed_idx_k = np.random.permutation(range(size_class_k))
seed_idx_k = seed_idx_k[0:int(seeds_per_class)]
seed_idx_k = [ idx_in_class_k[item] for i,item in enumerate(seed_idx_k) ]
F[seed_idx_k,k] = 1
return F
def compute_pcut(W,Cgt,R,speed=5.0,max_nb_iter=50,display=False):
# Compute Random Walk Transition Matrix
D = scipy.sparse.csc_matrix.sum(W,axis=1) # sum along rows
Dinv = 1/D
Dinv = Dinv.squeeze()
n = D.shape[0]
invD = scipy.sparse.spdiags(Dinv,0,n,n)
RW = W * invD
# Incremental Clustering
speed = float(speed)
maxiter = int(max_nb_iter)
delta_seeds = np.array(speed*10**-4*n/R)
nb_of_seeds = np.array(1) # initial nb of seeds
seeds_per_class = nb_of_seeds
eps = np.array(10**-6)
seed_idx = np.array(np.random.permutation(range(n)),dtype=np.int64)
seed_idx = seed_idx[0:nb_of_seeds*R]
C = np.random.randint(R,size=n)
purity_tab = np.zeros(maxiter)
# Loop
for iter in range(maxiter):
# Plant Seeds
F = plant_seeds(C,n,R,seeds_per_class)
# Random Walk
k = 0
while (np.min(F)<eps) & (k<50):
F = RW.dot(F)
k += 1
# Thresholding
C = np.argmax(F,axis=1)
# Update seeds
nb_of_seeds = nb_of_seeds + delta_seeds
seeds_per_class = round(nb_of_seeds)
# Purity
if display & (not iter%10):
print('iter=', iter, ', accuracy=', compute_purity(C,Cgt,R))
return C, compute_purity(C,Cgt,R)
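# Illustrative sketch (added, toy data assumed): cluster two well-separated Gaussian
# blobs with the incremental-reseeding PCut routine above, using the k-NN graph builder
# defined earlier in this file.
def _example_compute_pcut():
    import numpy as np
    np.random.seed(0)
    X = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 6.0])
    Cgt = np.array([0] * 50 + [1] * 50)
    W = construct_knn_graph(X, 10, 'euclidean')
    C, acc = compute_pcut(W, Cgt, 2, speed=5.0, max_nb_iter=50)
    print(acc)   # purity is typically high for blobs this well separated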
#def construct_kernel(X,type_kernel):
def construct_kernel(X,type_kernel,parameters=None):
"""
Usages:
(i) Ker = construct_kernel(X,'linear'); # Ker = <Xi,Xj>
(ii) Ker = construct_kernel(X,'polynomial',[a,b,c]); # Ker = ( a* <Xi,Xj> + b )^c
(iii) Ker = construct_kernel(X,'gaussian'); # Ker = exp( -|Xi-Xj|_2^2 / a ), automatic a
(iv) Ker = construct_kernel(X,'gaussian',a); # Ker = exp( -|Xi-Xj|_2^2 / a ), given a
(v) Ker = construct_kernel(X,'sigmoid',[a,b]); # Ker = tanh( a* <Xi,Xj> + b )
(vi) Ker = construct_kernel(X,'kNN_gaussian',k);
(vii) Ker = construct_kernel(X,'kNN_cosine',k);
(viii) Ker = construct_kernel(X,'kNN_cosine_binary',k);
Notations:
n = nb_data
d = data_dimensionality
k = nb_nearest_neighbors
Input variables:
X = Data matrix. Size = n x d.
type_kernel = Type of kernels: 'linear', 'polynomial', 'gaussian', 'sigmoid', 'kNN'.
Output variables:
Ker = Kernel matrix. Size = n x n.
"""
start = time.time()
n = X.shape[0]
####################################################
# Linear Kernel = <Xi,Xj>
# Ker = <Xi,Xj>
####################################################
if type_kernel=='linear':
print('Construct Linear Kernel')
if isinstance(X, np.ndarray)==False:
X = X.toarray()
# Compute D = <Xi,Xj> for all pairwise data points size(D) = n x n
Ker = X.dot(X.T)
####################################################
# Gaussian Kernel
# Ker = exp( -|Xi-Xj|_2^2 / a ), automatic a
####################################################
if type_kernel=='gaussian':
print('Construct Gaussian Kernel')
# Compute L2/Euclidean distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
#Xzc = X - np.mean(X,axis=0) # zero-centered data
D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
Ddist = 1.0* D
# Parameters
        if parameters==None:
            # use roughly 1% of the points (at least 2) to set the kernel scale
            k = max(2, round(n/100.))
#print('k=',k)
# Sort Distance matrix
idx = np.argsort(D)[:,:k] # indices of k nearest neighbors
D.sort() # sort D from smallest to largest values
D = D[:,:k]
# Compute Ker matrix
sigma2 = np.mean(D[:,-1])**2 # kernel scale
#print('sigma2c',sigma2)
else:
sigma = parameters
sigma2 = sigma**2
#print('sigma2c',sigma2)
Ker = np.exp(- Ddist**2 / sigma2)
        # Make Ker symmetric (elementwise maximum of Ker and Ker.T, as in the sparse branches)
        bigger = Ker.T > Ker
        Ker = Ker - Ker*bigger + Ker.T*bigger
####################################################
# Polynomial Kernel
# Ker = ( a* <Xi,Xj> + b )^c
####################################################
if type_kernel=='polynomial':
print('Construct Polynomial Kernel')
# Parameters
a = parameters[0]
b = parameters[1]
c = parameters[2]
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
# Polynomial Kernel
Ker = pow( np.abs( a* D + b ) , c )
        # Make Ker symmetric (elementwise maximum of Ker and Ker.T, as in the sparse branches)
        bigger = Ker.T > Ker
        Ker = Ker - Ker*bigger + Ker.T*bigger
####################################################
# kNN Gaussian Kernel
####################################################
if type_kernel=='kNN_gaussian':
print('Construct kNN Gaussian Kernel')
# Parameters
k = parameters
# Compute L2/Euclidean distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
#Xzc = X - np.mean(X,axis=0) # zero-centered data
D = sklearn.metrics.pairwise.pairwise_distances(X, metric='euclidean', n_jobs=1)
# Sort Distance matrix
idx = np.argsort(D)[:,:k] # indices of k nearest neighbors
D.sort() # sort D from smallest to largest values
D = D[:,:k]
# Compute Ker matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
Ker_val = np.exp(- D**2 / sigma2)
# Make Ker sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = Ker_val.reshape(n*k)
Ker = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make Ker symmetric
bigger = Ker.T > Ker
Ker = Ker - Ker.multiply(bigger) + Ker.T.multiply(bigger)
Ker = Ker.todense()
####################################################
# kNN Cosine Kernel
####################################################
if type_kernel=='kNN_cosine':
print('Construct kNN Cosine Kernel')
# Parameters
k = parameters
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
        # Sort D in descending order
idx = np.argsort(D)[:,::-1][:,:k] # indices of k nearest neighbors
D.sort(axis=1)
D[:] = D[:,::-1]
# Cosine distance
Dcos = np.abs(np.arccos(D-1e-10))
D = Dcos
D = D[:,:k]
# Compute Ker matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
Ker_val = np.exp(- D**2 / sigma2)
# Make Ker sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = Ker_val.reshape(n*k)
Ker = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make Ker symmetric
bigger = Ker.T > Ker
Ker = Ker - Ker.multiply(bigger) + Ker.T.multiply(bigger)
Ker = Ker.todense()
####################################################
# kNN Cosine Binary Kernel
####################################################
if type_kernel=='kNN_cosine_binary':
print('Construct kNN Cosine Binary Kernel')
# Parameters
k = parameters
# Compute Cosine distance between all pairs of points
if isinstance(X, np.ndarray)==False:
X = X.toarray()
Xzc = X - np.mean(X,axis=0) # zero-centered data
Xl2proj = ( Xzc.T / np.sqrt(np.sum(Xzc**2,axis=1)+1e-10) ).T # Projection on the sphere, i.e. ||x_i||_2 = 1
D = Xl2proj.dot(Xl2proj.T)
        # Sort D in descending order
idx = np.argsort(D)[:,::-1][:,:k] # indices of k nearest neighbors
D.sort(axis=1)
D[:] = D[:,::-1]
# Cosine distance
Dcos = np.abs(np.arccos(D-1e-10))
D = Dcos
D = D[:,:k]
# Compute Ker matrix
sigma2 = np.mean(D[:,-1])**2 # graph scale
Ker_val = np.exp(- D**2 / sigma2)
# Make Ker sparse
n = X.shape[0]
row = np.arange(0, n).repeat(k)
col = idx.reshape(n*k)
data = np.ones([n*k])
Ker = scipy.sparse.csr_matrix((data, (row, col)), shape=(n, n))
        # Make Ker symmetric
bigger = Ker.T > Ker
Ker = Ker - Ker.multiply(bigger) + Ker.T.multiply(bigger)
Ker = Ker.todense()
#print('Computational time for Kernel construction (sec):',time.time() - start)
return Ker
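# Quick usage sketch (added, toy data and sigma value assumed): build a linear and a
# Gaussian kernel on the same points and sanity-check their shapes and diagonals.
def _example_construct_kernel():
    import numpy as np
    np.random.seed(2)
    X = np.random.rand(50, 3)
    K_lin = construct_kernel(X, 'linear')           # K_lin[i,j] = <x_i, x_j>
    K_rbf = construct_kernel(X, 'gaussian', 0.5)    # bandwidth sigma = 0.5 (assumed value)
    print(K_lin.shape, K_rbf.shape)                 # both (50, 50)
    print(np.allclose(np.diag(K_rbf), 1.0))         # True: exp(0) on the diagonal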
def compute_SVM(Xtrain,Cgt_train,l_train,type_svm,param_svm=None,Xtest=None,Cgt_test=None,plot=None,type_graph=None,param_graph=None):
"""
Usage:
    [alpha,f_test,C_test,acc_test] = compute_SVM(Xtrain,Cgt_train,l_train,type_svm,param_svm,Xtest,Cgt_test,plot,type_graph,param_graph)
Notations:
n_train = nb of training data
n_test = nb of test data
d = data_dimensionality
k = nb_nearest_neighbors
Input variables:
    Xtrain = Training data matrix. Size = n_train x d.
    Cgt_train = Ground truth clusters of the training dataset. Size = n_train x 1.
    l_train = Label vector. Size = n_train x 1. Values = +/-1 (0 for unlabeled points).
    type_svm = Type of SVM: 'soft_linear' or 'gaussian_kernel'.
    param_svm = SVM parameters: [gamma] or [gamma,sigma], with gamma the SVM regularization parameter.
    Xtest = Test data matrix. Size = n_test x d.
    Cgt_test = Ground truth clusters of the test dataset. Size = n_test x 1.
    plot = None, or (fig_id,size_vertex_plot) to plot the decision functions.
    type_graph = Type of graph: None, 'euclidean', 'euclidean_zelnik_perona', 'cosine', 'cosine_binary'.
    param_graph = Graph parameters: [kNN,gammaG], with gammaG the graph regularization parameter.
Output variables:
alpha = Coefficient vector of classification function. Size = n_train x 1.
f_test = Continuous decision function. Size = n_test x 1.
C_test = Binary decision function. Size = n_test x 1.
acc_test = Accuracy of SVM classification on test dataset.
"""
# Parameters
n = Xtrain.shape[0]
nc = len(np.unique(Cgt_train))
gamma = param_svm[0]
    if not (plot==None):  # consistent with the plot==None checks below
fig_id = plot[0]
size_vertex_plot = plot[1]
# Type of SVM technique
if type_graph==None:
if type_svm=='soft_linear':
print('Run Linear SVM')
if type_svm=='gaussian_kernel':
print('Run Kernel SVM')
if not (type_graph==None):
if type_svm=='gaussian_kernel':
print('Run Graph SVM with Gaussian Kernel')
# Compute kernel
if type_svm=='soft_linear':
Ker = construct_kernel(Xtrain,'linear')
if type_svm=='gaussian_kernel':
if len(param_svm)<2: Ker = construct_kernel(Xtrain,'gaussian') # TODO
sigma = param_svm[1]
Ker = construct_kernel(Xtrain,'gaussian',sigma)
# Compute graph
if not (type_graph==None):
kNN = param_graph[0]
gammaG = param_graph[1]
W = construct_knn_graph(Xtrain,kNN,type_graph)
Lap = graph_laplacian(W).todense()
# Compute test kernel for classification error
if type_svm=='soft_linear':
KXtest = Xtrain.dot(Xtest.T)
if type_svm=='gaussian_kernel':
Ddist = sklearn.metrics.pairwise.pairwise_distances(Xtrain,Xtest, metric='euclidean', n_jobs=1)
sigma2 = sigma**2
KXtest = np.exp(- Ddist**2 / sigma2)
# Compute Indicator function of labels
H = np.zeros([n])
H[np.abs(l_train)>0.0] = 1
H = np.diag(H)
# Compute L, Q
L = np.diag(l_train)
l = l_train
T = np.eye(n)
if not (type_graph==None):
T += gammaG* Lap.dot(Ker)
Tinv = np.linalg.inv(T)
Q = L.dot(H.dot(Ker.dot(Tinv.dot(H.dot(L)))))
# Time steps
sigma = 1./ np.linalg.norm(L,2)
tau = 1./ np.linalg.norm(Q,2)
    # For conjugate gradient
Acg = tau* Q + np.eye(n)
# Initialization
x = np.zeros([n])
y = 0.0
xold = x
# Plot
if not (plot==None):
fig = plt.figure(fig_id)
fig.canvas.draw()
plt.show(block=False)
# Loop
k = 0
diffx = 1e6
while (diffx>1e-3) & (k<1000):
# Update iteration
k += 1
# Update x
        # Approximate solution with conjugate gradient
b = x + tau - tau* l* y
x,_ = scipy.sparse.linalg.cg(Acg, b, x0=x, tol=1e-3, maxiter=50)
x[x<0.0] = 0 # Projection on [0,gamma]
x[x>gamma] = gamma
# Update y
y = y + sigma* l.T.dot(x)
# Stopping condition
diffx = np.linalg.norm(x-xold)
xold = x
# Plot
if not(k%50):
# Lagrangian multipliers
alpha = x
# a vector
a = Tinv.dot(H.dot(L.dot(alpha)))
# b offset
idx_unlabeled_data = np.where( np.abs(l)<1./2 )
alpha_labels = alpha; alpha_labels[idx_unlabeled_data] = 0
idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
nb_sv = len(Isv.nonzero()[0])
if nb_sv > 1:
b = (Isv.T).dot( l - Ker.dot(a) )/ nb_sv
else:
b = 0
# Continuous decision function
f_test = a.T.dot(KXtest) + b
# Binary decision function
C_test = np.array( 1./2* ( 1+ np.sign(f_test) ) , dtype=np.int64) # decision function in {0,1}
acc_test = compute_purity(C_test,Cgt_test,nc)
# Plot
if not (plot==None):
plt.subplot(121)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot*np.ones(n), c=f_test)
plt.title('Continuous decision function')
plt.subplot(122)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot*np.ones(n), c=C_test)
plt.title('Decision function, iter=' + str(k) + ', acc=' + str(round(acc_test,3)) )
plt.tight_layout()
plt.show()
fig.canvas.draw()
time.sleep(0.01)
# Final operations
# Lagrangian multipliers
alpha = x
# a vector
a = Tinv.dot(H.dot(L.dot(alpha)))
# b offset
idx_unlabeled_data = np.where( np.abs(l)<1./2 )
alpha_labels = alpha; alpha_labels[idx_unlabeled_data] = 0
idx = np.where( np.abs(alpha_labels)>0.25* np.max(np.abs(alpha_labels)) )
Isv = np.zeros([n]); Isv[idx] = 1 # Indicator function of Support Vectors
nb_sv = len(Isv.nonzero()[0])
if nb_sv > 1:
b = (Isv.T).dot( l - Ker.dot(a) )/ nb_sv
else:
b = 0
#print('b=',b)
# Continuous decision function
f_test = a.T.dot(KXtest) + b
# Binary decision function
C_test = np.array( 1./2* ( 1+ np.sign(f_test) ) , dtype=np.int64) # decision function in {0,1}
acc_test = compute_purity(C_test,Cgt_test,nc)
# Plot
if not (plot==None):
plt.subplot(121)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot*np.ones(n), c=f_test)
plt.title('Continuous decision function')
plt.subplot(122)
plt.scatter(Xtest[:,0], Xtest[:,1], s=size_vertex_plot*np.ones(n), c=C_test)
plt.title('Decision function, iter=' + str(k) + ', acc=' + str(round(acc_test,3)) )
plt.tight_layout()
plt.show()
fig.canvas.draw()
time.sleep(0.01)
return alpha, f_test, C_test, acc_test
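# Hedged usage sketch (added; toy data and parameter values are assumptions, and the
# solver above relies on the old `tol` keyword of scipy.sparse.linalg.cg, so this runs
# with the 2016-era SciPy this gist targets): a fully labeled two-blob problem with a
# Gaussian kernel and no graph regularization term.
def _example_compute_SVM():
    import numpy as np
    np.random.seed(0)
    Xtrain = np.vstack([np.random.randn(40, 2), np.random.randn(40, 2) + 4.0])
    Cgt_train = np.array([0] * 40 + [1] * 40)
    l_train = 2.0 * Cgt_train - 1.0            # labels in {-1,+1}; a 0 would mean "unlabeled"
    Xtest = np.vstack([np.random.randn(40, 2), np.random.randn(40, 2) + 4.0])
    Cgt_test = np.array([0] * 40 + [1] * 40)
    alpha, f_test, C_test, acc_test = compute_SVM(
        Xtrain, Cgt_train, l_train, 'gaussian_kernel',
        param_svm=[10.0, 1.0],                 # [gamma, sigma], assumed values
        Xtest=Xtest, Cgt_test=Cgt_test, plot=None, type_graph=None)
    print(acc_test)                            # test purity; should be high for separated blobs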
def shrink(x,mu):
"""Soft shrinkage operator"""
s = np.sqrt(x**2)
ss = s - mu
ss = ss* ( ss>0.0 )
s = s + ( s<mu )
ss = ss/ s
res = ss* x
return res
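# Worked example (values chosen for illustration): with mu = 1 the soft shrinkage
# operator moves each entry toward zero by 1 and clips at zero, keeping the sign.
def _example_shrink():
    import numpy as np
    print(shrink(np.array([-3.0, 0.5, 0.0, 2.0]), 1.0))   # -> [-2.  0.  0.  1.]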