- 
      
- 
        Save bistaumanga/6268985 to your computer and use it in GitHub Desktop. 
| import numpy as np | |
| import pylab as plt | |
| ''' | |
| Performs the Principal Component Analysis of the matrix X | |
| Matrix must be n * m dimensions | |
| where n is # features | |
| m is # examples | |
| ''' | |
def PCA(X, varRetained = 0.95, show = False):
    '''
    Project X onto the fewest principal components that retain at
    least the requested share of the total variance.

    X           : 2-D numpy array or numpy matrix of shape (n, m), where
                  n is the number of features and m the number of
                  examples. X is used as given; no mean-centring is
                  performed inside this function.
    varRetained : fraction (0..1) of the total variance to retain.
    show        : when True, plot cumulative percentage variance
                  against the number of components (uses plt).

    Returns (Z, U_reduced):
      U_reduced : the first k left singular vectors of the covariance
                  matrix, shape (n, k)
      Z         : the projection transpose(U_reduced) . X, shape (k, m)

    Raises ValueError if X is not 2-dimensional (shape unpacking).
    '''
    (n, m) = X.shape

    # Covariance matrix Sigma (n x n). np.dot is a true matrix product
    # for both ndarray and np.matrix; `X * X.T` is element-wise on
    # ndarrays and fails to broadcast for non-square X.
    Sigma = 1.0 / m * np.dot(X, np.transpose(X))

    # Singular vectors / values of Sigma (values come sorted descending)
    U, s, _ = np.linalg.svd(Sigma, full_matrices = True)

    # var_i[i] = percentage of variance retained by the first i + 1
    # components. Dividing by the last cumulative sum makes the final
    # entry exactly 100.
    cumulative = np.cumsum(s)
    var_i = cumulative / cumulative[-1] * 100.0

    # Number of entries below the target is the index of the first
    # entry that reaches it; that index corresponds to (index + 1)
    # components. Clip to n so varRetained >= 1 keeps every component
    # instead of indexing past the end.
    below_target = int(np.sum(var_i < (varRetained * 100)))
    k = min(below_target + 1, n)
    print('%.2f %% variance retained in %d dimensions'
          % (var_i[k - 1], k))

    # plot the variance plot
    if show:
        # x axis starts at 1 so it really is the number of features kept
        plt.plot(np.arange(1, n + 1), var_i)
        plt.xlabel('Number of Features')
        plt.ylabel(' Percentage Variance retained')
        # raw string: same characters as before, no invalid escapes
        plt.title(r'PCA $\% \sigma^2 $ vs # features')
        plt.show()

    # compute the reduced dimensional features by projection
    U_reduced = U[:, : k]
    Z = np.dot(np.transpose(U_reduced), X)
    return Z, U_reduced
| import csv as csv | |
| import numpy as np | |
| import Activation, logReg, optim, loadData | |
#################################################################
# reading from csv
print('Loading Training Data')
# The with-block closes the file once every row has been consumed.
# NOTE: mode 'rb' is the Python 2 convention for the csv module; on
# Python 3 the file must be opened in text mode with newline=''.
with open('../data/train.csv', 'rb') as csv_file:
    csv_train = csv.reader(csv_file)
    # First row is pulled off as the header so it is not parsed as data.
    header = next(csv_train)
    # Each record becomes [values, [label]]: column 0 is converted to a
    # one-element label list, the remaining columns to a list of ints.
    data = [[[int(value) for value in row[1:]], [int(row[0])]]
            for row in csv_train]
train = loadData.Data()
train.loadList(data, numClasses = 10)
train.NormalizeScale(factor = 255.0)
#################################################################
# PCA of training set
print('Performing PCA - Principal COmponent Analysis')
import npPCA
# NOTE(review): train.X is presumably laid out as (features, examples),
# which is what npPCA.PCA expects -- confirm against loadData.Data.
Z, U_reduced = npPCA.PCA(train.X, varRetained = 0.95, show = True)
Hi Bistaumanga,
I am trying to use the pca.py function in one of my simulations. I am getting the following error:
//////////////////
Performing PCA - Principal COmponent Analysis
Traceback (most recent call last):
File "TOD_Katdal_MeerKat_Noise_w_mask_simu.py", line 187, in 
Fg_pca_fitted,  Fg_pca_reduced = fgutil_simu.PCA_NU(tod_xx_mask_freq_time, varRetained = 0.95, show = False) # Freq, Time
File "/home/abhik/IM/MeerKat_IM/1_F_Noise/Meerkat-1-f-Noise/fgutil_simu.py", line 114, in PCA_NU
Sigma = 1.0 / m * X * np.transpose(X)
File "/usr/local/lib/python2.7/dist-packages/numpy/ma/core.py", line 4003, in mul
return multiply(self, other)
File "/usr/local/lib/python2.7/dist-packages/numpy/ma/core.py", line 1016, in call
result = self.f(da, db, *args, **kwargs)
ValueError: operands could not be broadcast together with shapes (526,4150) (4150,526)
////////////////
If I change the Sigma calculation to "Sigma = 1.0 / m * (X.T.dot(X))" it seems to run. Do you think it is OK? Thanks.
great work but if you are using svd, we don't need to calculate the co-variance matrix or scatter matrix see : https://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
Hey, the value for Sigma which you've used is doing element-wise multiplication rather than matrix multiplication:
Sigma = 1.0 / m * X * np.transpose( X )
Sigma = 1.0 / m * np.dot(X , X.T)
Similarly, Z matrix in your code.
I hope it helps.
Hey, the value for Sigma which you've used is doing element-wise multiplication rather than matrix multiplication:
Sigma = 1.0 / m * X * np.transpose( X )
Sigma = 1.0 / m * np.dot(X , X.T)
Similarly, Z matrix in your code.
I hope it helps.
Considering that X is matrix-type (np.matrix), the original code is correct.
X * transpose(X) performs element-wise multiplication only if X is an array (np.ndarray).
Principal Component Analysis implemented in python.