import numpy as np
import pylab as plt

'''
Performs the Principal Component Analysis of the matrix X.
Matrix must be n * m dimensional,
where n is # features
      m is # examples
X is expected to be an np.matrix, so that * is matrix multiplication.
'''
def PCA(X, varRetained = 0.95, show = False):

    # Compute Covariance Matrix Sigma
    (n, m) = X.shape
    Sigma = 1.0 / m * X * np.transpose(X)

    # Compute eigenvectors and eigenvalues of Sigma
    U, s, V = np.linalg.svd(Sigma, full_matrices = True)

    # Compute the value k: minimum number of features that
    # retains the given variance
    sTot = np.sum(s)
    var_i = np.array([np.sum(s[: i + 1]) / \
                sTot * 100.0 for i in range(n)])
    k = len(var_i[var_i < (varRetained * 100)])
    print '%.2f %% variance retained in %d dimensions' \
                % (var_i[k], k)

    # Plot the cumulative variance retained
    if show:
        plt.plot(var_i)
        plt.xlabel('Number of Features')
        plt.ylabel('Percentage Variance retained')
        plt.title('PCA $\% \sigma^2 $ vs # features')
        plt.show()

    # Compute the reduced dimensional features by projection
    U_reduced = U[:, : k]
    Z = np.transpose(U_reduced) * X

    return Z, U_reduced
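A minimal usage sketch with made-up data (the shapes and values below are illustrative assumptions, not part of the gist); X is wrapped as np.matrix so that the * operator inside PCA performs matrix multiplication:

import numpy as np

# Hypothetical data: 20 features x 500 examples, centered per feature
X = np.random.randn(20, 500)
X = X - X.mean(axis = 1).reshape(-1, 1)

# Wrap as np.matrix so '*' inside PCA is matrix multiplication
Z, U_reduced = PCA(np.matrix(X), varRetained = 0.95, show = False)
print(Z.shape)          # (k, 500) reduced features
print(U_reduced.shape)  # (20, k)  projection matrix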
import csv as csv
import numpy as np
import Activation, logReg, optim, loadData

#################################################################
# reading from csv
print 'Loading Training Data'
csv_train = csv.reader(open('../data/train.csv', 'rb'))
header = csv_train.next()
data = [[map(int, row[1:]), [int(row[0])]] for row in csv_train]

train = loadData.Data()
train.loadList(data, numClasses = 10)
train.NormalizeScale(factor = 255.0)

#################################################################
# PCA of training set
print 'Performing PCA - Principal Component Analysis'
import npPCA
Z, U_reduced = npPCA.PCA(train.X, varRetained = 0.95, show = True)
Hi Bistaumanga,
I am trying to use the pca.py function in one of my simulations and am getting the following error:
//////////////////
Performing PCA - Principal COmponent Analysis
Traceback (most recent call last):
File "TOD_Katdal_MeerKat_Noise_w_mask_simu.py", line 187, in
Fg_pca_fitted, Fg_pca_reduced = fgutil_simu.PCA_NU(tod_xx_mask_freq_time, varRetained = 0.95, show = False) # Freq, Time
File "/home/abhik/IM/MeerKat_IM/1_F_Noise/Meerkat-1-f-Noise/fgutil_simu.py", line 114, in PCA_NU
Sigma = 1.0 / m * X * np.transpose(X)
File "/usr/local/lib/python2.7/dist-packages/numpy/ma/core.py", line 4003, in mul
return multiply(self, other)
File "/usr/local/lib/python2.7/dist-packages/numpy/ma/core.py", line 1016, in call
result = self.f(da, db, *args, **kwargs)
ValueError: operands could not be broadcast together with shapes (526,4150) (4150,526)
////////////////
If I change the Sigma calculation to "Sigma = 1.0 / m * (X.T.dot(X))", it seems to run. Do you think that is OK? Thanks.
Great work, but if you are using SVD, you don't need to calculate the covariance matrix or scatter matrix. See: https://stats.stackexchange.com/questions/134282/relationship-between-svd-and-pca-how-to-use-svd-to-perform-pca
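A minimal sketch of that SVD-on-the-data approach (the data below is made up; X follows the same n features x m examples orientation as the gist). For centered X, Sigma = (1/m) * X * X.T = U * diag(s**2 / m) * U.T, so the left singular vectors of X are the principal directions and s**2 / m are the eigenvalues of Sigma:

import numpy as np

# Hypothetical centered data: 50 features x 200 examples
X = np.random.randn(50, 200)
X = X - X.mean(axis = 1, keepdims = True)
n, m = X.shape

# SVD of the data matrix itself -- no covariance matrix needed
U, s, Vt = np.linalg.svd(X, full_matrices = False)

# Eigenvalues of Sigma = (1/m) X X^T, in decreasing order
eigvals = s ** 2 / m

# Project onto the first k principal components
k = 10
Z = np.dot(U[:, :k].T, X)
print(Z.shape)   # (k, m)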
Hey, the value for Sigma which you've used is doing element-wise multiplication rather than matrix multiplication:
Sigma = 1.0 / m * X * np.transpose( X )
should instead be
Sigma = 1.0 / m * np.dot(X , X.T)
Similarly for the Z matrix in your code.
I hope it helps.
Considering that X is of matrix type, the original code is correct.
X * np.transpose(X) does element-wise multiplication only if X is a plain array.
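A small sketch of that difference (shapes chosen arbitrarily for illustration):

import numpy as np

A = np.random.randn(3, 5)

# With a plain ndarray, '*' is element-wise; shapes (3, 5) and (5, 3)
# cannot be broadcast, so this raises the ValueError reported above
try:
    A * np.transpose(A)
except ValueError as e:
    print(e)

# np.dot gives the matrix product for plain arrays
print(np.dot(A, A.T).shape)    # (3, 3)

# With np.matrix, '*' already means matrix multiplication
M = np.matrix(A)
print((M * M.T).shape)         # (3, 3)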
Principal Component Analysis implemented in Python.