-
-
Save understar/9031369 to your computer and use it in GitHub Desktop.
PCA python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from numpy import mean,cov,double,cumsum,dot,linalg,array,rank | |
from pylab import plot,subplot,axis,stem,show,figure | |
def princomp(A):
    """Perform principal component analysis (PCA) on the n-by-p matrix A.

    Rows of A correspond to observations, columns to variables.

    Parameters
    ----------
    A : numpy.ndarray, shape (n, p)
        Data matrix (anything supporting ``.T`` and array arithmetic).

    Returns
    -------
    coeff : numpy.ndarray, shape (p, p)
        Each column holds the coefficients of one principal component
        (a unit eigenvector of the covariance matrix of A). Columns are
        ordered by decreasing eigenvalue, and each column's sign is chosen
        so that its largest-magnitude entry is positive.
    score : numpy.ndarray, shape (p, n)
        The centered data expressed in the principal component space.
        Rows correspond to components, columns to observations.
    latent : numpy.ndarray, shape (p,)
        Eigenvalues of the covariance matrix of A, in decreasing order.
    """
    # Center every variable on its mean; M is p-by-n (variables in rows),
    # which is the layout cov() expects by default.
    M = (A - mean(A.T, axis=1)).T
    p = M.shape[0]

    # cov() collapses to a 0-d array when there is a single variable;
    # the eigen-solver needs a square 2-D matrix.
    covariance = cov(M).reshape(p, p)

    # The covariance matrix is symmetric, so use the symmetric solver:
    # it returns real eigenvalues and orthonormal eigenvectors, whereas the
    # general solver gives no such guarantee.
    latent, coeff = linalg.eigh(covariance)

    # Order the components by decreasing explained variance.
    order = latent.argsort()[::-1]
    latent = latent[order]
    coeff = coeff[:, order]

    # An eigenvector is only defined up to sign; pin the sign so the result
    # is reproducible (largest-magnitude entry of each column is positive).
    pivot = coeff[abs(coeff).argmax(axis=0), range(p)]
    coeff[:, pivot < 0] *= -1

    # Project the centered data onto the principal components.
    score = dot(coeff.T, M)
    return coeff, score, latent
# Sample data: two variables (rows) observed ten times (columns).
A = array([[2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9],
           [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1]])

# princomp expects observations in rows, hence the transpose.
coeff, score, latent = princomp(A.T)

figure()

# Left panel: the original data with the principal axes drawn through
# the data mean.
subplot(121)
m = mean(A, axis=1)
# Every eigenvector describes the direction of a principal component.
# Eigenvectors are the COLUMNS of coeff, so component k points along
# (coeff[0, k], coeff[1, k]); each axis is drawn with length 2.
plot([m[0], m[0] - 2 * coeff[0, 0]], [m[1], m[1] - 2 * coeff[1, 0]], '--k')
plot([m[0], m[0] + 2 * coeff[0, 1]], [m[1], m[1] + 2 * coeff[1, 1]], '--k')
plot(A[0, :], A[1, :], 'ob')  # the data
axis('equal')

# Right panel: the data expressed in the principal component space
# (one row of score per component).
subplot(122)
plot(score[0, :], score[1, :], '*g')
axis('equal')
show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment