Created
October 27, 2013 20:24
-
-
Save wenhuizhang/7187469 to your computer and use it in GitHub Desktop.
PCA_R_HSI_1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Do an R-mode using prcomp(), for correlation and covariance among variables | |
#for Q-mode PCA :the data set should be transposed before procceeding | |
#Q-mode focuses on correlations and covariance among samples | |
mydata<-read.table(file="/Users/WenhuiZhang/Desktop/hypeScpec_R/R_hyperspectra/mydata.txt",header=TRUE,row.name=1,sep=",") | |
mydata.pca<-prcomp(mydata,retx=TRUE,center=TRUE,scale.=TRUE) | |
#variable means set to zero, and variance set to one sample scores stored in mydata.pca$x | |
#loadings stored in mydata.pca$rotation | |
#singular values (square roots of eigenvalues) stored in mydata.pca$sdev | |
#variable means stored in mydata.pca$center | |
#variable standard deviations stored in mydata.pca$scale | |
sd<-mydata.pca$sdev | |
loadings<-mydata.pca$rotation | |
rownames(loadings)<-colnames(mydata) | |
scores<-mydata.pca$x | |
###############Step 2 : do a PCA longhand in R###################################################### | |
R<-cor(mydata) | |
#calculate a correlation matrix | |
myEig<-eigen(R) | |
#find the eigenvalues and eigenvectors of correlation matrix | |
#eigenvalues stored in myEig$values | |
#eigenvectors (loadings) sotred in myEig$vectors | |
sdLONG<-sqrt(myEig$values) | |
#calculating singular values from eigenvalues | |
loadingsLONG<-myEig$vectors | |
rownames(loadingsLONG)<-colnames(mydata) | |
#saving as loadings, and setting rownames | |
standardize<-function(x){(x-mean(x))/sd(x)} | |
X<-apply(mydata,MARGIN=2,FUN=standardize) | |
#transforming data to zero mean and unit variance | |
scoresLONG<-X %*% loadingsLONG | |
#calculating scores from eigenanalysis results | |
############Step 3 : compare results from the two analysis to demonstrate equivalency.#### | |
########### Maximum differences should be close to zero if the two approches are equivalent #### | |
range(sd - sdLONG) | |
range(loadings -loadingsLONG) | |
range(scores - scoresLONG) | |
##########Step 4 : distance biplot################# | |
quartz(height=7,width=7) | |
plot(scores[,1],scores[,2],xlab="PCA_1",ylab="PCA_2",type="n",xlim=c(min(scores[,1:2]),max(scores[,1:2])),ylim=c(min(scores[,1:2]),max(scores[,1:2]))) | |
arrows(0,0,loadings[,1],loadings[,2],length=0.1,angle=20,col="red") | |
text(loadings[,1]*1.2,loadings[,2]*1.2, rownames(loadings),col="red",cex=0.7) #1.2 as a plotting scale | |
text(scores[,1],scores[,2],rownames(scores),col="blue",cex=0.7) | |
quartz(height=7,width=7) | |
biplot(scores[,1:2],loadings[,1:2],xlab=rownames(scores),ylab=rownames(loadings),cex=0.7) | |
#########Step 5 : correlation biplot################# | |
quartz(height=7,width=7) | |
plot(scores[,1]/sd[1],scores[,2]/sd[2],xlab="PCA_1",ylab="PCA_2",type="n") | |
arrows(0,0,loadings[,1]*sd[1],loadings[,2]*sd[2],length=0.1,angle=20,col="red") | |
text(loadings[,1]*sd[1]*1.2,loadings[,2]*sd[2]*1.2,col="red",cex=0.7) | |
text(scores[,1]/sd[1],scores[,2]/sd[2],rownames(scores),col="blue",cex=0.7) | |
quartz(height=7,width=7) | |
biplot(mydata.pca) | |
##########Step 6 : correlation coeffients between variables and PC/pricipal componets##### | |
correlations<-t(loadings)*sd | |
correlations<-cor(scores,mydata) | |
##########Step 7 : variance against PC numbers, just top PC scores showed########## | |
quartz(height=7,width=7) | |
plot(mydata.pca) | |
####### with variance on a log scale################## | |
quartz(height=7,width=7) | |
plot(log(sd^2),xlab="principal component", ylab="log(variance)",type="b",pch=16) | |
#########Step 8 : find variance along each principal component and the eigenvalues###### | |
newsd<-sd(scores) | |
max(sd-newsd) | |
eigenvalues<-sd^2 | |
sum(eigenvalues) | |
length(mydata) #number of variables for my PCA study | |
########Step 9 : save loadings/scores/singular values######################### | |
#write.table(loadings,file="loadings.txt") | |
#write.table(scores,file="scores.txt") | |
#write.table(sd,file="sd.txt") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello,
Could you let me know where can I download the "mydata.txt" file?
Thanks