Last active
August 29, 2015 14:09
-
-
Save jnpaulson/e2a0afd677a426b42e08 to your computer and use it in GitHub Desktop.
pca/pcoa plots of the american gut consortium data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(vegan) | |
require(biom) | |
require(metagenomeSeq) | |
files = grep(".biom$",list.files(),value=TRUE) | |
files2= grep(".txt",list.files(),value=TRUE) | |
files3 = sprintf("%s_MRexperiment.rdata",files) | |
# This generates the data - only run once. | |
for(i in 1:length(files)){ | |
obj = load_biom(files[i]) | |
pd = read.csv(files2[i],sep="\t",stringsAsFactors=FALSE) | |
mc = match(colnames(obj),pd[,1]) | |
rownames(pd) = pd[,1] | |
pd = AnnotatedDataFrame(pd[mc,]) | |
fd = AnnotatedDataFrame(fData(obj)) | |
obj = newMRexperiment(MRcounts(obj),featureData = fd,phenoData = pd) | |
obj = cumNorm(obj,p=cumNormStat(obj)) | |
save(obj,file=files3[i]) | |
rm(list=c(obj,pd,fd)) | |
} | |
# this actually plots the data | |
for(i in files3){ | |
load(i) | |
show(i) | |
#genus | |
genus = log2(aggTax(obj,lvl="X6",norm=TRUE,out='matrix')+1) | |
#species | |
species = log2(aggTax(obj,lvl="X7",norm=TRUE,out='matrix')+1) | |
sampleID = factor(sapply(strsplit(colnames(obj),"\\."),function(i){i[1]})) | |
pdf(sprintf("~/Desktop/%s.pdf",i)) | |
plotOrd(obj,main=files[i],pch=21,bg=sampleID) # check options for pcoa / various distances | |
plot(0,0,ylim=c(0,100),xlim=c(0,10),bty="l") | |
legend("topright",legend=unique(sampleID),box.col=NA,fill=unique(sampleID)) | |
dev.off() | |
pdf(sprintf("~/Desktop/%s_genus.pdf",i)) | |
plotOrd(genus,main=files[i],pch=21,bg=sampleID) # check options for pcoa / various distances | |
plot(0,0,ylim=c(0,100),xlim=c(0,10),bty="l") | |
legend("topright",legend=unique(sampleID),box.col=NA,fill=unique(sampleID)) | |
dev.off() | |
pdf(sprintf("~/Desktop/%s_species.pdf",i)) | |
plotOrd(species,main=files[i],pch=21,bg=sampleID) # check options for pcoa / various distances | |
plot(0,0,ylim=c(0,100),xlim=c(0,10),bty="l") | |
legend("topright",legend=unique(sampleID),box.col=NA,fill=unique(sampleID)) | |
dev.off() | |
} | |
## Placing names / separating body sites etc | |
location = sampleID = factor(sapply(strsplit(colnames(obj),"\\."),function(i){i[2]})) | |
locals = lapply(unique(location),function(i){ | |
which(location==i) | |
}) | |
names(locals) = unique(location) | |
locals = locals[which(sapply(locals,length)>1)] | |
for(j in locals){ | |
obj2= obj[,j] | |
genus2=genus[,j] | |
species2 = species[,j] | |
sampleID2 = sampleID[j] | |
pdf(sprintf("~/Desktop/forjustin/%s.pdf",paste(i,j,sep=":"))) | |
vals = plotOrd(obj2,main=files[i],pch=21,bg=sampleID2,ret=TRUE) | |
text(x=vals,labels=sampleID2,cex=.5) | |
plot(0,0,ylim=c(0,100),xlim=c(0,10),bty="l") | |
legend("topright",legend=unique(sampleID),box.col=NA,fill=unique(sampleID)) | |
dev.off() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment