Created
February 27, 2020 02:54
-
-
Save mbk0asis/2e5f4573dab140bd3b8c321865e9bf34 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if (!requireNamespace("BiocManager", quietly = TRUE)) | |
install.packages(c("BiocManager","dplyr","gplots")) | |
BiocManager::install(c("GEOquery")) | |
######################################################################### | |
# load libraries | |
library(Biobase) | |
library(GEOquery) | |
library(limma) | |
library(dplyr) | |
library(gplots) | |
######################################################################### | |
setwd("~/BIO5/GDS5218_Muscle_Exercise_Dr.Yang") | |
# Download GDS file | |
gds5218 <- getGEO('GDS5218', destdir=".") | |
#Or, open an existing GDS file | |
#gds5218 <- getGEO(filename='GDS5218.soft.gz') | |
######################################################################### | |
Meta(gds5218)$channel_count | |
Meta(gds5218)$description | |
Meta(gds5218)$feature_count | |
Meta(gds5218)$platform | |
Meta(gds5218)$sample_count | |
Meta(gds5218)$sample_organism | |
Meta(gds5218)$sample_type | |
Meta(gds5218)$title | |
Meta(gds5218)$type | |
######################################################################### | |
# To generate a expression dataset | |
eset <- GDS2eSet(gds5218, do.log2=TRUE) | |
eset | |
e <- exprs(eset) | |
head(e[,1:5]) | |
#write.csv(e,"All.Expression.csv") | |
# to see the sample phenotypes | |
#show(pData(phenoData(eset))[,1:3]) | |
boxplot(e, col=c(rep("grey30",15),rep("grey",16), | |
rep("grey50",15),rep("lightgrey",16), | |
rep("grey30",12),rep("grey",12), | |
rep("grey50",12),rep("lightgrey",12))) | |
# PCA | |
pca.data <- e | |
grps <- c(rep("yBase",15),rep("y1st",16), | |
rep("yB4Final",15),rep("yFinal",16), | |
rep("oBase",12),rep("o1st",12), | |
rep("oB4Final",12),rep("oFinal",12)) | |
grpcol <- c(rep(rgb(0,0,1,0.6),15),rep(rgb(.2,.5,1,0.6),16), | |
rep(rgb(1,1,1,0),15),rep(rgb(0,1,1,0.5),16), | |
rep(rgb(1,0,0,0.6),12),rep(rgb(1,.5,0,0.6),12), | |
rep(rgb(1,1,1,0),12),rep(rgb(1,.7,.3,0.6),12)) | |
pca.data <- t(pca.data) | |
colnames(pca.data) <- paste(grps, colnames(pca.data), sep="-") | |
pca <- prcomp(pca.data, scale=TRUE) | |
summary(pca) | |
plot(pca$x[,1], pca$x[,2], xlab="PCA1", ylab="PCA2", main="PCA", type="p", pch=19, cex=2, col=grpcol) | |
#text(pca$x[,1], pca$x[,2], rownames(pca.data), cex=0.75) | |
######################################################################### | |
design <- cbind(yBase=c(rep(1,15),rep(0,95)), | |
y1st=c(rep(0,15), rep(1,16), rep(0,79)), | |
yPL=c(rep(0,31), rep(1,15), rep(0,64)), | |
yFinal = c(rep(0,46), rep(1,16), rep(0,48)), | |
oBase=c(rep(0,62), rep(1,12), rep(0,36)), | |
o1st=c(rep(0,74), rep(1,12), rep(0,24)), | |
oPL=c(rep(0,86), rep(1,12), rep(0,12)), | |
oFinal = c(rep(0,98), rep(1,12))) | |
# Differential expression analysis | |
###################################### | |
###################################### | |
###################################### | |
pval=0.01 | |
log2FoldChange=log2(1.5) # "log2(1)" or "0" for not to consider fold-changes | |
control <- "yBase" | |
sample <- "y1st" | |
###################################### | |
###################################### | |
###################################### | |
cont <- paste(sample,"-",control, sep="") | |
c <- makeContrasts(cont, levels=design) | |
# DE analysis | |
fit <- lmFit(eset,design) | |
fit2<- contrasts.fit(fit,c) | |
fit2<- eBayes(fit2) | |
n <- length(featureNames(eset)) | |
top <- topTable(fit2, coef=1, n=n, adjust = "fdr") | |
res <- top[,c(3,10,22:26)] | |
# aggregate to gene symbols by means | |
aggdata <-aggregate(res, by=list(res$Gene.symbol), | |
FUN=mean, na.rm=TRUE) | |
aggdata2 <- aggdata[,-c(1,2,3)] | |
row.names(aggdata2) <- aggdata[,1] | |
head(aggdata2) | |
dim(aggdata2) | |
#write.csv(aggdata2,paste("DE_whole_genes_",sample,"_vs_",control,".csv",sep="")) | |
######################################################################### | |
nrow1 <- nrow(subset(aggdata2, P.Value <= pval & logFC >= log2FoldChange)) | |
nrow2 <- nrow(subset(aggdata2, P.Value <= pval & logFC <= -log2FoldChange)) | |
#aggdata2$logFC[abs(aggdata2) >= 4] <- 4 # replace (foldChange > 4) to 4 | |
aggdata2$P.Value[aggdata2$P.Value <= 1e-15] <- 1e-15 # replace (pvalue < 1e-15) to 1e-15 | |
with(aggdata2, plot(logFC,-log10(P.Value), pch=19,cex=1, | |
col=rgb(.5,.5,.5,.5), | |
main=paste("Whole genes\npval < ",pval,",log2FC > ",round(log2FoldChange,3),"\n",control," (",nrow2,") < > ",sample," (",nrow1,")",sep=""))) | |
with(subset(aggdata2, P.Value<=pval & logFC>=log2FoldChange), | |
points(logFC,-log10(P.Value),pch=19,cex=.9,col=rgb(1,.2,.2,.5))) | |
with(subset(aggdata2, P.Value<=pval & logFC<=-log2FoldChange), | |
points(logFC,-log10(P.Value),pch=19,cex=.9,col=rgb(0,0,1,.5))) | |
abline(h=c(-log10(pval)),col="grey30",lty=2) | |
abline(v=c(-log2FoldChange,log2FoldChange),col="grey30",lty=2) | |
abline(v=0,col="black") | |
######################################################################### | |
annot <- res[,1:2] | |
#write.csv(annot,"annotation.csv") | |
#res_sig <- subset(aggdata2, P.Value <= pval & abs(logFC) >= log2FoldChange) | |
#res_sig <- res_sig[order(-res_sig$logFC),] | |
#head(res_sig) | |
#dim(res_sig) | |
#write.csv(res_sig,paste("sigDEGs_",sample,"_vs_",control,".csv",sep="")) | |
######################################################################### | |
# check if "CLCF1" is included | |
clcf <- aggdata[grepl("CLCF1",aggdata[,1]),] | |
#ccl2 <- aggdata[grepl("^CCL2$",aggdata[,1]),] | |
#res_sig[grepl("CLCF1",res_sig[,1]),] | |
######################################################################### | |
#write.csv(res_sig,file=paste("limma_sigDEGs_",sample,"_vs_",control,".csv",sep=""), quote=F, row.names=F) | |
######################################################################### | |
#secretory.proteins <- read.csv("secretory.proteins.csv", header=F) | |
res_secretory.proteins <- merge(secretory.proteins, aggdata, by.x="V1", by.y="Group.1") | |
head(res_secretory.proteins) | |
dim(res_secretory.proteins) | |
#write.csv(res_secretory.proteins,paste("DE_secretory.proteins_",sample,"_vs_",control,".csv",sep="")) | |
# | |
# | |
# | |
res_sig <- subset(res_secretory.proteins, P.Value <= pval & abs(logFC) >= log2FoldChange) | |
res_sig <- res_sig[order(-res_sig$logFC),] | |
dim(res_sig) | |
#write.csv(res_sig,paste("sigDEGs_secretoryProteins_",sample,"_vs_",control,".csv",sep="")) | |
#res_non_sig <- subset(res_secretory.proteins, P.Value > pval & abs(logFC) < log2FoldChange) | |
#write.csv(res_non_sig,"non_sigDEGs_o1st_oBase.csv") | |
########################################################################## | |
nrow1 <- nrow(subset(res_secretory.proteins, P.Value <= pval & logFC >= log2FoldChange)) | |
nrow2 <- nrow(subset(res_secretory.proteins, P.Value <= pval & logFC <= -log2FoldChange)) | |
res_secretory.proteins$logFC[abs(res_secretory.proteins$logFC) >= 4] <- 4 # replace (foldChange > 4) to 4 | |
res_secretory.proteins$P.Value[res_secretory.proteins$P.Value <= 1e-8] <- 1e-8 # replace (pvalue < 1e-15) to 1e-15 | |
# Volcano plot | |
with(res_secretory.proteins, plot(logFC,-log10(P.Value), pch=19,cex=1.3, | |
col=rgb(.5,.5,.5,.5), | |
main=paste("Secretory proteins\npval < ",pval,", log2FC > ",round(log2FoldChange,3),"\n",control," (",nrow2,") < > ",sample," (",nrow1,")",sep=""))) | |
with(subset(res_secretory.proteins, P.Value<=pval & logFC>=log2FoldChange), | |
points(logFC,-log10(P.Value),pch=19,cex=1.2,col=rgb(1,.2,.2,.6))) | |
with(subset(res_secretory.proteins, P.Value<=pval & logFC<=-log2FoldChange), | |
points(logFC,-log10(P.Value),pch=19,cex=1.2,col=rgb(0,0,1,.6))) | |
abline(h=c(-log10(pval)),col="grey30",lty=2) | |
abline(v=c(-log2FoldChange,log2FoldChange),col="grey30",lty=2) | |
abline(v=0,col="black") | |
points(clcf$logFC,-log10(clcf$P.Value),col="blue", pch=19) | |
#text(clcf$logFC,-log10(clcf$P.Value), "CLCF1", col="blue", pos=1, cex = 1.3) | |
#points(ccl2$logFC,-log10(ccl2$P.Value),col="blue", pch=19) | |
#text(ccl2$logFC,-log10(ccl2$P.Value), "CCL2", col="blue", pos=3, cex = 1.3) | |
######################################################################### | |
######################################################################### | |
common <- read.csv("common2", header = F) | |
common | |
common.genes1 <- merge(common,res_secretory.proteins) | |
write.csv(common.genes1,paste("common.genes2.",sample,"_",control,".csv",sep="")) | |
# | |
######################################################################### | |
######################################################################### | |
common.genes <- merge(common,e) | |
head(common.genes) | |
dim(common.genes) | |
dta2 <- common.genes[,-c(1)] | |
head(dta2[,1:5]) | |
dim(dta2) | |
######################################################################### | |
######################################################################### | |
test <- read.csv("secretory.proteins.annot.Expression.csv", header = F,row.names = 1) | |
head(test[,1:5]) | |
agg_test <-aggregate(test, by=list(test$V2), | |
FUN=mean, na.rm=TRUE) | |
agg_test2 <- agg_test[,-c(1,2,34:48,89:100)] | |
rownames(agg_test2) <- agg_test[,1] | |
colnames(agg_test2) <-c(rep("yBase",15), rep("y1st",16), rep("yFinal",16), | |
rep("oBase",12), rep("o1st",12), rep("oFinal",12)) | |
dim(agg_test2) | |
write.csv(agg_test2,"Expression.matrix_secretory.proteins.csv") | |
library(gplots) | |
mean_yBase <- rowMeans(agg_test2[,c(1:15)]) | |
#mean_y1st <- rowMeans(dta2[,c(16:31)]) | |
#mean_yFinal <- rowMeans(dta2[,c(46:61)]) | |
fc <- agg_test2 - mean_yBase | |
head(fc) | |
n2 <- grep("219500_at",rownames(fc)) | |
n3 <- nrow(fc) | |
my_palette <- colorRampPalette(c("steelblue", "white", "red")) | |
breaks = unique(c(seq(-1,0,length=75), seq(0,1,length=75))) | |
heatmap.2(as.matrix(fc), main="Secretory Proteins (2772 genes)\nlog2 (Rel. to yBase)", | |
breaks = breaks, col=my_palette,density="none",trace="none", | |
Rowv = T,Colv = T, dendrogram="both", margins = c(8,8), | |
key.title="Pearson",key.xlab = NULL,key.ylab = NULL, | |
ColSideColors=c(rep("red",15), rep("green",16), rep("blue",16), | |
rep("orange",12), rep("darkgreen",12), rep("darkblue",12))) | |
# RowSideColors=c(rep("white",n2-1),rep("blue",1),rep("white",n3-n2))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment