Skip to content

Instantly share code, notes, and snippets.

@mbk0asis
Created February 27, 2020 02:54
Show Gist options
  • Save mbk0asis/2e5f4573dab140bd3b8c321865e9bf34 to your computer and use it in GitHub Desktop.
Save mbk0asis/2e5f4573dab140bd3b8c321865e9bf34 to your computer and use it in GitHub Desktop.
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages(c("BiocManager","dplyr","gplots"))
BiocManager::install(c("GEOquery"))
#########################################################################
# load libraries
library(Biobase)
library(GEOquery)
library(limma)
library(dplyr)
library(gplots)
#########################################################################
setwd("~/BIO5/GDS5218_Muscle_Exercise_Dr.Yang")
# Download GDS file
gds5218 <- getGEO('GDS5218', destdir=".")
#Or, open an existing GDS file
#gds5218 <- getGEO(filename='GDS5218.soft.gz')
#########################################################################
Meta(gds5218)$channel_count
Meta(gds5218)$description
Meta(gds5218)$feature_count
Meta(gds5218)$platform
Meta(gds5218)$sample_count
Meta(gds5218)$sample_organism
Meta(gds5218)$sample_type
Meta(gds5218)$title
Meta(gds5218)$type
#########################################################################
# To generate a expression dataset
eset <- GDS2eSet(gds5218, do.log2=TRUE)
eset
e <- exprs(eset)
head(e[,1:5])
#write.csv(e,"All.Expression.csv")
# to see the sample phenotypes
#show(pData(phenoData(eset))[,1:3])
boxplot(e, col=c(rep("grey30",15),rep("grey",16),
rep("grey50",15),rep("lightgrey",16),
rep("grey30",12),rep("grey",12),
rep("grey50",12),rep("lightgrey",12)))
# PCA
pca.data <- e
grps <- c(rep("yBase",15),rep("y1st",16),
rep("yB4Final",15),rep("yFinal",16),
rep("oBase",12),rep("o1st",12),
rep("oB4Final",12),rep("oFinal",12))
grpcol <- c(rep(rgb(0,0,1,0.6),15),rep(rgb(.2,.5,1,0.6),16),
rep(rgb(1,1,1,0),15),rep(rgb(0,1,1,0.5),16),
rep(rgb(1,0,0,0.6),12),rep(rgb(1,.5,0,0.6),12),
rep(rgb(1,1,1,0),12),rep(rgb(1,.7,.3,0.6),12))
pca.data <- t(pca.data)
colnames(pca.data) <- paste(grps, colnames(pca.data), sep="-")
pca <- prcomp(pca.data, scale=TRUE)
summary(pca)
plot(pca$x[,1], pca$x[,2], xlab="PCA1", ylab="PCA2", main="PCA", type="p", pch=19, cex=2, col=grpcol)
#text(pca$x[,1], pca$x[,2], rownames(pca.data), cex=0.75)
#########################################################################
design <- cbind(yBase=c(rep(1,15),rep(0,95)),
y1st=c(rep(0,15), rep(1,16), rep(0,79)),
yPL=c(rep(0,31), rep(1,15), rep(0,64)),
yFinal = c(rep(0,46), rep(1,16), rep(0,48)),
oBase=c(rep(0,62), rep(1,12), rep(0,36)),
o1st=c(rep(0,74), rep(1,12), rep(0,24)),
oPL=c(rep(0,86), rep(1,12), rep(0,12)),
oFinal = c(rep(0,98), rep(1,12)))
# Differential expression analysis
######################################
######################################
######################################
pval=0.01
log2FoldChange=log2(1.5) # "log2(1)" or "0" for not to consider fold-changes
control <- "yBase"
sample <- "y1st"
######################################
######################################
######################################
cont <- paste(sample,"-",control, sep="")
c <- makeContrasts(cont, levels=design)
# DE analysis
fit <- lmFit(eset,design)
fit2<- contrasts.fit(fit,c)
fit2<- eBayes(fit2)
n <- length(featureNames(eset))
top <- topTable(fit2, coef=1, n=n, adjust = "fdr")
res <- top[,c(3,10,22:26)]
# aggregate to gene symbols by means
aggdata <-aggregate(res, by=list(res$Gene.symbol),
FUN=mean, na.rm=TRUE)
aggdata2 <- aggdata[,-c(1,2,3)]
row.names(aggdata2) <- aggdata[,1]
head(aggdata2)
dim(aggdata2)
#write.csv(aggdata2,paste("DE_whole_genes_",sample,"_vs_",control,".csv",sep=""))
#########################################################################
nrow1 <- nrow(subset(aggdata2, P.Value <= pval & logFC >= log2FoldChange))
nrow2 <- nrow(subset(aggdata2, P.Value <= pval & logFC <= -log2FoldChange))
#aggdata2$logFC[abs(aggdata2) >= 4] <- 4 # replace (foldChange > 4) to 4
aggdata2$P.Value[aggdata2$P.Value <= 1e-15] <- 1e-15 # replace (pvalue < 1e-15) to 1e-15
with(aggdata2, plot(logFC,-log10(P.Value), pch=19,cex=1,
col=rgb(.5,.5,.5,.5),
main=paste("Whole genes\npval < ",pval,",log2FC > ",round(log2FoldChange,3),"\n",control," (",nrow2,") < > ",sample," (",nrow1,")",sep="")))
with(subset(aggdata2, P.Value<=pval & logFC>=log2FoldChange),
points(logFC,-log10(P.Value),pch=19,cex=.9,col=rgb(1,.2,.2,.5)))
with(subset(aggdata2, P.Value<=pval & logFC<=-log2FoldChange),
points(logFC,-log10(P.Value),pch=19,cex=.9,col=rgb(0,0,1,.5)))
abline(h=c(-log10(pval)),col="grey30",lty=2)
abline(v=c(-log2FoldChange,log2FoldChange),col="grey30",lty=2)
abline(v=0,col="black")
#########################################################################
annot <- res[,1:2]
#write.csv(annot,"annotation.csv")
#res_sig <- subset(aggdata2, P.Value <= pval & abs(logFC) >= log2FoldChange)
#res_sig <- res_sig[order(-res_sig$logFC),]
#head(res_sig)
#dim(res_sig)
#write.csv(res_sig,paste("sigDEGs_",sample,"_vs_",control,".csv",sep=""))
#########################################################################
# check if "CLCF1" is included
clcf <- aggdata[grepl("CLCF1",aggdata[,1]),]
#ccl2 <- aggdata[grepl("^CCL2$",aggdata[,1]),]
#res_sig[grepl("CLCF1",res_sig[,1]),]
#########################################################################
#write.csv(res_sig,file=paste("limma_sigDEGs_",sample,"_vs_",control,".csv",sep=""), quote=F, row.names=F)
#########################################################################
#secretory.proteins <- read.csv("secretory.proteins.csv", header=F)
res_secretory.proteins <- merge(secretory.proteins, aggdata, by.x="V1", by.y="Group.1")
head(res_secretory.proteins)
dim(res_secretory.proteins)
#write.csv(res_secretory.proteins,paste("DE_secretory.proteins_",sample,"_vs_",control,".csv",sep=""))
#
#
#
res_sig <- subset(res_secretory.proteins, P.Value <= pval & abs(logFC) >= log2FoldChange)
res_sig <- res_sig[order(-res_sig$logFC),]
dim(res_sig)
#write.csv(res_sig,paste("sigDEGs_secretoryProteins_",sample,"_vs_",control,".csv",sep=""))
#res_non_sig <- subset(res_secretory.proteins, P.Value > pval & abs(logFC) < log2FoldChange)
#write.csv(res_non_sig,"non_sigDEGs_o1st_oBase.csv")
##########################################################################
nrow1 <- nrow(subset(res_secretory.proteins, P.Value <= pval & logFC >= log2FoldChange))
nrow2 <- nrow(subset(res_secretory.proteins, P.Value <= pval & logFC <= -log2FoldChange))
res_secretory.proteins$logFC[abs(res_secretory.proteins$logFC) >= 4] <- 4 # replace (foldChange > 4) to 4
res_secretory.proteins$P.Value[res_secretory.proteins$P.Value <= 1e-8] <- 1e-8 # replace (pvalue < 1e-15) to 1e-15
# Volcano plot
with(res_secretory.proteins, plot(logFC,-log10(P.Value), pch=19,cex=1.3,
col=rgb(.5,.5,.5,.5),
main=paste("Secretory proteins\npval < ",pval,", log2FC > ",round(log2FoldChange,3),"\n",control," (",nrow2,") < > ",sample," (",nrow1,")",sep="")))
with(subset(res_secretory.proteins, P.Value<=pval & logFC>=log2FoldChange),
points(logFC,-log10(P.Value),pch=19,cex=1.2,col=rgb(1,.2,.2,.6)))
with(subset(res_secretory.proteins, P.Value<=pval & logFC<=-log2FoldChange),
points(logFC,-log10(P.Value),pch=19,cex=1.2,col=rgb(0,0,1,.6)))
abline(h=c(-log10(pval)),col="grey30",lty=2)
abline(v=c(-log2FoldChange,log2FoldChange),col="grey30",lty=2)
abline(v=0,col="black")
points(clcf$logFC,-log10(clcf$P.Value),col="blue", pch=19)
#text(clcf$logFC,-log10(clcf$P.Value), "CLCF1", col="blue", pos=1, cex = 1.3)
#points(ccl2$logFC,-log10(ccl2$P.Value),col="blue", pch=19)
#text(ccl2$logFC,-log10(ccl2$P.Value), "CCL2", col="blue", pos=3, cex = 1.3)
#########################################################################
#########################################################################
common <- read.csv("common2", header = F)
common
common.genes1 <- merge(common,res_secretory.proteins)
write.csv(common.genes1,paste("common.genes2.",sample,"_",control,".csv",sep=""))
#
#########################################################################
#########################################################################
common.genes <- merge(common,e)
head(common.genes)
dim(common.genes)
dta2 <- common.genes[,-c(1)]
head(dta2[,1:5])
dim(dta2)
#########################################################################
#########################################################################
test <- read.csv("secretory.proteins.annot.Expression.csv", header = F,row.names = 1)
head(test[,1:5])
agg_test <-aggregate(test, by=list(test$V2),
FUN=mean, na.rm=TRUE)
agg_test2 <- agg_test[,-c(1,2,34:48,89:100)]
rownames(agg_test2) <- agg_test[,1]
colnames(agg_test2) <-c(rep("yBase",15), rep("y1st",16), rep("yFinal",16),
rep("oBase",12), rep("o1st",12), rep("oFinal",12))
dim(agg_test2)
write.csv(agg_test2,"Expression.matrix_secretory.proteins.csv")
library(gplots)
mean_yBase <- rowMeans(agg_test2[,c(1:15)])
#mean_y1st <- rowMeans(dta2[,c(16:31)])
#mean_yFinal <- rowMeans(dta2[,c(46:61)])
fc <- agg_test2 - mean_yBase
head(fc)
n2 <- grep("219500_at",rownames(fc))
n3 <- nrow(fc)
my_palette <- colorRampPalette(c("steelblue", "white", "red"))
breaks = unique(c(seq(-1,0,length=75), seq(0,1,length=75)))
heatmap.2(as.matrix(fc), main="Secretory Proteins (2772 genes)\nlog2 (Rel. to yBase)",
breaks = breaks, col=my_palette,density="none",trace="none",
Rowv = T,Colv = T, dendrogram="both", margins = c(8,8),
key.title="Pearson",key.xlab = NULL,key.ylab = NULL,
ColSideColors=c(rep("red",15), rep("green",16), rep("blue",16),
rep("orange",12), rep("darkgreen",12), rep("darkblue",12)))
# RowSideColors=c(rep("white",n2-1),rep("blue",1),rep("white",n3-n2)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment