Created
February 27, 2020 02:52
-
-
Save mbk0asis/7e081d732cedc4cdac6f9027ad8b3482 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(reshape2) | |
library(ggplot2) | |
library(ggpubr) | |
library(tidyverse) | |
library(gridExtra) | |
library(Biobase) | |
library(GEOquery) | |
library(limma) | |
library(matrixStats) | |
################################################################################### | |
setwd("~/Desktop/public_data/") | |
#Schabath <- read.csv("~/Desktop/public_data/48_Schabath_2016.csv", row.names = 1, header = T) | |
Shedden <- read.csv("~/Desktop/public_data/1_Shedden_2008.csv", row.names = 1, header = T) | |
#Rousseaux <- read.csv("~/Desktop/public_data/31_Rousseaux_2013.csv", row.names = 1, header = T) | |
Sato <- read.csv("~/Desktop/public_data/40_Sato_2013.csv", row.names = 1, header = T) | |
#Okayama <- read.csv("~/Desktop/public_data/32_Okayama_2012.csv", row.names = 1, header = T) | |
#Botling <- read.csv("~/Desktop/public_data/38_Botling_2013.csv", row.names = 1, header = T) | |
################################################################################### | |
#setdb1 = 9869 | |
Schabath2 <- data.frame(t(Schabath[grep("^9869$", rownames(Schabath)),])) | |
Shedden2 <- data.frame(t(Shedden[grep("^9869$", rownames(Shedden)),])) | |
Rousseaux2 <- data.frame(t(Rousseaux[grep("^9869$", rownames(Rousseaux)),])) | |
Sato2 <- data.frame(t(Sato[grep("^9869$", rownames(Sato)),])) | |
Okayama2 <- data.frame(t(Okayama[grep("^9869$", rownames(Okayama)),])) | |
Botling2 <- data.frame(t(Botling[grep("^9869$", rownames(Botling)),])) | |
setdb1 <- cbind(Sato2) | |
colnames(setdb1) <- c("setdb1") | |
head(setdb1) | |
m_setdb1 <- setdb1 %>% | |
rownames_to_column("id") %>% | |
reshape2::melt(id.var = c("id")) | |
head(m_setdb1) | |
p1 <- ggplot(m_setdb1, aes(x=variable, y=value, fill=variable)) + theme_bw() + | |
geom_jitter(width=.3) + | |
geom_boxplot(outlier.shape = NA) + | |
theme(legend.position = "none") | |
high3 <- m_setdb1 %>% arrange(desc(value)) %>% head(50) | |
low3 <- m_setdb1 %>% arrange(desc(value)) %>% tail(50) | |
val <- data.frame(high3$value,low3$value) | |
m_val <- reshape2::melt(val) | |
head(m_val) | |
p2 <- ggplot(m_val, aes(x=variable, y=value, fill=variable)) + theme_bw() + | |
geom_jitter(width=.3) + | |
geom_boxplot(outlier.shape = NA) + | |
stat_summary(fun.y=mean, geom="point", color="red", size=3) + | |
stat_summary(fun.y=mean, geom="text", aes(label=round(..y..,2), size =2)) + | |
theme(legend.position = "none") + | |
scale_fill_manual(values=c("orange", "green3")) | |
grid.arrange(p1, p2, nrow = 1, widths = c(1.2, 2)) | |
#ggsave("Botling.png", p3, width = 5, height = 5, units = "in") | |
#################################################################################### | |
#################################################################################### | |
# DE analysis (limma) | |
t_df <- data.frame(t(Sato)) | |
high_matrix <- t_df %>% | |
rownames_to_column("id") %>% | |
inner_join(high3, by="id") | |
low_matrix <- t_df %>% | |
rownames_to_column("id") %>% | |
inner_join(low3, by="id") | |
matrix <- rbind(high_matrix, low_matrix) | |
head(matrix[,1:5]) | |
dim(matrix) | |
matrix2 <- matrix[,-c(1,20358,20359)] | |
matrix2 <- matrix[,-c(1,19619,19620)] | |
matrix2 <- matrix[,-c(1,12261,12262)] | |
rownames(matrix2) <- matrix$id | |
matrix3 <- t(matrix2) | |
head(matrix3) | |
# PCA | |
pca.data <- matrix3 | |
grps <- c(rep("High",50),rep("low",50)) | |
pca.data <- t(pca.data) | |
colnames(pca.data) <- paste(grps, colnames(pca.data), sep="-") | |
pca <- prcomp(pca.data, scale=TRUE) | |
summary(pca) | |
grpcol <- c(rep(rgb(0,0,1),50),rep(rgb(1,0,0),50)) | |
plot(pca$x[,1], pca$x[,2], xlab="PCA1", ylab="PCA2", main="PCA", type="p", pch=19, cex=1, col=grpcol) | |
#text(pca$x[,1], pca$x[,2], rownames(pca.data), cex=0.75) | |
######################################################################### | |
design <- cbind(high=c(rep(1,50),rep(0,50)), | |
low = c(rep(0,50), rep(1,50))) | |
# Differential expression analysis | |
###################################### | |
###################################### | |
###################################### | |
sample <- "high" | |
control <- "low" | |
#cont <- paste(sample,"-",control, sep="") | |
#c <- makeContrasts(cont, levels=design) | |
c <- makeContrasts(HIGH.vs.LOW=high-low, levels=design) | |
# DE analysis | |
eset <- ExpressionSet( | |
assayData = matrix3 | |
) | |
fit <- lmFit(eset,design) | |
fit2<- contrasts.fit(fit,c) | |
fit2<- eBayes(fit2) | |
n <- length(featureNames(eset)) | |
top <- topTable(fit2, coef="HIGH.vs.LOW", n=n, adjust = "fdr") | |
head(top) | |
# means | |
mean_high <- 2^(fit$coefficients[,1]) | |
mean_low <- 2^(fit$coefficients[,2]) | |
means <- data.frame(mean_high, mean_low) | |
head(means) | |
# stdev | |
matrix4 <- 2^(matrix3) | |
sd_high <- rowSds(matrix4[,1:50]) | |
sd_low <- rowSds(matrix4[,51:100]) | |
sd <- data.frame(sd_high, sd_low) | |
rownames(sd) <- rownames(matrix4) | |
head(sd) | |
# attach means, sd | |
top2 <- merge(top,means, by=0) | |
head(top2) | |
top3 <- merge(top2,sd, by.x="Row.names", by.y=0) | |
head(top3) | |
dim(top3) | |
write.csv(top3, "DE_LIMMA_WholGenes_Sato.csv") | |
######################################################################### | |
pval=1e-3 | |
log2FoldChange=log2(1) # "log2(1)" or "0" for not to consider fold-changes | |
nrow1 <- nrow(subset(top, adj.P.Val <= pval & logFC >= log2FoldChange)) | |
nrow2 <- nrow(subset(top, adj.P.Val <= pval & logFC <= -log2FoldChange)) | |
#top$logFC[abs(top) >= 4] <- 4 # replace (foldChange > 4) to 4 | |
top$adj.P.Val[top$adj.P.Val <= 1e-20] <- 1e-20 # replace (pvalue < 1e-15) to 1e-15 | |
with(top, plot(logFC,-log10(adj.P.Val), pch=19,cex=.8, | |
col="grey", | |
main=paste("DEGs\npadj < ",pval,", log2FC > ",round(log2FoldChange,3),"\n",control," (",nrow2,") < > ",sample," (",nrow1,")",sep=""))) | |
with(subset(top, adj.P.Val<=pval & logFC>=log2FoldChange), | |
points(logFC,-log10(adj.P.Val),pch=19,cex=1,col=rgb(1,0,0,.5))) | |
with(subset(top, adj.P.Val<=pval & logFC<=-log2FoldChange), | |
points(logFC,-log10(adj.P.Val),pch=19,cex=1,col=rgb(0,0,1))) | |
abline(h=c(-log10(pval)),col="grey30",lty=2) | |
abline(v=c(-log2FoldChange,log2FoldChange),col="grey30",lty=2) | |
abline(v=0,col="black") | |
options(bitmapType="cairo") | |
install.packages("Cairo") | |
getOption("bitmapType") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment