Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save mbk0asis/7e081d732cedc4cdac6f9027ad8b3482 to your computer and use it in GitHub Desktop.
Save mbk0asis/7e081d732cedc4cdac6f9027ad8b3482 to your computer and use it in GitHub Desktop.
library(reshape2)
library(ggplot2)
library(ggpubr)
library(tidyverse)
library(gridExtra)
library(Biobase)
library(GEOquery)
library(limma)
library(matrixStats)
###################################################################################
setwd("~/Desktop/public_data/")
#Schabath <- read.csv("~/Desktop/public_data/48_Schabath_2016.csv", row.names = 1, header = T)
Shedden <- read.csv("~/Desktop/public_data/1_Shedden_2008.csv", row.names = 1, header = T)
#Rousseaux <- read.csv("~/Desktop/public_data/31_Rousseaux_2013.csv", row.names = 1, header = T)
Sato <- read.csv("~/Desktop/public_data/40_Sato_2013.csv", row.names = 1, header = T)
#Okayama <- read.csv("~/Desktop/public_data/32_Okayama_2012.csv", row.names = 1, header = T)
#Botling <- read.csv("~/Desktop/public_data/38_Botling_2013.csv", row.names = 1, header = T)
###################################################################################
#setdb1 = 9869
Schabath2 <- data.frame(t(Schabath[grep("^9869$", rownames(Schabath)),]))
Shedden2 <- data.frame(t(Shedden[grep("^9869$", rownames(Shedden)),]))
Rousseaux2 <- data.frame(t(Rousseaux[grep("^9869$", rownames(Rousseaux)),]))
Sato2 <- data.frame(t(Sato[grep("^9869$", rownames(Sato)),]))
Okayama2 <- data.frame(t(Okayama[grep("^9869$", rownames(Okayama)),]))
Botling2 <- data.frame(t(Botling[grep("^9869$", rownames(Botling)),]))
setdb1 <- cbind(Sato2)
colnames(setdb1) <- c("setdb1")
head(setdb1)
m_setdb1 <- setdb1 %>%
rownames_to_column("id") %>%
reshape2::melt(id.var = c("id"))
head(m_setdb1)
p1 <- ggplot(m_setdb1, aes(x=variable, y=value, fill=variable)) + theme_bw() +
geom_jitter(width=.3) +
geom_boxplot(outlier.shape = NA) +
theme(legend.position = "none")
high3 <- m_setdb1 %>% arrange(desc(value)) %>% head(50)
low3 <- m_setdb1 %>% arrange(desc(value)) %>% tail(50)
val <- data.frame(high3$value,low3$value)
m_val <- reshape2::melt(val)
head(m_val)
p2 <- ggplot(m_val, aes(x=variable, y=value, fill=variable)) + theme_bw() +
geom_jitter(width=.3) +
geom_boxplot(outlier.shape = NA) +
stat_summary(fun.y=mean, geom="point", color="red", size=3) +
stat_summary(fun.y=mean, geom="text", aes(label=round(..y..,2), size =2)) +
theme(legend.position = "none") +
scale_fill_manual(values=c("orange", "green3"))
grid.arrange(p1, p2, nrow = 1, widths = c(1.2, 2))
#ggsave("Botling.png", p3, width = 5, height = 5, units = "in")
####################################################################################
####################################################################################
# DE analysis (limma)
t_df <- data.frame(t(Sato))
high_matrix <- t_df %>%
rownames_to_column("id") %>%
inner_join(high3, by="id")
low_matrix <- t_df %>%
rownames_to_column("id") %>%
inner_join(low3, by="id")
matrix <- rbind(high_matrix, low_matrix)
head(matrix[,1:5])
dim(matrix)
matrix2 <- matrix[,-c(1,20358,20359)]
matrix2 <- matrix[,-c(1,19619,19620)]
matrix2 <- matrix[,-c(1,12261,12262)]
rownames(matrix2) <- matrix$id
matrix3 <- t(matrix2)
head(matrix3)
# PCA
pca.data <- matrix3
grps <- c(rep("High",50),rep("low",50))
pca.data <- t(pca.data)
colnames(pca.data) <- paste(grps, colnames(pca.data), sep="-")
pca <- prcomp(pca.data, scale=TRUE)
summary(pca)
grpcol <- c(rep(rgb(0,0,1),50),rep(rgb(1,0,0),50))
plot(pca$x[,1], pca$x[,2], xlab="PCA1", ylab="PCA2", main="PCA", type="p", pch=19, cex=1, col=grpcol)
#text(pca$x[,1], pca$x[,2], rownames(pca.data), cex=0.75)
#########################################################################
design <- cbind(high=c(rep(1,50),rep(0,50)),
low = c(rep(0,50), rep(1,50)))
# Differential expression analysis
######################################
######################################
######################################
sample <- "high"
control <- "low"
#cont <- paste(sample,"-",control, sep="")
#c <- makeContrasts(cont, levels=design)
c <- makeContrasts(HIGH.vs.LOW=high-low, levels=design)
# DE analysis
eset <- ExpressionSet(
assayData = matrix3
)
fit <- lmFit(eset,design)
fit2<- contrasts.fit(fit,c)
fit2<- eBayes(fit2)
n <- length(featureNames(eset))
top <- topTable(fit2, coef="HIGH.vs.LOW", n=n, adjust = "fdr")
head(top)
# means
mean_high <- 2^(fit$coefficients[,1])
mean_low <- 2^(fit$coefficients[,2])
means <- data.frame(mean_high, mean_low)
head(means)
# stdev
matrix4 <- 2^(matrix3)
sd_high <- rowSds(matrix4[,1:50])
sd_low <- rowSds(matrix4[,51:100])
sd <- data.frame(sd_high, sd_low)
rownames(sd) <- rownames(matrix4)
head(sd)
# attach means, sd
top2 <- merge(top,means, by=0)
head(top2)
top3 <- merge(top2,sd, by.x="Row.names", by.y=0)
head(top3)
dim(top3)
write.csv(top3, "DE_LIMMA_WholGenes_Sato.csv")
#########################################################################
pval=1e-3
log2FoldChange=log2(1) # "log2(1)" or "0" for not to consider fold-changes
nrow1 <- nrow(subset(top, adj.P.Val <= pval & logFC >= log2FoldChange))
nrow2 <- nrow(subset(top, adj.P.Val <= pval & logFC <= -log2FoldChange))
#top$logFC[abs(top) >= 4] <- 4 # replace (foldChange > 4) to 4
top$adj.P.Val[top$adj.P.Val <= 1e-20] <- 1e-20 # replace (pvalue < 1e-15) to 1e-15
with(top, plot(logFC,-log10(adj.P.Val), pch=19,cex=.8,
col="grey",
main=paste("DEGs\npadj < ",pval,", log2FC > ",round(log2FoldChange,3),"\n",control," (",nrow2,") < > ",sample," (",nrow1,")",sep="")))
with(subset(top, adj.P.Val<=pval & logFC>=log2FoldChange),
points(logFC,-log10(adj.P.Val),pch=19,cex=1,col=rgb(1,0,0,.5)))
with(subset(top, adj.P.Val<=pval & logFC<=-log2FoldChange),
points(logFC,-log10(adj.P.Val),pch=19,cex=1,col=rgb(0,0,1)))
abline(h=c(-log10(pval)),col="grey30",lty=2)
abline(v=c(-log2FoldChange,log2FoldChange),col="grey30",lty=2)
abline(v=0,col="black")
options(bitmapType="cairo")
install.packages("Cairo")
getOption("bitmapType")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment