Skip to content

Instantly share code, notes, and snippets.

@mbk0asis
Last active April 27, 2016 02:09
Show Gist options
  • Save mbk0asis/7584361473682f449742f6453f7e10cc to your computer and use it in GitHub Desktop.
Save mbk0asis/7584361473682f449742f6453f7e10cc to your computer and use it in GitHub Desktop.
library(DESeq)
library(LSD)
library(ggplot2)
library(gridExtra)
library(RColorBrewer)
# open a count file
setwd('/home/bio1/00-NGS/RNAseq/bov/GSE44023_Chitwood_et_al/DESeq')
setwd('/home/bio1/00-NGS/Cancers/cancer_met_Kyuheum')
countsTable<- read.csv("final.final.csv",row.names=1)
countsTable <- read.delim("/home/bio2/00-NGS/Hairpin/Bismark_results/Read1_only/All.genomecov.intersect.txt",row.names=1)
head(countsTable)
colnames(countsTable) = c("MEF","P0","P1","P2","P6","J1")
head(countsTable)
conds<- factor(c("Biliary","Biliary","Biliary","Biliary","Biliary","Biliary",
"Other","Other","Other","Other","Other","Other","Other","Other","Other","Other",
"Other","Other","Other","Other","Other","Other","Other","Other","Other","Other",
"Other","Other","Other","Other","Other","Other","Other","Other","Other","Other",
"Other","Other","Other","Other","Other","Other","Other","Other","Other","Other",
"Other","Other","Other","Other","Other","Other","Other","Other","Other","Other",
"Other","Other","Other","Other","Other","Other","Other","Other","Other","Other",
"Other","Other","Other","Other","Other","Other","Other","Other","Other","Other",
"Other","Other","Other","Other","Other","Other","Other","Other","Other","Other",
"Other","Other","Other","Other","Other","Other"))
conds<- factor(c("IVF","IVF","IVF",
"cSCNT","cSCNT","cSCNT","cSCNT",
"fSCNT","fSCNT","fSCNT","fSCNT",
"TSA_IVF","TSA_SCNT","BM","BF"))
heatboxplot(log2(countsTable[,1:2]+1),ylim=c(-0.3,20), notch=TRUE,boxonly=F,
ylab = "log2 counts", main = "Count Distribution")
# Define column names
#colnames(countsTable) = c("geneSymbol","MI-1","MI-2","MI-3","MI-4","MI-5","MI-6",
"MN-1","MN-2","MN-3","MN-4","MN-5","MN-6",
"MS-1","MS-2","MS-3","MS-4","MS-5","MS-6",
"FI-1","FI-2","FI-3","FI-4","FI-5","FI-6",
"FN-1","FN-2","FN-3","FN-4","FN-5","FN-6",
"FS-1","FS-2","FS-3","FS-4","FS-5"
,"BM","BF")
#head(countsTable)
# Define conditons of each column (sample)
conds<- factor(c("MI","MI","MI","MI","MI","MI",
"MN","MN","MN","MN","MN","MN",
"MS","MS","MS","MS","MS","MS",
"FI","FI","FI","FI","FI","FI",
"FN","FN","FN","FN","FN","FN",
"FS","FS","FS","FS","FS",
"BM","BF"))
conds<- factor(c("MI","MI","MI","MI","MI","MI",
"MN","MN","MN","MN","MN","MN",
"FI","FI","FI","FI","FI","FI",
"FN","FN","FN","FN","FN","FN",
"BM","BF"))
# Set up cds (count data set)
cds <- newCountDataSet(countsTable,conds)
head(cds)
# Estimate sample size of each sample
cds <- estimateSizeFactors(cds)
sizeFactors(cds)
write.table(sizeFactors(cds),'/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/sizeFactor.txt')
countsNormalized <- counts(cds,normalized=T)
head(countsNormalized)
write.csv(countsNormalized,"cntNorm_Old.TSA.BESF.csv")
# Heatscatter plots
heatboxplot(log2(countsNormalized+1),ylim=c(-0.3,20), notch=TRUE,boxonly=F,
ylab = "log2 counts", main = "Count Normalized Distribution")
# scatter plot - individuals
heatscatter(log2(countsNormalized[,36]),log2(countsNormalized[,14]),
xlab="BM (log2 expression)",ylab="MS_2 (log2 expression)",
cor = TRUE, method = "spearman",
xlim=c(-0.5,17),ylim=c(-0.5,17))
abline(-1,1,col="black",lwd=2)
abline(1,1,col="black",lwd=2)
identify(log(countsNormalized[,1]),log(countsNormalized[,2]),labels=names(countsNormalized[,1]))
####################################################
# Scatter plots (ggplot2)
head(countsNormalized)
p <- qplot(log2(countsNormalized[ ,5]+1),log2(countsNormalized[ ,3]+1),xlab="TSA(+) SCNT", ylab="TSA(-) SCNT")
p + geom_smooth(method="lm", formula = y ~ x,se=F) + geom_abline(intercept = 0,linetype="dotted") + geom_point(size=2, alpha=0.1) + coord_flip() #+ stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE)
# Stat Smooth Function
library(proto)
stat_smooth_func <- function (mapping = NULL, data = NULL, geom = "smooth", position = "identity",
method = "auto", formula = y ~ x, se = TRUE, n = 80, fullrange = FALSE,
level = 0.95, na.rm = FALSE, ...) {
StatSmoothFunc$new(mapping = mapping, data = data, geom = geom, position = position,
method = method, formula = formula, se = se, n = n, fullrange = fullrange,
level = level, na.rm = na.rm, ...)
}
StatSmoothFunc <- proto(ggplot2:::Stat, {
objname <- "smooth"
calculate_groups <- function(., data, scales, method="auto", formula=y~x, ...) {
rows <- daply(data, .(group), function(df) length(unique(df$x)))
if (all(rows == 1) && length(rows) > 1) {
message("geom_smooth: Only one unique x value each group.",
"Maybe you want aes(group = 1)?")
return(data.frame())
}
# Figure out what type of smoothing to do: loess for small datasets,
# gam with a cubic regression basis for large data
# This is based on the size of the _largest_ group.
if (identical(method, "auto")) {
groups <- count(data, "group")
if (max(groups$freq) < 1000) {
method <- "loess"
message('geom_smooth: method="auto" and size of largest group is <1000,',
' so using loess.',
' Use \'method = x\' to change the smoothing method.')
} else {
method <- "gam"
formula <- y ~ s(x, bs = "cs")
message('geom_smooth: method="auto" and size of largest group is >=1000,',
' so using gam with formula: y ~ s(x, bs = "cs").',
' Use \'method = x\' to change the smoothing method.')
}
}
if (identical(method, "gam")) try_require("mgcv")
.super$calculate_groups(., data, scales, method = method, formula = formula, ...)
}
calculate <- function(., data, scales, method="auto", formula=y~x, se = TRUE, n=80, fullrange=FALSE, xseq = NULL, level=0.95, na.rm = FALSE, ...) {
data <- remove_missing(data, na.rm, c("x", "y"), name="stat_smooth")
if (length(unique(data$x)) < 2) {
# Not enough data to perform fit
return(data.frame())
}
if (is.null(data$weight)) data$weight <- 1
if (is.null(xseq)) {
if (is.integer(data$x)) {
if (fullrange) {
xseq <- scale_dimension(scales$x, c(0, 0))
} else {
xseq <- sort(unique(data$x))
}
} else {
if (fullrange) {
range <- scale_dimension(scales$x, c(0, 0))
} else {
range <- range(data$x, na.rm=TRUE)
}
xseq <- seq(range[1], range[2], length=n)
}
}
if (is.character(method)) method <- match.fun(method)
method.special <- function(...)
method(formula, data=data, weights=weight, ...)
model <- safe.call(method.special, list(...), names(formals(method)))
predictdf(model, xseq, se, level)
m = model
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 3),
b = format(coef(m)[2], digits = 3),
r2 = format(summary(m)$r.squared, digits = 3)))
func_string = as.character(as.expression(eq))
data.frame(x=min(data$x)*0.9, y=max(data$y)*0.9, label=func_string)
}
required_aes <- c("x", "y")
default_geom <- function(.) GeomSmooth
})
###################################################################################
# Estimate dispersion
cds <- estimateDispersions(cds)
cds <- estimateDispersions(cds, method = "blind", fitType = "local", sharingMode = "fit-only") # for individual comparison
plotDispEsts(cds)
#plotPCA
vsd <- varianceStabilizingTransformation(cds)
plotPCA(vsd,ntop = 10000)
# source code for 'plotPCA'
plotPCA = function(x, intgroup, ntop=500)
{
require("lattice")
require("genefilter")
rv = rowVars(exprs(x))
select = order(rv, decreasing=TRUE)[seq_len(ntop)]
pca = prcomp(t(exprs(x)[select,]))
fac = factor(apply(pData(vsdFull)[, intgroup], 1, paste, collapse=": "))
colours = brewer.pal(nlevels(fac), "Paired") # colors of points
pcafig = xyplot(PC2 ~ PC1, groups=fac, data=as.data.frame(pca$x), pch=16, cex=2, # can change shapes & sizes of points
aspect = "iso", col=colours,
main = draw.key(key = list(
rect = list(col = colours),
text = list(levels(fac)),
rep = FALSE)))
}
plotPCA(vsd)
##################################################################
### MI_vs_MN #####################################################
##################################################################
result_MI_MN = nbinomTest(cds,"MI","MN")
result_MI_MN = result_MI_MN[order(result_MI_MN$pval), ]
#write.csv(result_MI_MN,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_MI_MN.csv")
## Histogram of p-val distribution
# hist(result_MI_MN$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
## MA plot
with(IVF_TSA, plot(log10(baseMean),log2FoldChange,pch=20,main="MA plot - IVF_TSA",col="lightgrey"))
with(subset(IVF_TSA, pval<.05), points(log10(baseMean),log2FoldChange,pch=20, col="red"))
abline(h=c(-1,1),col="blue")
#volcano plot
with(IVF_TSA, plot(log2FoldChange,-log10(pval),pch=19,main="Volcano plot - IVF_TSA",col="lightgrey"))
with(subset(IVF_TSA, pval<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pval), pch=20, col="red"))
abline(v=c(-1,1),col="blue")
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(IVF_TSA, -log10(pval)>40 & abs(log2FoldChange)>5), textxy(log2FoldChange, -log10(pval), labs=id, offset=-0.5,cex=.7))
##################################################################
### MI_vs_MS #####################################################
##################################################################
result_MI_MS = nbinomTest(cds,"MI","MS")
result_MI_MS = result_MI_MS[order(result_MI_MS$pval), ]
#write.csv(result_MI_MS,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_MI_MS.csv")
# Histogram of p-val distribution
# hist(result_MI_MS$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
# MA plot
plot(
result_MI_MS$baseMean,
result_MI_MS$log2FoldChange,
log="x",pch=20,cex=.7,
col=ifelse(result_MI_MS$pval < .05 ,"red","grey"),
xlab="BaseMean", ylab="FoldChange"
)
abline(h=c(-1,1),col="blue")
##################################################################
### MS_vs_MN #####################################################
##################################################################
result_MS_MN = nbinomTest(cds,"MS","MN")
result_MS_MN = result_MS_MN[order(result_MS_MN$pval), ]
#write.csv(result_MS_MN,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_MS_MN.csv")
# Histogram of p-val distribution
# hist(result_MI_MS$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
# MA plot
plot(
result_MS_MN$baseMean,
result_MS_MN$log2FoldChange,
log="x",pch=20,cex=.7,
col=ifelse(result_MS_MN$pval < .05 ,"red","grey"),
xlab="BaseMean", ylab="FoldChange"
)
abline(h=c(-1,1),col="blue")
##################################################################
### FI_vs_FN #####################################################
##################################################################
result_FI_FN = nbinomTest(cds,"FI","FN")
result_FI_FN = result_FI_FN[order(result_FI_FN$pval), ]
#write.csv(result_FI_FN,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_FI_FN.csv")
# Histogram of p-val distribution
# hist(result_FI_FN$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
# MA plot
plot(
result_FI_FN$baseMean,
result_FI_FN$log2FoldChange,
log="x",pch=20,cex=.7,
col=ifelse(result_FI_FN$pval < .05 ,"red","grey"),
xlab="BaseMean", ylab="FoldChange"
)
abline(h=c(-1,1),col="blue")
##################################################################
### FI_vs_FS #####################################################
##################################################################
result_FI_FS = nbinomTest(cds,"FI","FS")
result_FI_FS = result_FI_FS[order(result_FI_FS$pval), ]
write.csv(result_FI_FS,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_FI_FS.csv")
# Histogram of p-val distribution
# hist(result_FI_FS$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
# MA plot
plot(
result_FI_FS$baseMean,
result_FI_FS$log2FoldChange,
log="x",pch=20,cex=.7,
col=ifelse(result_FI_FS$pval < .05 ,"red","grey"),
xlab="BaseMean", ylab="FoldChange"
)
abline(h=c(-1,1),col="blue")
##################################################################
### FS_vs_FN #####################################################
##################################################################
result_FS_FN = nbinomTest(cds,"FS","FN")
result_FS_FN = result_FS_FN[order(result_FS_FN$pval), ]
write.csv(result_FS_FN,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_FS_FN.csv")
# Histogram of p-val distribution
# hist(result_FS_FN$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
# MA plot
plot(
result_FS_FN$baseMean,
result_FS_FN$log2FoldChange,
log="x",pch=20,cex=.7,
col=ifelse(result_FS_FN$pval < .05 ,"red","grey"),
xlab="BaseMean", ylab="FoldChange"
)
abline(h=c(-1,1),col="blue")
##################################################################
### MI_vs_FI #####################################################
##################################################################
result_MI_FI = nbinomTest(cds,"MI","FI")
result_MI_FI = result_MI_FI[order(result_MI_FI$pval), ]
write.csv(result_MI_FI,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_MI_FI.csv")
# Histogram of p-val distribution
# hist(result_FS_FN$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
# MA plot
plot(
result_MI_FI$baseMean,
result_MI_FI$log2FoldChange,
log="x",pch=20,cex=.7,
col=ifelse(result_MI_FI$pval < .05 ,"red","grey"),
xlab="BaseMean", ylab="FoldChange"
)
abline(h=c(-1,1),col="blue")
##################################################################
### MN_vs_FN #####################################################
##################################################################
result_MN_FN = nbinomTest(cds,"MN","FN")
result_MN_FN = result_MN_FN[order(result_MN_FN$pval), ]
write.csv(result_MN_FN,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_MN_FN.csv")
# Histogram of p-val distribution
# hist(result_FS_FN$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
# MA plot
plot(
result_MN_FN$baseMean,
result_MN_FN$log2FoldChange,
log="x",pch=20,cex=.7,
col=ifelse(result_MN_FN$pval < .05 ,"red","grey"),
xlab="BaseMean", ylab="FoldChange"
)
abline(h=c(-1,1),col="blue")
##################################################################
### MS_vs_FS #####################################################
##################################################################
result_MS_FS = nbinomTest(cds,"MS","FS")
result_MS_FS = result_MS_FS[order(result_MS_FS$pval), ]
write.csv(result_MS_FS,"/home/bio1/00-NGS//RNAseq//bov//niceM//DESeq/DESeq_MS_FS.csv")
# Histogram of p-val distribution
# hist(result_FS_FN$pval, breaks=100, col="skyblue",border="slateblue",main="",xlab="p-value", ylab="Frequency")
# MA plot
plot(
result_MS_FS$baseMean,
result_MS_FS$log2FoldChange,
log="x",pch=20,cex=.7,
col=ifelse(result_MS_FS$pval < .05 ,"red","grey"),
xlab="BaseMean", ylab="FoldChange"
)
abline(h=c(-1,1),col="blue")
## END ##########################################################
#####################################################################
## Merge DESeq_results + cntNorm ##
## Extract rows with p <0.05 ##
#####################################################################
cntNorm <- read.csv('~/00-NGS//RNAseq//bov//niceM//DESeq/cntNorm_ALL.csv')
head(cntNorm)
nrow(cntNorm)
# within Males
MI_MN.merged <- merge(x=result_MI_MN, y=cntNorm, by.x="id", by.y="geneSymbol")
MI_MN.merged.p05 <- MI_MN.merged[MI_MN.merged$pval < 0.05, ]
MI_MN.merged.p05 = MI_MN.merged.p05[order(MI_MN.merged.p05$pval), ]
nrow(MI_MN.merged.p05)
write.table(MI_MN.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/MI_MN.merged.p05")
MI_MS.merged <- merge(x=result_MI_MS, y=cntNorm, by.x="id", by.y="geneSymbol")
MI_MS.merged.p05 <- MI_MS.merged[MI_MS.merged$pval < 0.05, ]
MI_MS.merged.p05 = MI_MS.merged.p05[order(MI_MS.merged.p05$pval), ]
nrow(MI_MS.merged.p05)
write.table(MI_MS.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/MI_MS.merged.p05")
MS_MN.merged <- merge(x=result_MS_MN, y=cntNorm, by.x="id", by.y="geneSymbol")
MS_MN.merged.p05 <- MS_MN.merged[MS_MN.merged$pval < 0.05, ]
MS_MN.merged.p05 = MS_MN.merged.p05[order(MS_MN.merged.p05$pval), ]
nrow(MS_MN.merged.p05)
write.table(MS_MN.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/MS_MN.merged.p05")
# within Females
FI_FN.merged <- merge(x=result_FI_FN, y=cntNorm, by.x="id", by.y="geneSymbol")
FI_FN.merged.p05 <- FI_FN.merged[FI_FN.merged$pval < 0.05, ]
FI_FN.merged.p05 = FI_FN.merged.p05[order(FI_FN.merged.p05$pval), ]
nrow(FI_FN.merged.p05)
write.table(FI_FN.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/FI_FN.merged.p05")
FI_FS.merged <- merge(x=result_FI_FS, y=cntNorm, by.x="id", by.y="geneSymbol")
FI_FS.merged.p05 <- FI_FS.merged[FI_FS.merged$pval < 0.05, ]
FI_FS.merged.p05 = FI_FS.merged.p05[order(FI_FS.merged.p05$pval), ]
nrow(FI_FS.merged.p05)
write.table(FI_FS.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/FI_FS.merged.p05")
FS_FN.merged <- merge(x=result_FS_FN, y=cntNorm, by.x="id", by.y="geneSymbol")
FS_FN.merged.p05 <- FS_FN.merged[FS_FN.merged$pval < 0.05, ]
FS_FN.merged.p05 = FS_FN.merged.p05[order(FS_FN.merged.p05$pval), ]
nrow(FS_FN.merged.p05)
write.table(FS_FN.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/FS_FN.merged.p05")
## Male vs Female
MI_FI.merged <- merge(x=result_MI_FI, y=cntNorm, by.x="id", by.y="geneSymbol")
MI_FI.merged.p05 <- MI_FI.merged[MI_FI.merged$pval < 0.05, ]
MI_FI.merged.p05 = MI_FI.merged.p05[order(MI_FI.merged.p05$pval), ]
nrow(MI_FI.merged.p05)
write.table(MI_FI.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/MI_FI.merged.p05")
MN_FN.merged <- merge(x=result_MN_FN, y=cntNorm, by.x="id", by.y="geneSymbol")
MN_FN.merged.p05 <- MN_FN.merged[MN_FN.merged$pval < 0.05, ]
MN_FN.merged.p05 = MN_FN.merged.p05[order(MN_FN.merged.p05$pval), ]
nrow(MN_FN.merged.p05)
write.table(MN_FN.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/MN_FN.merged.p05")
MS_FS.merged <- merge(x=result_MS_FS, y=cntNorm, by.x="id", by.y="geneSymbol")
MS_FS.merged.p05 <- MS_FS.merged[MS_FS.merged$pval < 0.05, ]
MS_FS.merged.p05 = MS_FS.merged.p05[order(MS_FS.merged.p05$pval), ]
nrow(MS_FS.merged.p05)
write.table(MS_FS.merged.p05,"~/00-NGS//RNAseq/bov/niceM/DESeq/MS_FS.merged.p05")
#############################################################
## Male Common DEGs ##
#############################################################
MI_MN.MI_MS.merged <- merge(x=result_MI_MN, y=result_MI_MN, by.x="id", by.y="id")
MI_MN_MS.merged <- merge(x=MI_MN.MI_MS.merged, y=result_MS_MN, by.x="id", by.y="id")
common_DEG_Male <- MI_MN_MS.merged[MI_MN_MS.merged$pval.x < 0.05, ]
common_DEG_Male <- common_DEG_Male[common_DEG_Male$pval.y < 0.05, ]
common_DEG_Male <- common_DEG_Male[common_DEG_Male$pval < 0.05, ]
write.table(common_DEG_Male,'~/00-NGS//RNAseq//bov//niceM//DESeq/commonDEG_Male_p05.txt')
#############################################################
## Feale Common DEGs ##
#############################################################
FI_FN.FI_FS.merged <- merge(x=result_FI_FN, y=result_FI_FN, by.x="id", by.y="id")
FI_FN_FS.merged <- merge(x=FI_FN.FI_FS.merged, y=result_FS_FN, by.x="id", by.y="id")
common_DEG_Female <- FI_FN_FS.merged[FI_FN_FS.merged$pval.x < 0.05, ]
common_DEG_Female <- common_DEG_Female[common_DEG_Female$pval.y < 0.05, ]
common_DEG_Female <- common_DEG_Female[common_DEG_Female$pval < 0.05, ]
write.table(common_DEG_Female,'~/00-NGS//RNAseq//bov//niceM//DESeq/commonDEG_Female_p05.txt')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment