Last active
May 8, 2019 09:55
-
-
Save nilesh-tawari/d6dd3951de0ff6671ccd5b096fe13958 to your computer and use it in GitHub Desktop.
featureCountsDEseq2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript | |
################################################ | |
################################################ | |
## REQUIREMENTS ## | |
################################################ | |
################################################ | |
## DIFFERENTIAL ANALYSIS, SCATTERPLOTS AND PCA FOR SAMPLES IN FEATURECOUNTS FILE | |
## - FIRST SIX COLUMNS OF FEATURECOUNTS_FILE SHOULD BE INTERVAL INFO. REMAINDER OF COLUMNS SHOULD BE SAMPLES-SPECIFIC COUNTS. | |
## - SAMPLE NAMES HAVE TO END IN "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS. | |
## - BAM_SUFFIX IS PORTION OF FILENAME AFTER SAMPLE NAME IN FEATURECOUNTS COLUMN SAMPLE NAMES E.G. ".rmDup.bam" if "DRUG_R1.rmDup.bam" | |
## - PACKAGES BELOW NEED TO BE AVAILABLE TO LOAD WHEN RUNNING R | |
################################################ | |
################################################ | |
## LOAD LIBRARIES ## | |
################################################ | |
################################################ | |
library(optparse) | |
library(DESeq2) | |
library(vsn) | |
library(ggplot2) | |
library(RColorBrewer) | |
library(pheatmap) | |
library(lattice) | |
################################################ | |
################################################ | |
## PARSE COMMAND-LINE PARAMETERS ## | |
################################################ | |
################################################ | |
# option_list <- list(make_option(c("-i", "--featurecount_file"), type="character", default=NULL, help="Feature count file generated by the SubRead featureCounts command.", metavar="featurecount_file"), | |
# make_option(c("-g", "--group_file"), type="character", default=NULL, help="Condition file, with sample id and groups information.", metavar="group_file"), | |
# make_option(c("-b", "--bam_suffix"), type="character", default=NULL, help="Portion of filename after sample name in featurecount file header e.g. '.rmDup.bam' if 'DRUG_R1.rmDup.bam'", metavar="bam_suffix"), | |
# make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"), | |
# make_option(c("-p", "--outprefix"), type="character", default='differential', help="Output prefix", metavar="character"), | |
# make_option(c("-s", "--outsuffix"), type="character", default='', help="Output suffix for comparison-level results", metavar="character")) | |
# | |
# opt_parser <- OptionParser(option_list=option_list) | |
# opt <- parse_args(opt_parser) | |
#if (is.null(opt$featurecount_file)){ | |
# print_help(opt_parser) | |
# stop("Please provide featurecount file.", call.=FALSE) | |
#} | |
# if (is.null(opt$bam_suffix)){ | |
# print_help(opt_parser) | |
# stop("Please provide bam suffix in header of featurecount file.", call.=FALSE) | |
# } | |
################################################ | |
################################################ | |
## READ IN COUNTS FILE ## | |
################################################ | |
################################################ | |
#count.table <- read.delim(file=opt$featurecount_file,header=TRUE,skip=1) | |
setwd("/lrlhps/users/c274411/07_cat_RNAseq/analysis/results/featureCounts/DEseq2/test") | |
#analysis_func_go_kegg | |
count.table <- read.csv('analysis_fc.txt', sep="\t", head=TRUE) | |
coldata <- read.csv('analysis_pheno.txt', sep="\t", row.names=1) | |
rownames(count.table) <- count.table[,1] | |
#resultDESeq2[,1] <- NULL | |
#colnames(count.table) <- gsub(opt$bam_suffix,"",colnames(count.table)) | |
#colnames(count.table) <- as.character(lapply(colnames(count.table), function (x) tail(strsplit(x,'.',fixed=TRUE)[[1]],1))) | |
#rownames(count.table) <- count.table$Geneid | |
interval.table <- count.table[,0:2] | |
count.table <- count.table[,3:ncol(count.table),drop=FALSE] | |
################################################ | |
################################################ | |
## RUN DESEQ2 ## | |
################################################ | |
################################################ | |
#if (file.exists(opt$outdir) == FALSE) { | |
# dir.create(opt$outdir,recursive=TRUE) | |
#} | |
#samples.vec <- sort(colnames(count.table)) | |
samples.vec <- colnames(count.table) | |
groups <- substr(coldata$condition, 1, 20) | |
print(unique(groups)) | |
if (length(unique(groups)) == 1) { | |
quit(save = "no", status = 0, runLast = FALSE) | |
} | |
############### Descriptive statistics | |
# Define a group-specific color for each sample, and add it as a supplementary column | |
groupColor <- c("Group_1"="violet", "Group_2"="red", "Group_3"="blue", "Group_4"="green", "Group_5"="yellow","Group_6"="orange") # Choose one color per group | |
print(groupColor) | |
## Add a column to the pheno type to specify the color of each sample according to its genotype | |
coldata$color[coldata$condition == "Group_1"] <- groupColor["Group_1"] | |
coldata$color[coldata$condition == "Group_2"] <- groupColor["Group_2"] | |
coldata$color[coldata$condition == "Group_3"] <- groupColor["Group_3"] | |
coldata$color[coldata$condition == "Group_4"] <- groupColor["Group_4"] | |
coldata$color[coldata$condition == "Group_5"] <- groupColor["Group_5"] | |
coldata$color[coldata$condition == "Group_6"] <- groupColor["Group_6"] | |
#Basic statistics | |
statsPerSample <- data.frame(t(apply(count.table, 2, summary))) | |
names(statsPerSample) <- c("min", "Q1", "median", "mean", "Q3", "max") | |
statsPerSample[,c("perc5", "perc10", "perc90", "perc95")] <- | |
apply(count.table, 2, quantile, c(0.05, 0.10, 0.90, 0.95)) | |
statsPerSample$zeros <- apply(count.table==0, 2, sum) | |
statsPerSample$percZeros <- 100*statsPerSample$zeros/nrow(count.table) | |
write.table(statsPerSample,file=paste("analysis",".statsPerSample.txt",sep=""),row.names=TRUE,col.names=TRUE,sep="\t",quote=TRUE) | |
# DEseq2 | |
DDSFile <- paste("analysis",".dds.rld.RData",sep="") | |
if (file.exists(DDSFile) == FALSE) { | |
counts <- count.table[,samples.vec,drop=FALSE] | |
#data.frame(row.names=colnames(counts),condition=groups) | |
dds <- DESeqDataSetFromMatrix(countData = round(counts), colData = coldata, design = ~ condition) | |
print(dds) | |
is(dds) | |
isS4(dds) | |
slotNames(dds) | |
dds <- DESeq(dds) | |
rld <- rlog(dds) | |
save(dds,rld,file=DDSFile) | |
} | |
################ Interactive PCA | |
# expmatrix <- SummarizedExperiment::assay(rld) | |
# if (!requireNamespace("BiocManager", quietly = TRUE)) | |
# install.packages("BiocManager") | |
# BiocManager::install("pcaExplorer", version = "3.8") | |
#library("pcaExplorer") | |
#pcaExplorer(dds, rld) | |
#var_vs_mean(expmatrix) | |
#library(corrgram) | |
################################################ | |
################################################ | |
## PLOT QC ## | |
################################################ | |
################################################ | |
PlotFile <- paste("analysis",".plots.pdf",sep="") | |
if (file.exists(PlotFile) == FALSE) { | |
pdf(file=PlotFile,onefile=TRUE,width=7,height=7) | |
# histogram of counts | |
par(mfrow=c(3,1)) | |
hist(as.matrix(count.table), col="blue", border="white", breaks=100) | |
hist(as.matrix(count.table), col="blue", border="white", | |
breaks=20000, xlim=c(0,2000), main="Counts per gene", | |
xlab="Counts (truncated axis)", ylab="Number of genes", | |
las=1, cex.axis=0.7) | |
epsilon <- 1 # pseudo-count to avoid problems with log(0) | |
hist(as.matrix(log2(count.table + epsilon)), breaks=100, col="blue", border="white", | |
main="Log2-transformed counts per gene", xlab="log2(counts+1)", ylab="Number of genes", | |
las=1, cex.axis=0.7) | |
par(mfrow=c(1,1)) | |
# box plot of counts | |
boxplot(log2(count.table + epsilon), pch=".", col=coldata$color, | |
horizontal=TRUE, cex.axis=0.5, | |
las=1, ylab="Samples", xlab="log2(counts +1)") | |
# Density plot | |
# We will require one function from the affy package | |
if(!require("affy")){ | |
source("http://bioconductor.org/biocLite.R") | |
biocLite("affy") | |
} | |
library(affy) | |
plotDensity(log2(count.table + epsilon), col=coldata$color, lty=1, lwd=2) | |
grid() | |
legend("topright", legend=names(groupColor), col=groupColor, lwd=2) | |
dds.norm <- estimateSizeFactors(dds) | |
# Checking the normalization | |
par(mfrow=c(2,2),cex.lab=0.7) | |
boxplot(log2(counts(dds.norm)+epsilon), col=coldata$color, cex.axis=0.7, | |
las=1, xlab="log2(counts+1)", horizontal=TRUE, main="Raw counts") | |
boxplot(log2(counts(dds.norm, normalized=TRUE)+epsilon), col=coldata$color, cex.axis=0.7, | |
las=1, xlab="log2(normalized counts)", horizontal=TRUE, main="Normalized counts") | |
plotDensity(log2(counts(dds.norm)+epsilon), col=coldata$color, | |
xlab="log2(counts+1)", cex.lab=0.7, panel.first=grid()) | |
plotDensity(log2(counts(dds.norm, normalized=TRUE)+epsilon), col=coldata$color, | |
xlab="log2(normalized counts)", cex.lab=0.7, panel.first=grid()) | |
# Restore default parameters | |
par(mfrow=c(1,1), cex.lab=1) | |
# # Scatter plot | |
# # Define a function to draw a scatter plot for a pair of variables (samples) with density colors | |
# plotFun <- function(x,y){ | |
# dns <- densCols(x,y); | |
# points(x,y, col=dns, pch=".", panel.first=grid()); | |
# # abline(a=0, b=1, col="brown") | |
# } | |
# | |
# # Plot the scatter plot for a few pairs of variables selected at random | |
# set.seed(123) # forces the random number generator to produce fixed results | |
# pairs(log2(count.table[,sample(ncol(count.table),10)] + epsilon), | |
# panel=plotFun, lower.panel = NULL) | |
# | |
## PCA | |
pca.data <- DESeq2::plotPCA(rld,intgroup=c("condition"),returnData=TRUE) | |
percentVar <- round(100 * attr(pca.data, "percentVar")) | |
myplot <- ggplot(pca.data, aes(PC1, PC2, color=condition)) + | |
geom_point(size=3) + | |
xlab(paste0("PC1: ",percentVar[1],"% variance")) + | |
ylab(paste0("PC2: ",percentVar[2],"% variance")) + | |
theme(panel.grid.major = element_blank(), | |
panel.grid.minor = element_blank(), | |
panel.background = element_blank(), | |
panel.border = element_rect(colour = "black", fill=NA, size=1)) | |
print(myplot) | |
## WRITE PC1 vs PC2 VALUES TO FILE | |
pca.vals <- pca.data[,1:2] | |
colnames(pca.vals) <- paste(colnames(pca.vals),paste(percentVar,'% variance',sep=""), sep=": ") | |
pca.vals <- cbind(sample = rownames(pca.vals), pca.vals) | |
write.table(pca.vals,file=paste("analysis",".pca.vals.txt",sep=""),row.names=FALSE,col.names=TRUE,sep="\t",quote=TRUE) | |
## Dispersion plot | |
plotDispEsts(dds, main="Dispersion plot") | |
## SAMPLE CORRELATION HEATMAP | |
sampleDists <- dist(t(assay(rld))) | |
sampleDistMatrix <- as.matrix(sampleDists) | |
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) | |
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,col=colors) | |
library(RColorBrewer) | |
library(gplots) | |
(mycols <- brewer.pal(8, "Dark2")[1:length(unique(coldata$condition))]) | |
par(mar=c(10,4,4,2)) | |
heatmap.2(sampleDistMatrix, key=F, trace="none", | |
col=colorRampPalette( rev(brewer.pal(9, "Blues")) )(255), | |
ColSideColors=mycols[coldata$condition], RowSideColors=mycols[coldata$condition], | |
margin=c(10, 10), main="Sample Distance Matrix",cexRow=0.8, cexCol=0.8) | |
## Box plot of counts | |
par(mar=c(10,3,1,1)) | |
nor.counts <- counts(dds,normalized=TRUE) | |
nor.counts = log2(nor.counts+1) | |
par(cex.lab=0.8) # is for y-axis | |
par(cex.axis=0.8) # is for x-axis | |
boxplot(nor.counts,col=mycols[coldata$condition],las=2,ylab='log2(nor.counts+1)', boxwex = 0.5) | |
## WRITE SAMPLE DISTANCES TO FILE | |
write.table(cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix),file=paste("analysis",".sample.dists.txt",sep=""),row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE) | |
dev.off() | |
} | |
#plotDensity(log2(count.table + epsilon), col=coldata$color, lty=1, lwd=2) | |
#grid() | |
#legend("topright", legend=names(groupColor), col=groupColor, lwd=2) | |
################################################ | |
################################################ | |
## SAVE SIZE FACTORS ## | |
################################################ | |
################################################ | |
SizeFactorsDir <- "sizeFactors/" | |
if (file.exists(SizeFactorsDir) == FALSE) { | |
dir.create(SizeFactorsDir,recursive=TRUE) | |
} | |
NormFactorsFile <- paste(SizeFactorsDir, "analysis",".sizeFactors.RData",sep="") | |
if (file.exists(NormFactorsFile) == FALSE) { | |
normFactors <- sizeFactors(dds) | |
save(normFactors,file=NormFactorsFile) | |
for (name in names(sizeFactors(dds))) { | |
sizeFactorFile <- paste(SizeFactorsDir,name,"analysis",".sizeFactor.txt",sep="") | |
if (file.exists(sizeFactorFile) == FALSE) { | |
write(as.numeric(sizeFactors(dds)[name]),file=sizeFactorFile) | |
} | |
} | |
} | |
################################################ | |
################################################ | |
## WRITE LOG FILE ## | |
################################################ | |
################################################ | |
LogFile <- paste("analysis",".log",sep="") | |
if (file.exists(LogFile) == FALSE) { | |
cat("\nSamples =",samples.vec,"\n\n",file=LogFile,append=TRUE,sep=', ') | |
cat("Groups =",groups,"\n\n",file=LogFile,append=TRUE,sep=', ') | |
cat("Dimensions of count matrix =",dim(counts),"\n\n",file=LogFile,append=FALSE,sep=' ') | |
cat("\n",file=LogFile,append=TRUE,sep='') | |
} | |
################################################ | |
################################################ | |
## LOOP THROUGH COMPARISONS ## | |
################################################ | |
################################################ | |
ResultsFile <- paste("analysis",".results.txt",sep="") | |
raw.counts <- counts(dds,normalized=FALSE) | |
colnames(raw.counts) <- paste(colnames(raw.counts),'raw',sep='.') | |
pseudo.counts <- counts(dds,normalized=TRUE) | |
colnames(pseudo.counts) <- paste(colnames(pseudo.counts),'pseudo',sep='.') | |
deseq2_results_list <- list() | |
comparisons <- combn(unique(groups),2) | |
#ncol(comparisons) | |
library(AnnotationHub) | |
hub <- AnnotationHub() | |
#d <- display(hub) | |
#query(hub, "Felis") | |
Felis <- hub[["AH66297"]] #org.Felis_catus.eg.sqlite AH61890 | |
#snapshotDate(): 2018-10-24 snapshotDate(): 2018-04-30 | |
#pd <- possibleDates(hub) | |
#snapshotDate(hub) <- pd[111] | |
library(clusterProfiler) | |
#1:ncol(comparisons) | |
# 2, 3, 8, 12 | |
# | |
for (idx in 1:ncol(comparisons)) { | |
control.group <- comparisons[1,idx] | |
treat.group <- comparisons[2,idx] | |
CompPrefix <- paste(control.group,treat.group,sep="vs") | |
cat("Saving results for ",CompPrefix," ...\n",sep="") | |
CompOutDir <- paste(CompPrefix,'/',sep="") | |
if (file.exists(CompOutDir) == FALSE) { | |
dir.create(CompOutDir,recursive=TRUE) | |
} | |
control.samples <- samples.vec[which(groups == control.group)] | |
treat.samples <- samples.vec[which(groups == treat.group)] | |
comp.samples <- c(control.samples,treat.samples) | |
print(c(control.group,treat.group)) | |
print("=================================") | |
print(control.group) | |
print("control.samples") | |
print(control.samples) | |
print("=================================") | |
print("treat.samples") | |
print(treat.group) | |
print(treat.samples) | |
print("=================================") | |
print(c("condition",c(control.group,treat.group))) | |
comp.results <- results(dds,contrast=c("condition",c(control.group,treat.group))) | |
# Annotations ####################################################################################### | |
# library(biomaRt) # Install the biomaRt package (Bioconductor) if this command does not work | |
# Get the Ensembl ID to gene name mapping: | |
# mart <- useMart("ENSEMBL_MART_ENSEMBL", dataset = "fcatus_gene_ensembl", host="www.ensembl.org") | |
# ensembl2name <- getBM(attributes=c("ensembl_gene_id","external_gene_name","entrezgene", "description"),mart=mart) | |
#library(stringr) | |
#ensembl2name$test <- str_split_fixed(ensembl2name$description, "Acc:", 2) | |
#, "uniprot_gn" | |
# Merge it with our gene-level statistics: | |
# comp.results <- merge(x=as.data.frame(comp.results), y=ensembl2name, by.x=0, by.y=1, all.x=TRUE) | |
# colnames(comp.results) <- c("ensembl", "baseMean","log2FoldChange", "lfcSE", "stat", "pvalue", "padj", "gene", "entrezgene", "description") | |
# , "uniprot_gn" | |
# comp.results <- comp.results[!duplicated(comp.results$ensembl), ] #### keep uniq gene ids , fromLast=T | |
# rownames(comp.results) <- comp.results[,1] | |
#library(tidyr) | |
#a <- separate(data = ensembl2name, col = description, into = c("left", "right"), sep = "Acc:") | |
#res <- comp.results | |
comp.df <- as.data.frame(comp.results) | |
comp.table <- cbind(interval.table, as.data.frame(comp.df), raw.counts[,paste(comp.samples,'raw',sep='.')], pseudo.counts[,paste(comp.samples,'pseudo',sep='.')]) | |
resultDESeq2 <- as.data.frame(comp.results) | |
#resultDESeq2 <- cbind(interval.table, as.data.frame(comp.results)) | |
#rownames(resultDESeq2) <- resultDESeq2[,1] | |
#resultDESeq2[,1] <- NULL | |
## WRITE RESULTS FILE | |
CompResultsFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.results.txt",sep="") | |
write.table(comp.table, file=CompResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
## FILTER RESULTS BY FDR & LOGFC AND WRITE RESULTS FILE | |
pdf(file=paste(CompOutDir,CompPrefix,"analysis",".deseq2.plots.pdf",sep=""),width=10,height=8) | |
if (length(comp.samples) > 2) { | |
for (MIN_FDR in c(0.05)) { | |
#0.01, | |
## SUBSET RESULTS BY FDR | |
pass.fdr.table <- subset(comp.table, padj < MIN_FDR) | |
pass.fdr.up.table <- subset(comp.table, padj < MIN_FDR & log2FoldChange > 0) | |
pass.fdr.down.table <- subset(comp.table, padj < MIN_FDR & log2FoldChange < 0) | |
## SUBSET RESULTS BY FDR AND LOGFC | |
pass.fdr.logFC.table <- subset(comp.table, padj < MIN_FDR & abs(log2FoldChange) >= 1) | |
pass.fdr.logFC.up.table <- subset(comp.table, padj < MIN_FDR & abs(log2FoldChange) >= 1 & log2FoldChange > 0) | |
pass.fdr.logFC.down.table <- subset(comp.table, padj < MIN_FDR & abs(log2FoldChange) >= 1 & log2FoldChange < 0) | |
## WRITE RESULTS FILE | |
CompResultsFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".results.txt",sep="") | |
CompBEDFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".results.bed",sep="") | |
write.table(pass.fdr.table, file=CompResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
write.table(pass.fdr.table[,c("Geneid","log2FoldChange")], file=CompBEDFile, col.names=FALSE, row.names=FALSE, sep='\t', quote=FALSE) | |
# | |
CompResultsFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".FC2.results.txt",sep="") | |
CompBEDFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".FC2.results.bed",sep="") | |
write.table(pass.fdr.logFC.table, file=CompResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
write.table(pass.fdr.logFC.table[,c("Geneid","log2FoldChange")], file=CompBEDFile, col.names=FALSE, row.names=FALSE, sep='\t', quote=FALSE) | |
#Geneid | |
## MA PLOT & VOLCANO PLOT | |
#DESeq2::plotMA(comp.results, main=paste("MA plot FDR <= ",MIN_FDR,sep=""), colNonSig = "blue", ylim=c(-2,2),alpha=MIN_FDR) | |
#abline(h=c(-1:1), col="red") | |
#plot(comp.table$log2FoldChange, -1*log10(comp.table$padj), col=ifelse(comp.table$padj<=MIN_FDR, "red", "black"), xlab="logFC", ylab="-1*log10(FDR)", main=paste("Volcano plot FDR <=",MIN_FDR,sep=" "), pch=20) | |
# VOLCANO with labels, Volcano plot | |
alpha <- MIN_FDR # Threshold on the p-value | |
# par(mfrow=c(1,2)) | |
# Compute significance, with a maximum of 320 for the p-values set to 0 due to limitation of computation precision | |
resultDESeq2$sig <- -log10(resultDESeq2$padj) | |
sum(is.infinite(resultDESeq2$sig)) | |
resultDESeq2[is.infinite(resultDESeq2$sig),"sig"] <- 350 | |
# View(resultDESeq2[is.na(resultDESeq2$pvalue),]) | |
# Select genes with a defined p-value (DESeq2 assigns NA to some genes) | |
genes.to.plot <- !is.na(resultDESeq2$pvalue) | |
# sum(genes.to.plot) | |
range(resultDESeq2[genes.to.plot, "log2FoldChange"]) | |
# View(resultDESeq2[genes.to.plot,]) | |
# ## Volcano plot of adjusted p-values | |
# cols <- densCols(resultDESeq2$log2FoldChange, resultDESeq2$sig) | |
# cols[resultDESeq2$pvalue ==0] <- "purple" | |
# resultDESeq2$pch <- 19 | |
# resultDESeq2$pch[resultDESeq2$pvalue ==0] <- 6 | |
# plot(resultDESeq2$log2FoldChange, | |
# resultDESeq2$sig, | |
# col=cols, panel.first=grid(), | |
# main="Volcano plot", | |
# xlab="Effect size: log2(fold-change)", | |
# ylab="-log10(adjusted p-value)", | |
# pch=resultDESeq2$pch, cex=0.4) | |
# abline(v=0) | |
# abline(v=c(-1,1), col="brown") | |
# abline(h=-log10(alpha), col="brown") | |
# | |
# ## Plot the names of a reasonable number of genes, by selecting those begin not only significant but also having a strong effect size | |
# gn.selected <- abs(resultDESeq2$log2FoldChange) > 2 & resultDESeq2$padj < alpha | |
# text(resultDESeq2$log2FoldChange[gn.selected], | |
# -log10(resultDESeq2$padj)[gn.selected], | |
# lab=rownames(resultDESeq2)[gn.selected ], cex=0.6) | |
################################################ | |
################################################ | |
################################################ | |
################################################ | |
## GO TERM ANALYSIS USING GPROFILER ## | |
library(gprofiler2) | |
resultDESeq2.df <- na.omit(data.frame(comp.results)) | |
resultDESeq2.df <- resultDESeq2.df[order((resultDESeq2.df$log2FoldChange), decreasing = TRUE),] | |
induced.sign <- rownames(resultDESeq2.df)[resultDESeq2.df$log2FoldChange >= 1 & resultDESeq2.df$padj < alpha] | |
# Sort the gene-level statistics by adjusted p-value: | |
term.induced <- gost(query=induced.sign, organism = "fcatus", ordered_query = TRUE, | |
multi_query = FALSE, significant = FALSE, exclude_iea = TRUE, | |
measure_underrepresentation = FALSE, evcodes = FALSE, | |
user_threshold = 0.05, correction_method = "g_SCS", | |
domain_scope = "annotated", custom_bg = NULL, | |
numeric_ns = "", sources = NULL) | |
termres <- term.induced$result | |
termres <- termres[order(termres$p_value),] | |
p <- gostplot(term.induced, capped = FALSE, interactive = FALSE) | |
ProResultsFileUp <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".gprofiler.up.txt",sep="") | |
ProResultsFileUppdf <- paste(CompOutDir,CompPrefix,"analysis","_deseq2_FDR",MIN_FDR*100,"_gprofiler_up.pdf",sep="") | |
h1 <- as.list(term.induced$result$term_id[1:4]) | |
publish_gostplot(p, highlight_terms = h1, filename = ProResultsFileUppdf) | |
write.table(termres, file=ProResultsFileUp, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
resultDESeq2.df <- resultDESeq2.df[order((resultDESeq2.df$log2FoldChange), decreasing = FALSE),] | |
repressed.sign <- rownames(resultDESeq2.df)[resultDESeq2.df$log2FoldChange <= -1 & resultDESeq2.df$padj < alpha] | |
# Sort the gene-level statistics by adjusted p-value: | |
term.repressed <- gost(query=repressed.sign, organism = "fcatus", ordered_query = TRUE, | |
multi_query = FALSE, significant = FALSE, exclude_iea = TRUE, | |
measure_underrepresentation = FALSE, evcodes = FALSE, | |
user_threshold = 0.05, correction_method = "g_SCS", | |
domain_scope = "annotated", custom_bg = NULL, | |
numeric_ns = "", sources = NULL) | |
termresrep <- term.repressed$result | |
termresrep <- termresrep[order(termresrep$p_value),] | |
r <- gostplot(term.repressed, capped = FALSE, interactive = FALSE) | |
ProResultsFileDN <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".gprofiler.dn.txt",sep="") | |
ProResultsFileDNpdf <- paste(CompOutDir,CompPrefix,"analysis","_deseq2_FDR",MIN_FDR*100,"_gprofiler_dn.pdf",sep="") | |
h <- as.list(term.repressed$result$term_id[1:5]) | |
publish_gostplot(r, highlight_terms = h, filename = ProResultsFileDNpdf) | |
write.table(termresrep, file=ProResultsFileDN, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
######################################################################################## | |
library(biomaRt) # Install the biomaRt package (Bioconductor) if this command does not work | |
geneLevelStats <- subset(resultDESeq2.df , padj < alpha) #abs(log2FoldChange) >= 1 & | |
# Get the Ensembl ID to gene name mapping: | |
mart <- useMart("ENSEMBL_MART_ENSEMBL", dataset = "fcatus_gene_ensembl", host="www.ensembl.org") | |
ensembl2name <- getBM(attributes=c("ensembl_gene_id","external_gene_name","entrezgene", "description"),mart=mart) | |
#library(stringr) | |
#ensembl2name$test <- str_split_fixed(ensembl2name$description, "Acc:", 2) | |
#, "uniprot_gn" | |
# Merge it with our gene-level statistics: | |
geneLevelStats <- merge(x=geneLevelStats, y=ensembl2name, by.x=0, by.y=1, all.x=TRUE) | |
colnames(geneLevelStats) <- c("ensembl","baseMean","log2FoldChange", "lfcSE", "stat", "pvalue", "padj","gene","entrezgene", "description") | |
# , "uniprot_gn" | |
geneLevelStats <- geneLevelStats[!duplicated(geneLevelStats$ensembl), ] #### keep uniq gene ids , fromLast=T | |
rownames(geneLevelStats) <- geneLevelStats[,1] | |
# Sort the gene-level statistics by adjusted p-value: | |
geneLevelStats <- geneLevelStats[order(geneLevelStats$padj),] | |
## KEGG over-representation test | |
mydf <- geneLevelStats #data.frame(Entrez=gene, FC=geneList) | |
mydf <- mydf[abs(mydf$log2FoldChange) > 1,] | |
mydf$group <- "Up" | |
mydf$group[mydf$log2FoldChange < 0] <- "Down" | |
mydf$othergroup <- "<4 fold" | |
mydf$othergroup[abs(mydf$log2FoldChange) > 2] <- ">4 fold" | |
#### | |
formula_res_go <- compareCluster(ensembl~group, data=mydf, fun="enrichGO", OrgDb = Felis, keyType = 'ENSEMBL', | |
pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.05, readable = TRUE) | |
GOResultsFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".GO.txt",sep="") | |
write.table(as.data.frame(formula_res_go), file=GOResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
title_comp_go = paste(CompPrefix, " enrichGO (FDR:",MIN_FDR, " FC>2)",sep="") | |
print(dotplot(formula_res_go, x=~group, showCategory=20, title=title_comp_go)) #+ ggplot2::facet_grid(~othergroup)) | |
formula_res <- compareCluster(entrezgene~group, data=mydf, fun="enrichKEGG", organism = 'fca') | |
#head(as.data.frame(formula_res)) | |
KEGGResultsFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".kegg.txt",sep="") | |
write.table(as.data.frame(formula_res), file=KEGGResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
#dotplot(formula_res) | |
title_comp = paste(CompPrefix, " enrichKEGG (FDR:",MIN_FDR, " FC>2)",sep="") | |
print(dotplot(formula_res, x=~group, showCategory=20, title=title_comp)) #+ ggplot2::facet_grid(~othergroup)) | |
#### | |
formula_res_gp_go <- compareCluster(ensembl~group+othergroup, data=mydf, fun="enrichGO", OrgDb = Felis, keyType = 'ENSEMBL', | |
pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.05, readable = TRUE) | |
GOgpResultsFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".GOgp.txt",sep="") | |
write.table(as.data.frame(formula_res_gp_go), file=GOgpResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
title_comp_gp_go = paste(CompPrefix, " enrichGO fold comparision (FDR:",MIN_FDR, " FC>2)",sep="") | |
print(dotplot(formula_res_gp_go, x=~group, showCategory=20, title=title_comp_gp_go) + ggplot2::facet_grid(~othergroup)) | |
formula_res_gp <- compareCluster(entrezgene~group+othergroup, data=mydf, fun="enrichKEGG", organism = 'fca') | |
#head(as.data.frame(formula_res_gp)) | |
KEGGResultsFile <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".kegg_grouped.txt",sep="") | |
write.table(as.data.frame(formula_res_gp), file=KEGGResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
#dotplot(formula_res_gp) | |
title_comp_gp = paste(CompPrefix, " enrichKEGG fold comparision" , " (FDR:",MIN_FDR, " FC>2)",sep="") | |
print(dotplot(formula_res_gp, x=~group, showCategory=20, title=title_comp_gp) + ggplot2::facet_grid(~othergroup)) | |
############ | |
mydf_up <- mydf[mydf$log2FoldChange > 1,] | |
mydf_up <- na.omit(mydf_up) | |
gene <- mydf_up$entrezgene | |
geneList <- as.vector(mydf_up$log2FoldChange) | |
kk <- clusterProfiler::enrichKEGG(gene = gene, | |
organism = 'fca', | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 0.05) | |
# #gene = geneLevelStats$ensembl, keyType = 'ENSEMBL' | |
# | |
# head(as.data.frame(kk)[,-8]) | |
#ProResultsFileUp <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".enrichKEGG.up.txt",sep="") | |
#write.table(as.data.frame(kk), file=ProResultsFileUp, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
# #plots | |
#print(cnetplot(kk, categorySize="geneNum", foldChange=geneList)) | |
kk <- setReadable(kk, OrgDb = Felis, keyType = "ENTREZID") | |
tryCatch(print(cnetplot(kk, categorySize="pvalue", foldChange=geneList)), error = function(e) cat("Error: ",e$message, "\n")) | |
# print(emapplot(kk)) | |
# print(barplot(kk, drop=TRUE, showCategory=12)) | |
#title_up = paste(CompPrefix, " (Upregulated, FDR:",MIN_FDR, ", FC>2)",sep="") | |
#print(dotplot(kk, title=title_up)) | |
mydf_down <- mydf[mydf$log2FoldChange < -1,] | |
mydf_down <- na.omit(mydf_down) | |
gene <- mydf_down$entrezgene | |
geneList <- as.vector(mydf_down$log2FoldChange) | |
kk_dn <- clusterProfiler::enrichKEGG(gene = gene, | |
organism = 'fca', | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 0.05) | |
# #gene = geneLevelStats$ensembl, keyType = 'ENSEMBL' | |
# | |
# head(as.data.frame(kk)[,-8]) | |
#ProResultsFileDn <- paste(CompOutDir,CompPrefix,"analysis",".deseq2.FDR",MIN_FDR,".enrichKEGG.dn.txt",sep="") | |
#write.table(as.data.frame(kk_dn), file=ProResultsFileDn, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
# # #plots | |
#print(cnetplot(kk_dn, categorySize="geneNum", foldChange=geneList)) | |
#print(cnetplot(kk_dn, categorySize="pvalue", foldChange=geneList)) | |
kk_dn <- setReadable(kk_dn, OrgDb = Felis, keyType = "ENTREZID") | |
tryCatch(print(cnetplot(kk_dn, categorySize="pvalue", foldChange=geneList)), error = function(e) cat("Error: ",e$message, "\n")) | |
#kk_dn1@gene2Symbol <- as.character(mydf_down1$gene) | |
#kk_dn1@keytype <- 'auto' | |
#kk_dn1@result <- kk_dn1@result | |
#kk_dn1@readable <- TRUE | |
# heatplot(kk_dn, foldChange = geneList, showCategory = 100)+ ggplot2::coord_flip() | |
# print(emapplot(kk_dn)) | |
# print(barplot(kk_dn, drop=TRUE, showCategory=12)) | |
# print(dotplot(kk_dn)) | |
#title_dn = paste(CompPrefix, " (Downregulated, FDR:",MIN_FDR, ", FC>2)",sep="") | |
#print(dotplot(kk_dn, title=title_dn)) | |
## ADD COUNTS TO LOGFILE | |
cat(CompPrefix," genes with FDR <= ",MIN_FDR,": ",nrow(pass.fdr.table)," (up=",nrow(pass.fdr.up.table),", down=",nrow(pass.fdr.down.table),")","\n",file=LogFile,append=TRUE,sep="") | |
cat(CompPrefix," genes with FDR <= ",MIN_FDR," & FC > 2: ",nrow(pass.fdr.logFC.table)," (up=",nrow(pass.fdr.logFC.up.table),", down=",nrow(pass.fdr.logFC.down.table),")","\n",file=LogFile,append=TRUE,sep="") | |
## correlogram | |
# corrgram::corrgram(raw.counts[,control.samples], order=TRUE, lower.panel=corrgram::panel.pie, | |
# upper.panel=corrgram::panel.pts, text.panel=corrgram::panel.txt, | |
# main="Correlogram of controls") | |
} | |
cat("\n",file=LogFile,append=TRUE,sep="") | |
} | |
## SAMPLE CORRELATION HEATMAP | |
rld.subset <- assay(rld)[,comp.samples] | |
sampleDists <- dist(t(rld.subset)) | |
sampleDistMatrix <- as.matrix(sampleDists) | |
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) | |
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,col=colors) | |
#print(mycols) | |
(mycols1 <- brewer.pal(8, "Dark2")[1:length(unique(c(control.group,treat.group)))]) | |
cl <-length(control.samples) | |
tl <-length(treat.samples) | |
col_scale <- c(rep(mycols1[1], cl), rep(mycols1[2], tl)) | |
par(mar=c(10,4,4,2)) | |
heatmap.2(sampleDistMatrix, key=F, trace="none", | |
col=colorRampPalette( rev(brewer.pal(9, "Blues")) )(255), | |
ColSideColors=col_scale[!is.na(col_scale)], RowSideColors=col_scale[!is.na(col_scale)], | |
margin=c(10, 10), main="Sample Distance Matrix",cexRow=0.8, cexCol=0.8) | |
# ## SCATTER PLOT FOR RLOG COUNTS | |
# combs <- combn(comp.samples,2,simplify=FALSE) | |
# clabels <- sapply(combs,function(x){paste(x,collapse=' & ')}) | |
# plotdat <- data.frame(x=unlist(lapply(combs, function(x){rld.subset[, x[1] ]})),y=unlist(lapply(combs, function(y){rld.subset[, y[2] ]})),comp=rep(clabels, each=nrow(rld.subset))) | |
# myplot <- xyplot(y~x|comp,plotdat, | |
# panel=function(...){ | |
# panel.xyplot(...) | |
# panel.abline(0,1,col="red") | |
# }, | |
# par.strip.text=list(cex=0.5)) | |
#print(myplot) | |
## Top gene | |
# res <- res[order(res$padj), ] | |
# topGene <- rownames(res)[which.min(res$padj)] | |
# print(res[topGene,]) | |
# # plotCounts(dds, gene=topGene, intgroup=c("condition")) | |
# | |
# ## all genes with padj < 0.05 and logFC > 1 | |
# # res.plot <- subset(res, padj < 0.05 & abs(log2FoldChange) >= 1) | |
# # for(i in 1:nrow(res.plot)) { | |
# # row <- res.plot[i,] | |
# # plotCounts(dds, gene=rownames(row), intgroup=c("condition")) | |
# # } | |
# ## Examine plot of p-values, draw an histogram of the p-values | |
hist(resultDESeq2$pvalue, breaks=50, col="grey", main="DESeq2 p-value distribution", xlab="Nominal P-value", ylab="Number of genes") | |
######################################################### | |
resultDESeq2$sig <- -log10(resultDESeq2$padj) | |
selectedGenes <- c( | |
"Most significant" = rownames(resultDESeq2)[which.max(resultDESeq2$sig)]) | |
# top.logFC = rownames(resultDESeq2)[which.max(resultDESeq2$log2FoldChange)]) | |
#gn.most.sign <- rownames(resultDESeq2)[1] | |
#gn.most.diff.val <- counts(dds.norm, normalized=T)[gn.most.sign,] | |
## Select a gene with small fold change but high significance | |
sel1 <- subset( | |
na.omit(resultDESeq2), | |
sig >= 50 & log2FoldChange > 0 & log2FoldChange < 0.5) | |
# dim(sel1) | |
selectedGenes <- append(selectedGenes, | |
c("Small FC yet significant"=rownames(sel1)[1])) | |
## Select the non-significant gene with the highest fold change | |
sel2 <- subset(x = na.omit(resultDESeq2), padj > alpha & log2FoldChange > 0 & baseMean > 1000 & baseMean < 10000) | |
# dim(sel2) | |
sel2 <- sel2[order(sel2$log2FoldChange, decreasing = TRUE),][1,] | |
selectedGenes <- append( | |
selectedGenes, | |
c("Non-significant"=rownames(sel2)[1])) | |
par(mfrow=c(length(na.omit(selectedGenes)),1)) | |
# for (g in na.omit(selectedGenes)) { | |
# barplot(counts(dds.norm, normalized=TRUE)[g,], | |
# col=coldata$color, | |
# main=g, las=2, cex.names=0.5) | |
# } | |
compt <- cbind(pseudo.counts[,paste(comp.samples,'pseudo',sep='.')]) | |
for (g in na.omit(selectedGenes)) { | |
barplot(compt[g,], | |
col=coldata[comp.samples,]$color, | |
main=g, las=2, cex.names=0.5) | |
} | |
dev.off() | |
colnames(comp.df) <- paste(CompPrefix,".",colnames(comp.df),sep="") | |
deseq2_results_list[[idx]] <- comp.df | |
} | |
## WRITE RESULTS FROM ALL COMPARISONS TO FILE | |
# | |
deseq2_results_table <- cbind(interval.table, do.call(cbind, deseq2_results_list),raw.counts,pseudo.counts) | |
write.table(deseq2_results_table, file=ResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) | |
################################################ | |
################################################ | |
## R SESSION INFO ## | |
################################################ | |
################################################ | |
RLogFile <- "R_sessionInfo.log" | |
if (file.exists(RLogFile) == FALSE) { | |
sink(RLogFile) | |
a <- sessionInfo() | |
print(a) | |
sink() | |
} | |
################################################ | |
################################################ | |
################################################ | |
################################################ | |
# library(biomaRt) # Install the biomaRt package (Bioconductor) if this command does not work | |
# | |
# resultDESeq2.df <- na.omit(data.frame(comp.results)) | |
# induced.sign <- subset(resultDESeq2.df , log2FoldChange >= 1 & padj < alpha) | |
# geneLevelStats <- induced.sign[,c("log2FoldChange","padj")] | |
# | |
# | |
# # Get the Ensembl ID to gene name mapping: | |
# mart <- useMart("ENSEMBL_MART_ENSEMBL", dataset = "fcatus_gene_ensembl", host="www.ensembl.org") | |
# ensembl2name <- getBM(attributes=c("ensembl_gene_id","external_gene_name","entrezgene"),mart=mart) | |
# | |
# # Merge it with our gene-level statistics: | |
# geneLevelStats <- merge(x=geneLevelStats, y=ensembl2name, by.x=0, by.y=1, all.x=TRUE) | |
# colnames(geneLevelStats) <- c("ensembl","log2fc","padj","gene","entrezgene") | |
# geneLevelStats <- geneLevelStats[!duplicated(geneLevelStats$ensembl), ] #### | |
# rownames(geneLevelStats) <- geneLevelStats[,1] | |
# | |
# # Sort the gene-level statistics by adjusted p-value: | |
# geneLevelStats <- geneLevelStats[order(geneLevelStats$padj),] | |
# | |
# ## KEGG over-representation test | |
# geneLevelStats <- na.omit(geneLevelStats, cols = c("entrezgene")) | |
# | |
# gene <- na.omit(geneLevelStats$entrezgene) | |
# geneList <- as.vector(geneLevelStats$log2fc) | |
# | |
# kk <- clusterProfiler::enrichKEGG(gene = gene, | |
# organism = 'fca', | |
# pAdjustMethod = "BH", | |
# pvalueCutoff = 0.05, | |
# qvalueCutoff = 0.05) | |
# #gene = geneLevelStats$ensembl, keyType = 'ENSEMBL' | |
# | |
# head(as.data.frame(kk)[,-8]) | |
# #plots | |
# cnetplot(kk, categorySize="geneNum", foldChange=geneList) | |
# cnetplot(kk, categorySize="pvalue", foldChange=geneList) | |
# emapplot(kk) | |
# barplot(kk, drop=TRUE, showCategory=12) | |
# dotplot(kk) | |
# ## KEGG GSA test | |
# geneList <- geneLevelStats[c("entrezgene", "log2fc", "uniprot_gn")] | |
# geneList <- geneList[!duplicated(geneList$uniprot_gn), ] #### | |
# geneList <- geneList[geneList$log2fc < 0,] | |
# geneList <- geneList[abs(geneList$log2fc) > 1,] | |
# ## feature 1: numeric vector | |
# geneList1 <- as.vector(geneList$log2fc) | |
# ## feature 2: named vector | |
# names(geneList1) = geneList$uniprot_gn | |
# ## feature 3: decreasing order | |
# geneList1 = sort(geneList1, decreasing = TRUE) | |
# | |
# kk2 <- gseKEGG(geneList = geneList1, | |
# organism = 'fca', | |
# nPerm = 10000, | |
# keyType = 'uniprot', | |
# minGSSize = 1, | |
# pvalueCutoff = 0.9, | |
# verbose = TRUE) | |
# head(kk2) | |
# | |
# | |
# # if (!requireNamespace("BiocManager", quietly = TRUE)) | |
# # install.packages("BiocManager") | |
# # BiocManager::install("biomaRt", version = "3.8") | |
# library("biomaRt") | |
# if (!requireNamespace("BiocManager", quietly = TRUE)) | |
# install.packages("BiocManager") | |
# BiocManager::install("AnnotationForge", version = "3.8") | |
# | |
# | |
# library(clusterProfiler) | |
# Gff2GeneTable("/lrlhps/users/c274411/00_references/aws-igenomes/Felis_catus/Felis_catus_9.0/Ensembl/Annotation/Genes/Felis_catus.Felis_catus_9.0.94.gtf") | |
# | |
# Gff2GeneTable("/lrlhps/users/c274411/TEST/scripts/Felis_catus.Felis_catus_9.0.94.chr.gff3") | |
# | |
# Gff2GeneTable("/lrlhps/users/c274411/TEST/scripts/GCF_000181335.3_Felis_catus_9.0_genomic.gff") | |
# | |
# #gffread /lrlhps/users/c274411/00_references/aws-igenomes/Felis_catus/Felis_catus_9.0/Ensembl/Annotation/Genes/Felis_catus.Felis_catus_9.0.94.gtf -o- > Felis_catus.Felis_catus_9.0.94.gff | |
# | |
# | |
# Gff2GeneTable("/lrlhps/users/c274411/TEST/scripts/GCF_000181335.3_Felis_catus_9.0_genomic.gff") | |
# load("geneTable.rda") | |
# head(geneTable) | |
# eg <- geneTable$GeneID | |
# | |
# | |
#if (!requireNamespace("BiocManager", quietly = TRUE)) | |
# install.packages("BiocManager") | |
#library(BiocInstaller) | |
#BiocManager::valid() | |
#BiocManager::install("AnnotationHub", version = "3.8") | |
#BiocManager::install("AnnotationHubData", version = "3.8") | |
## Gene ID conversion | |
#gene.df <- bitr(repressed.sign, fromType = "ENSEMBL", | |
# toType = c("ENTREZID", "SYMBOL"), | |
# OrgDb = Felis) | |
#keytypes(Felis) | |
ggo <- groupGO(gene = repressed.sign, | |
OrgDb = Felis, | |
keyType = 'ENSEMBL', | |
ont = "CC", | |
level = 3, | |
readable = TRUE) | |
head(ggo) | |
ego <- enrichGO(gene = repressed.sign, | |
OrgDb = Felis, | |
keyType = 'ENSEMBL', | |
pAdjustMethod = "BH", | |
pvalueCutoff = 0.05, | |
qvalueCutoff = 0.05, | |
readable = TRUE) | |
#universe = rownames(resultDESeq2.df), ont = "CC", | |
bp2 <- simplify(ego, cutoff=0.7, by="p.adjust", select_fun=min) | |
## feature 1: numeric vector | |
geneList = mydf_up$log2fc | |
## feature 2: named vector | |
names(geneList) = as.character(mydf_up$ensembl) | |
## feature 3: decreasing order | |
geneList = sort(geneList, decreasing = TRUE) | |
#ego <- setReadable(ego, OrgDb = Felis, keyType = "ENSEMBL") | |
gsecc <- gseGO(geneList=geneList, ont="MF", keyType = 'ENSEMBL', OrgDb=Felis, verbose=F) | |
head(summary(gsecc)) | |
#enrichMap(ego, vertex.label.cex=1.2, layout=igraph::layout.kamada.kawai) | |
cnetplot(formula_res1, foldChange=mydf$log2fc) | |
plotGOgraph(ego) | |
heatplot(ego, foldChange = mydf$log2fc, showCategory = 20000)+ ggplot2::coord_flip() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment