Created
October 18, 2017 16:37
-
-
Save Dudemanguy/e4da2676accecad955341377119c6cfc to your computer and use it in GitHub Desktop.
Script for performing differential gene expression between two groups
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(HTSFilter) | |
library(edgeR) | |
suppressPackageStartupMessages(library(DESeq2)) | |
options(max.width=9999999, width=10000) | |
#define functions | |
#user input | |
print("Script for performing differential gene expression between two groups") | |
count_raw <- readline("Enter the file containing the count matrix \n") | |
count_matrix <- read.table(count_raw) | |
sample_query <- readline("Press 1 to enter samples manually. Press 2 to read from a file (each entry on a separate line) \n") | |
#inputting sample groups; provide manual option and reading from a file | |
if (sample_query==1){ | |
columns_to_keep <- readline("Enter the desired samples as numbers separated by a comma prefaced by a lowercase h (e.g. h001,h002,h003,etc.) \n") | |
sample_split <- strsplit(columns_to_keep, ",") | |
count_subset <- subset(count_matrix, select=eval(parse(text=sample_split))) | |
#ask for condition entry | |
condition_query2 <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n") | |
if (condition_query2==1){ | |
define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n") | |
group_split <- strsplit(define_group, ",") | |
group <- unlist(group_split) | |
group <- factor(group, levels=unique(unlist(group_split))) | |
} | |
else if (condition_query2==2){ | |
define_group <- readline("Enter the file name containing the condition \n") | |
group <- readLines(define_group) | |
group <- factor(group, levels=unique(readLines(define_group))) | |
} | |
} | |
#sample entry | |
if (sample_query==2){ | |
sample_query2 <- readline("Press 1 if the file only contains the desired samples. Press 2 if the file also defines the condition (tab delimited) \n") | |
if (sample_query2==1){ | |
print("File only contains sample information") | |
sample_file <- readline("Enter the file name \n") | |
columns_to_keep <- readLines(sample_file) | |
count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep)))) | |
condition_query <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n") | |
#ask for condition entry | |
if (condition_query==1){ | |
define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n") | |
group_split <- strsplit(define_group, ",") | |
group <- unlist(group_split) | |
group <- factor(group, levels=unique(unlist(group_split))) | |
} | |
else if (condition_query==2){ | |
define_group <- readline("Enter the file name containing the condition \n") | |
group <- read.table(define_group) | |
group <- factor(group, levels=unique(read.table(define_group))) | |
} | |
} | |
else if (sample_query==2){ | |
print("File contains sample and condition information") | |
complete_file <- readline("Enter the file name \n") | |
complete_table <- read.table(complete_file) | |
columns_to_keep <- as.vector(complete_table[,1]) | |
count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep)))) | |
group <- complete_table[,2] | |
group <- factor(group, levels=unique(complete_table[,2])) | |
} | |
} | |
#error handling | |
if (exists("count_subset")==FALSE){ | |
print("Error. No count matrix found. Retry entry.") | |
stop() | |
} | |
#get the number of samples in the first group | |
group_table <- table(group) | |
n <- group_table[[1]][1] | |
#output directory for results | |
dir_output <- readline("Enter the name of the output directory \n") | |
dir.create(dir_output) | |
#apply HTSFilter | |
filter <- HTSFilter(count_subset, group, s.min=1, s.max=200, s.len=25) | |
count_filter <- filter$filteredData | |
#apply edgeR | |
y <- DGEList(counts=count_filter, group=group) | |
y <- calcNormFactors(y) | |
y <- estimateDisp(y) | |
et <- exactTest(y) | |
#apply DESeq2 | |
group_dataframe <- data.frame(group) | |
dds <- DESeqDataSetFromMatrix(countData=count_filter, colData=group_dataframe, design=~group) | |
dds <- DESeq(dds) | |
res <- results(dds) | |
#create data frame for edgeR results | |
et_results <- topTags(et, n=Inf, sort.by="none") | |
write.table(et_results, file="et_output", sep="\t") | |
et_table <- read.table("et_output") | |
et_table <- et_table[-(2)] | |
file.remove("et_output") | |
#create data frame for DESeq2 results | |
resOrdered <- res[order(res$padj),] | |
write.table(resOrdered, file="DESeq_output", sep="\t") | |
res_order <- read.table("DESeq_output") | |
file.remove("DESeq_output") | |
#grab counts, average them, add to data frame, and sort | |
a <- count_filter[,1:n] | |
b <- count_filter[,(n+1):ncol(count_filter)] | |
c <- data.frame(rowMeans(a)) | |
d <- data.frame(rowMeans(b)) | |
et_table["Avg Ct A"] <- c | |
et_table["Avg Ct B"] <- d | |
et_table <- et_table[,c(4,5,1,2,3)] | |
et_table <- et_table[order(et_table$FDR, decreasing=FALSE),] | |
res_order["Avg Ct A"] <- c | |
res_order["Avg Ct B"] <- d | |
res_order <- res_order[,c(7,8,1,2,3,4,5,6)] | |
#print results and save to file | |
setwd(dir_output) | |
dir.create("edgeR") | |
setwd("edgeR") | |
sink("dgelist_output") | |
print(y) | |
sink() | |
write.table(et_table, file="full_results", sep="\t", quote=FALSE) | |
et_sig <- et_table[which(et_table$FDR<0.05),] | |
write.table(et_sig, file="significant_results", sep="\t", quote=FALSE) | |
setwd("..") | |
dir.create("DESeq2") | |
setwd("DESeq2") | |
sink("deseq2_summary") | |
print(summary(res)) | |
sink() | |
write.table(res_order, file="full_results", sep="\t", quote=FALSE) | |
res_sig <- res_order[which(res_order$padj<0.05),] | |
write.table(res_sig, file="significant_results", sep="\t", quote=FALSE) | |
setwd("..") | |
setwd("..") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment