Dudemanguy · October 18, 2017 16:37
diff --git a/diffgene_script.R b/diffgene_script.R
 library(HTSFilter)
 library(edgeR)
 suppressPackageStartupMessages(library(DESeq2))
 options(max.width=9999999, width=10000)

 #define functions

 #user input
 print("Script for performing differential gene expression between two groups")
 count_raw <- readline("Enter the file containing the count matrix \n")
 count_matrix <- read.table(count_raw)
 sample_query <- readline("Press 1 to enter samples manually. Press 2 to read from a file (each entry on a separate line) \n")

 #inputting sample groups; provide manual option and reading from a file
 if (sample_query==1){
 	columns_to_keep <- readline("Enter the desired samples as numbers separated by a comma prefaced by a lowercase h (e.g. h001,h002,h003,etc.) \n")
 	sample_split <- strsplit(columns_to_keep, ",")
 	count_subset <- subset(count_matrix, select=eval(parse(text=sample_split)))

 	#ask for condition entry
 	condition_query2 <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n")
 	if (condition_query2==1){
 		define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n")
 		group_split <- strsplit(define_group, ",")
 		group <- unlist(group_split)
 		group <- factor(group, levels=unique(unlist(group_split)))
 	}
 	
 	else if (condition_query2==2){
 		define_group <- readline("Enter the file name containing the condition \n")
 		group <- readLines(define_group)
 		group <- factor(group, levels=unique(readLines(define_group)))
 	}
 }

 #sample entry
 if (sample_query==2){
 	sample_query2 <- readline("Press 1 if the file only contains the desired samples. Press 2 if the file also defines the condition (tab delimited) \n")
 	if (sample_query2==1){
 		print("File only contains sample information")
 		sample_file <- readline("Enter the file name \n") 
 		columns_to_keep <- readLines(sample_file)
 		count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep))))
 		condition_query <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n")

 		#ask for condition entry
 			if (condition_query==1){
 				define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n")
 				group_split <- strsplit(define_group, ",")
 				group <- unlist(group_split)
 				group <- factor(group, levels=unique(unlist(group_split)))
 			}

 			else if (condition_query==2){
 				define_group <- readline("Enter the file name containing the condition \n")
 				group <- read.table(define_group)
 				group <- factor(group, levels=unique(read.table(define_group)))
 			}
 	}

 	else if (sample_query==2){
 		print("File contains sample and condition information")
 		complete_file <- readline("Enter the file name \n") 
 		complete_table <- read.table(complete_file) 
 		columns_to_keep <- as.vector(complete_table[,1])
 		count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep))))
 		group <- complete_table[,2]
 		group <- factor(group, levels=unique(complete_table[,2]))
 	}
 }


 #error handling

 if (exists("count_subset")==FALSE){
 	print("Error. No count matrix found. Retry entry.")
 	stop()
 }

 #get the number of samples in the first group
 group_table <- table(group)
 n <- group_table[[1]][1]

 #output directory for results

 dir_output <- readline("Enter the name of the output directory \n")
 dir.create(dir_output) 

 #apply HTSFilter
 filter <- HTSFilter(count_subset, group, s.min=1, s.max=200, s.len=25)
 count_filter <- filter$filteredData

 #apply edgeR
 y <- DGEList(counts=count_filter, group=group)
 y <- calcNormFactors(y)
 y <- estimateDisp(y)
 et <- exactTest(y)

 #apply DESeq2
 group_dataframe <- data.frame(group)
 dds <- DESeqDataSetFromMatrix(countData=count_filter, colData=group_dataframe, design=~group)
 dds <- DESeq(dds)
 res <- results(dds)

 #create data frame for edgeR results
 et_results <- topTags(et, n=Inf, sort.by="none")
 write.table(et_results, file="et_output", sep="\t")
 et_table <- read.table("et_output")
 et_table <- et_table[-(2)]
 file.remove("et_output")

 #create data frame for DESeq2 results
 resOrdered <- res[order(res$padj),]
 write.table(resOrdered, file="DESeq_output", sep="\t")
 res_order <- read.table("DESeq_output")
 file.remove("DESeq_output")

 #grab counts, average them, add to data frame, and sort

 a <- count_filter[,1:n]
 b <- count_filter[,(n+1):ncol(count_filter)]
 c <- data.frame(rowMeans(a))
 d <- data.frame(rowMeans(b))
 et_table["Avg Ct A"] <- c
 et_table["Avg Ct B"] <- d
 et_table <- et_table[,c(4,5,1,2,3)]
 et_table <- et_table[order(et_table$FDR, decreasing=FALSE),]

 res_order["Avg Ct A"] <- c
 res_order["Avg Ct B"] <- d
 res_order <- res_order[,c(7,8,1,2,3,4,5,6)]

 #print results and save to file

 setwd(dir_output)

 dir.create("edgeR")
 setwd("edgeR")
 sink("dgelist_output")
 print(y)
 sink()
 write.table(et_table, file="full_results", sep="\t", quote=FALSE)
 et_sig <- et_table[which(et_table$FDR<0.05),]
 write.table(et_sig, file="significant_results", sep="\t", quote=FALSE)
 setwd("..")

 dir.create("DESeq2")
 setwd("DESeq2")
 sink("deseq2_summary")
 print(summary(res))
 sink()
 write.table(res_order, file="full_results", sep="\t", quote=FALSE)
 res_sig <- res_order[which(res_order$padj<0.05),]
 write.table(res_sig, file="significant_results", sep="\t", quote=FALSE)
 setwd("..")
 setwd("..")
	library(HTSFilter)
	library(edgeR)
	suppressPackageStartupMessages(library(DESeq2))
	options(max.width=9999999, width=10000)

	#define functions

	#user input
	print("Script for performing differential gene expression between two groups")
	count_raw <- readline("Enter the file containing the count matrix \n")
	count_matrix <- read.table(count_raw)
	sample_query <- readline("Press 1 to enter samples manually. Press 2 to read from a file (each entry on a separate line) \n")

	#inputting sample groups; provide manual option and reading from a file
	if (sample_query==1){
	columns_to_keep <- readline("Enter the desired samples as numbers separated by a comma prefaced by a lowercase h (e.g. h001,h002,h003,etc.) \n")
	sample_split <- strsplit(columns_to_keep, ",")
	count_subset <- subset(count_matrix, select=eval(parse(text=sample_split)))

	#ask for condition entry
	condition_query2 <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n")
	if (condition_query2==1){
	define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n")
	group_split <- strsplit(define_group, ",")
	group <- unlist(group_split)
	group <- factor(group, levels=unique(unlist(group_split)))
	}

	else if (condition_query2==2){
	define_group <- readline("Enter the file name containing the condition \n")
	group <- readLines(define_group)
	group <- factor(group, levels=unique(readLines(define_group)))
	}
	}

	#sample entry
	if (sample_query==2){
	sample_query2 <- readline("Press 1 if the file only contains the desired samples. Press 2 if the file also defines the condition (tab delimited) \n")
	if (sample_query2==1){
	print("File only contains sample information")
	sample_file <- readline("Enter the file name \n")
	columns_to_keep <- readLines(sample_file)
	count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep))))
	condition_query <- readline("Press 1 to enter the condition manually. Press 2 to read from a file (each entry on a separate line) \n")

	#ask for condition entry
	if (condition_query==1){
	define_group <- readline("Enter the condition for each sample separated by commas (e.g. 1,1,2,2). Note that the order must be the same as the samples. \n")
	group_split <- strsplit(define_group, ",")
	group <- unlist(group_split)
	group <- factor(group, levels=unique(unlist(group_split)))
	}

	else if (condition_query==2){
	define_group <- readline("Enter the file name containing the condition \n")
	group <- read.table(define_group)
	group <- factor(group, levels=unique(read.table(define_group)))
	}
	}

	else if (sample_query==2){
	print("File contains sample and condition information")
	complete_file <- readline("Enter the file name \n")
	complete_table <- read.table(complete_file)
	columns_to_keep <- as.vector(complete_table[,1])
	count_subset <- subset(count_matrix, select = eval(parse(text=list(columns_to_keep))))
	group <- complete_table[,2]
	group <- factor(group, levels=unique(complete_table[,2]))
	}
	}


	#error handling

	if (exists("count_subset")==FALSE){
	print("Error. No count matrix found. Retry entry.")
	stop()
	}

	#get the number of samples in the first group
	group_table <- table(group)
	n <- group_table[[1]][1]

	#output directory for results

	dir_output <- readline("Enter the name of the output directory \n")
	dir.create(dir_output)

	#apply HTSFilter
	filter <- HTSFilter(count_subset, group, s.min=1, s.max=200, s.len=25)
	count_filter <- filter$filteredData

	#apply edgeR
	y <- DGEList(counts=count_filter, group=group)
	y <- calcNormFactors(y)
	y <- estimateDisp(y)
	et <- exactTest(y)

	#apply DESeq2
	group_dataframe <- data.frame(group)
	dds <- DESeqDataSetFromMatrix(countData=count_filter, colData=group_dataframe, design=~group)
	dds <- DESeq(dds)
	res <- results(dds)

	#create data frame for edgeR results
	et_results <- topTags(et, n=Inf, sort.by="none")
	write.table(et_results, file="et_output", sep="\t")
	et_table <- read.table("et_output")
	et_table <- et_table[-(2)]
	file.remove("et_output")

	#create data frame for DESeq2 results
	resOrdered <- res[order(res$padj),]
	write.table(resOrdered, file="DESeq_output", sep="\t")
	res_order <- read.table("DESeq_output")
	file.remove("DESeq_output")

	#grab counts, average them, add to data frame, and sort

	a <- count_filter[,1:n]
	b <- count_filter[,(n+1):ncol(count_filter)]
	c <- data.frame(rowMeans(a))
	d <- data.frame(rowMeans(b))
	et_table["Avg Ct A"] <- c
	et_table["Avg Ct B"] <- d
	et_table <- et_table[,c(4,5,1,2,3)]
	et_table <- et_table[order(et_table$FDR, decreasing=FALSE),]

	res_order["Avg Ct A"] <- c
	res_order["Avg Ct B"] <- d
	res_order <- res_order[,c(7,8,1,2,3,4,5,6)]

	#print results and save to file

	setwd(dir_output)

	dir.create("edgeR")
	setwd("edgeR")
	sink("dgelist_output")
	print(y)
	sink()
	write.table(et_table, file="full_results", sep="\t", quote=FALSE)
	et_sig <- et_table[which(et_table$FDR<0.05),]
	write.table(et_sig, file="significant_results", sep="\t", quote=FALSE)
	setwd("..")

	dir.create("DESeq2")
	setwd("DESeq2")
	sink("deseq2_summary")
	print(summary(res))
	sink()
	write.table(res_order, file="full_results", sep="\t", quote=FALSE)
	res_sig <- res_order[which(res_order$padj<0.05),]
	write.table(res_sig, file="significant_results", sep="\t", quote=FALSE)
	setwd("..")
	setwd("..")