ohofmann · February 12, 2016 15:19
diff --git a/coverage.R b/coverage.R
 library(tidyr)
 library(dplyr)

 # Import/concat BED coverage files.
 # `pattern` is a regular expression, not a shell glob, so the suffix is
 # anchored and the dot escaped instead of relying on a leading `*`.
 coverage_dir <- "coverage"
 file_list <- list.files(path = coverage_dir,
                         pattern = "_coverage_fixed\\.bed$")

 # Fail early with a clear message instead of an "object 'dataset' not found"
 # error further down the script.
 if (length(file_list) == 0) {
   stop("No *_coverage_fixed.bed files found in '", coverage_dir, "'",
        call. = FALSE)
 }

 # Read every file (tab-separated, no header row, first line skipped, lines
 # starting with '#' treated as comments) and bind them in a single step.
 # Building `dataset` from scratch, rather than testing exists("dataset"),
 # means a stale `dataset` from an earlier run in the same session is never
 # appended to, and the data frame is not re-copied for every file.
 coverage_tables <- lapply(file_list, function(file) {
   read.table(file.path(coverage_dir, file),
              header = FALSE, sep = "\t", stringsAsFactors = FALSE,
              comment.char = "#", skip = 1)
 })
 dataset <- do.call(rbind, coverage_tables)
 rm(coverage_tables)

 # Name the 17 columns of the merged table, in file order.
 # NOTE(review): the percentageN columns are presumably the share of each
 # region covered at >= Nx - confirm against the tool that wrote the BED files.
 colnames(dataset) <- c("chrom", "chromStart", "chromEnd",
                        "name", "readCount", "meanCoverage",
                        "percentage1", "percentage5", "percentage10",
                        "percentage20", "percentage40", "percentage50",
                        "percentage60", "percentage70", "percentage80",
                        "percentage100", "sampleName")

 # Replace the name with something more readable: keep only the text before
 # the first comma. vapply() guarantees a character vector (sapply() can
 # silently return a list when the input is empty or ragged).
 dataset$gene <- vapply(strsplit(dataset$name, ","), "[[", character(1), 1)

 # Name of regions are not unique. Come up with new ID of the form
 # chrom.chromStart.chromEnd.gene
 dataset$region <- paste(dataset$chrom,
                         dataset$chromStart,
                         dataset$chromEnd,
                         dataset$gene,
                         sep = ".")

 # Re-organise into a matrix format, keeping only the
 # 50x cutoff: one row per region, one column per sample.
 c50 <- dataset %>%
   select(region, sampleName, percentage50) %>%
   spread(sampleName, percentage50)
 rowlabels <- c50$region

 # Remove region information and cast to numeric. `region` is the first
 # column after spread(); dropping it by position keeps every sample column,
 # however many samples were loaded (instead of assuming exactly seven).
 c50 <- c50[, -1, drop = FALSE]
 # Note: as.data.frame() on a list checks names, so sample names that are not
 # syntactically valid R names are altered here.
 c50 <- as.data.frame(lapply(c50, as.numeric))
 rownames(c50) <- rowlabels

 # Per-region mean across samples; NA when any sample has no value for the
 # region (rowMeans() is called without na.rm).
 c50$Mean <- rowMeans(c50)
 write.csv(c50, file = "mean50.csv")

 # Interactive table of the per-sample and mean percentages, rounded to two
 # decimals and colour-coded by value using cut points at 50, 70 and 90
 # (four intervals, from '#f03b20' for the lowest to 'white' for the highest).
 # NOTE(review): datatable(), formatRound(), formatStyle() and styleInterval()
 # are DT functions; no library(DT) is visible in this part of the file -
 # confirm the package is attached elsewhere.
 # Format every data column rather than assuming there are exactly eight.
 value_cols <- seq_len(ncol(c50))
 datatable(c50, rownames = TRUE) %>%
   formatRound(value_cols, 2) %>%
   formatStyle(value_cols,
               backgroundColor = styleInterval(c(50, 70, 90),
                                               c("#f03b20",
                                                 "#feb24c",
                                                 "#ffeda0",
                                                 "white")))
	library(tidyr)
	library(dplyr)

	# Import/concat BED coverage files.
	# `pattern` is a regular expression, not a shell glob, so the suffix is
	# anchored and the dot escaped instead of relying on a leading `*`.
	coverage_dir <- "coverage"
	file_list <- list.files(path = coverage_dir,
	                        pattern = "_coverage_fixed\\.bed$")

	# Fail early with a clear message instead of an "object 'dataset' not found"
	# error further down the script.
	if (length(file_list) == 0) {
	  stop("No *_coverage_fixed.bed files found in '", coverage_dir, "'",
	       call. = FALSE)
	}

	# Read every file (tab-separated, no header row, first line skipped, lines
	# starting with '#' treated as comments) and bind them in a single step.
	# Building `dataset` from scratch, rather than testing exists("dataset"),
	# means a stale `dataset` from an earlier run in the same session is never
	# appended to, and the data frame is not re-copied for every file.
	coverage_tables <- lapply(file_list, function(file) {
	  read.table(file.path(coverage_dir, file),
	             header = FALSE, sep = "\t", stringsAsFactors = FALSE,
	             comment.char = "#", skip = 1)
	})
	dataset <- do.call(rbind, coverage_tables)
	rm(coverage_tables)

	# Name the 17 columns of the merged table, in file order.
	# NOTE(review): the percentageN columns are presumably the share of each
	# region covered at >= Nx - confirm against the tool that wrote the BED files.
	colnames(dataset) <- c("chrom", "chromStart", "chromEnd",
	                       "name", "readCount", "meanCoverage",
	                       "percentage1", "percentage5", "percentage10",
	                       "percentage20", "percentage40", "percentage50",
	                       "percentage60", "percentage70", "percentage80",
	                       "percentage100", "sampleName")

	# Replace the name with something more readable: keep only the text before
	# the first comma. vapply() guarantees a character vector (sapply() can
	# silently return a list when the input is empty or ragged).
	dataset$gene <- vapply(strsplit(dataset$name, ","), "[[", character(1), 1)

	# Name of regions are not unique. Come up with new ID of the form
	# chrom.chromStart.chromEnd.gene
	dataset$region <- paste(dataset$chrom,
	                        dataset$chromStart,
	                        dataset$chromEnd,
	                        dataset$gene,
	                        sep = ".")

	# Re-organise into a matrix format, keeping only the
	# 50x cutoff: one row per region, one column per sample.
	c50 <- dataset %>%
	  select(region, sampleName, percentage50) %>%
	  spread(sampleName, percentage50)
	rowlabels <- c50$region

	# Remove region information and cast to numeric. `region` is the first
	# column after spread(); dropping it by position keeps every sample column,
	# however many samples were loaded (instead of assuming exactly seven).
	c50 <- c50[, -1, drop = FALSE]
	# Note: as.data.frame() on a list checks names, so sample names that are not
	# syntactically valid R names are altered here.
	c50 <- as.data.frame(lapply(c50, as.numeric))
	rownames(c50) <- rowlabels

	# Per-region mean across samples; NA when any sample has no value for the
	# region (rowMeans() is called without na.rm).
	c50$Mean <- rowMeans(c50)
	write.csv(c50, file = "mean50.csv")

	# Interactive table of the per-sample and mean percentages, rounded to two
	# decimals and colour-coded by value using cut points at 50, 70 and 90
	# (four intervals, from '#f03b20' for the lowest to 'white' for the highest).
	# NOTE(review): datatable(), formatRound(), formatStyle() and styleInterval()
	# are DT functions; no library(DT) is visible in this part of the file -
	# confirm the package is attached elsewhere.
	# Format every data column rather than assuming there are exactly eight.
	value_cols <- seq_len(ncol(c50))
	datatable(c50, rownames = TRUE) %>%
	  formatRound(value_cols, 2) %>%
	  formatStyle(value_cols,
	              backgroundColor = styleInterval(c(50, 70, 90),
	                                              c("#f03b20",
	                                                "#feb24c",
	                                                "#ffeda0",
	                                                "white")))
No results found