Skip to content

Instantly share code, notes, and snippets.

@thanhleviet
Created June 12, 2019 22:40
Show Gist options
  • Save thanhleviet/9db6009280b886ba175fe8c2b91b6bc5 to your computer and use it in GitHub Desktop.
Save thanhleviet/9db6009280b886ba175fe8c2b91b6bc5 to your computer and use it in GitHub Desktop.
Plot depth cov for dengue genome
library(readr)
library(dplyr)
add.alpha <- function(col, alpha=1){
if(missing(col))
stop("Please provide a vector of colours.")
apply(sapply(col, col2rgb)/255, 2,
function(x)
rgb(x[1], x[2], x[3], alpha=alpha))
}
bed_files <- list.files("depth_batch1/","*.bed", full.names = T)
batch1 <- grep("depth//[0-9]{3}", bed_files, value = T)
batch1_name <- gsub("depth_batch3//|_recal_zero_genomecov.bed", "",batch1)
batch2 <- grep("depth_batch1//[0-9]{1,2}_", bed_files, value = T)
batch2_name <- gsub("depth_batch1//|_recal_zero_genomecov.bed", "",batch2)
# bed <- read_delim(bed_files[2], delim = "\t", col_names = c("seq", "pos", "depth"))
beds1 <- lapply(batch1, function(x) read_delim(x, delim = "\t", col_names = c("seq", "pos", "depth")))
beds2 <- lapply(batch2, function(x) read_delim(x, delim = "\t", col_names = c("seq", "pos", "depth")))
# png(filename = "doc.png", width = 1200, height = 800, units = "px", type = "cairo", antialias = "subpixel")
# seq_300 <- map_lgl(beds1, function(x) sum(x$depth==0) > 300)
# new_beds <- beds[seq_300]
# with(bed, plot(pos, depth, type = "l", ylim = c(-1, 7000), col = "grey90"))
# for (i in seq_along(beds)[1:3]) {
# print(i)
# with(beds[[i]],
# polygon(c(pos[1], pos, pos[10690]),c(min(depth), depth, min(depth)),
# border = NA,
# col = "grey90"))
# }
beds <- beds2
low_cov <- lapply(beds, function(x) x %>% filter(seq != "DENV2", depth < 10) %>% nrow() > 0) %>% unlist
beds <- lapply(beds[low_cov], function(x) x %>% filter(depth < 10))
y_max <- length(beds) + 10
beds_avg <- bind_rows(beds2) %>%
group_by(pos) %>%
summarise(mean_depth = mean(depth),
median_depth = median(depth),
min_depth = min(depth),
max_depth = max(depth))
# png(filename = "batch1.png", width = 2830, height = 1390, type = "quartz", pointsize = 36)
dev.off()
# layout(mat = matrix(c(1,1,2,2), 2,2, byrow = TRUE), heights = c(4,2), FALSE)
# layout.show(10)
# par(mar = c(1,4,2,1) + 0.1, oma = c(0,0,0,0))
par(mar = c(5,4,2,4))
plot(0,0, ylim = c(0,y_max), xlim = c(1,11000), type = "n", ylab = "Sample", xlab = "Locus", axes=FALSE, frame.plot=FALSE, main = "Batch 2")
# kolor <- distinctColorPalette(nrow(den1_ft))
kolor <- c("grey80", "#DD6179", "#7FDBD2", "#7ADD99", "#7EADD7", "#A543E5", "#82E557", "#D2DCA2", "#D6DBDD", "#C7A095", "#DC61CB", "#D6A2D5", "#8278D5", "grey80")
for (i in seq_len(nrow(den1_ft))){
yy <- c(0, y_max, y_max, 0)
start <- den1_ft[i,]$start
end <- den1_ft[i,]$end
gene <- den1_ft$product1
mid <- den1_ft$mid
xx <- c(start, start, end, end)
polygon(xx, yy, col = add.alpha(kolor[i], 0.75), border = NA)
text(mid, y_max, gene, srt = 90, adj = 1)
}
for (i in seq_along(beds)){
abline(h = i, lty = 3, col = add.alpha("grey40", 0.2))
points(x = beds[[i]]$pos, y = rep(i, nrow(beds[[i]])), col = add.alpha("black", 0.09), pch = 19, cex = 0.5)
}
axis(side=2, at = seq(1,95,10), las = 2, lwd = 2)
axis(side=1, at = seq(0,11000,1000), pos = -1, lwd = 2)
# par(mar = c(4.1,4.1,0,1.1))
par(new = T)
with(beds_avg, plot(x = pos, y = median_depth, type = "l", axes = F, frame.plot=FALSE , xlab = NA, ylab = NA, col = add.alpha("red", 0.8)))
max_cov <- max(beds_avg$median_depth) + 100
axis(side=4, at = seq(0,max_cov,500), pos = 10800, las = 2, col = add.alpha("red", 0.8), col.axis = add.alpha("red", 0.8), lwd = 2)
mtext(side = 4, line = 2, "Median of Coverage", col = add.alpha("red", 0.8))
################################################################
beds1a <- lapply(beds2, function (x)
x %>% mutate(pos = as.numeric(pos), product = case_when(
(pos >= 73 & pos <= 413) ~ "anchored capsid protein",
(pos >= 415 & pos <= 906) ~ "membrane precursor",
(pos >= 907 & pos <= 2397) ~ "E gene",
(pos >= 2398 & pos <= 3453) ~ "NS1",
(pos >= 3454 & pos <= 4107) ~ "NS2A",
(pos >= 4108 & pos <= 4497) ~ "NS2B",
(pos >= 4498 & pos <= 6354) ~ "NS3",
(pos >= 6355 & pos <= 6735) ~ "NS4A",
(pos >= 6736 & pos <= 6804) ~ "2K",
(pos >= 6805 & pos <= 7551) ~ "NS4B",
(pos >= 7552 & pos <= 10248) ~ "NS5",
TRUE ~ "non-CDS"
)) %>%
filter(depth < 10) %>%
group_by(product) %>%
summarise(counts = n()) %>%
spread(key = product, value = counts, fill = "0")
) %>%
bind_rows() %>%
mutate(sample = batch2_name) %>%
select(sample, everything())
write_csv(beds1a, "batch2_less_than_10x.csv")
###Average coverage
avg_cov <- lapply(beds2, function(x) sum(x$depth/nrow(x))) %>% unlist()
avg_cov_df <- data.frame(sample = batch2_name, avg_cov = avg_cov)
write_csv(avg_cov_df, "batch_2_avg_cov.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment