Skip to content

Instantly share code, notes, and snippets.

@genomewalker
Last active May 16, 2018 13:43
Show Gist options
  • Save genomewalker/80a5c46ee40e736f9a459168c6ca4f7f to your computer and use it in GitHub Desktop.
Save genomewalker/80a5c46ee40e736f9a459168c6ca4f7f to your computer and use it in GitHub Desktop.
# Let’s get the non-merged reads ------------------------------------------
purrr::map_df(merged, tidyr::extract, col = "sequence", into = "sequence" ) %>%
filter(accept == FALSE) %>%
select(nmatch, nmismatch, nindel) %>%
skimr::skim()
concat <- mergePairs(dada_forward, derep_forward, dada_reverse, derep_reverse, justConcatenate = TRUE)
get_nonmerged <- function(X, merg = merg, conc = conc, fn = fn){
m <- merg[[X]]
cn <- conc[[X]]
bind_cols(m, cn) %>%
filter(eval(parse(text = fn))) %>%
mutate(sequence = sequence1, accept = TRUE) %>%
select(sequence, abundance, forward, reverse, nmatch, nmismatch, nindel, prefer, accept) %>%
as.data.frame()
}
# flt <- paste("accept == FALSE", "nmatch > 0", "nmismatch == 0", "nindel == 0", sep = "&")
flt <- paste("accept == FALSE", "nmatch > 0", sep = "&")
test <- lapply(names(merged), get_nonmerged, merg = merged, conc = concat, fn = flt)
names(test) <- names(merged)
#Construct sequence table
seqtab_nm <- makeSequenceTable(test)
dim(seqtab_nm)
table(nchar(getSequences(seqtab_nm)))
#Remove chimeras
seqtab_nm.nochim <- removeBimeraDenovo(seqtab_nm, method = "consensus", multithread = 2, verbose = TRUE)
dim(seqtab_nm.nochim)
sum(seqtab_nm.nochim)/sum(seqtab_nm)
taxa_nm <- assignTaxonomy(seqtab_nm.nochim, "../silva_nr_v132_train_set.fa.gz", multithread=2, tryRC = TRUE)
taxa_nm_filt <- taxa_nm %>%
as_tibble(rownames = "sequence") %>%
mutate(type = case_when(Order == "Chloroplast" ~ "Chloroplast",
Family == "Mitochondria" ~ "Mitochondria",
TRUE ~ "Other")) %>%
select(sequence, type) %>%
add_count(type) %>%
mutate(type = paste(type, " (ASVs = ", n, ")", sep = ""))
purrr::map_df(test, tidyr::extract, col = "sequence", into = "sequence" ) %>%
as_tibble() %>%
right_join(taxa_nm_filt) %>%
select(nmatch, nmismatch, nindel, type) %>%
ggplot(aes(nmismatch, nmatch)) +
geom_hline(yintercept = 80, color = "#AD303E", linetype = "twodash") +
geom_point(shape = 21, color = "#282C34", fill = "#282C34", alpha = 0.6) +
theme_light() +
facet_wrap(~type, nrow = 1)
Skim summary statistics
n obs: 1653
n variables: 3
Variable type: integer
variable missing complete n mean sd p0 p25 p50 p75 p100 hist
nindel 0 1653 1653 0.0012 0.035 0 0 0 0 1 ▇▁▁▁▁▁▁▁
nmatch 0 1653 1653 23.94 37.76 0 0 1 78 102 ▇▁▁▁▁▁▂▁
nmismatch 0 1653 1653 0.18 0.38 0 0 0 0 1 ▇▁▁▁▁▁▁▂
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment