Created
January 22, 2021 18:04
-
-
Save telatin/8a2c80b760657c10eda791b9e80ad6ce to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dada2) | |
library(ggplot) | |
packageVersion("dada2") | |
library(ShortRead) | |
packageVersion("ShortRead") | |
library(Biostrings) | |
packageVersion("Biostrings") | |
path <- "/Volumes/Informatics/telatin/qibx/ITS/" | |
prmr_funcn <- function(prmr) { | |
prmr_dna <- DNAString(prmr) | |
cmplmnt <- toString(complement(prmr_dna)) | |
rvrse <- toString(reverse(prmr_dna)) | |
rvrse_cmplmnt <- toString(reverseComplement(prmr_dna)) | |
return(c(prmr, cmplmnt, rvrse, rvrse_cmplmnt)) | |
} | |
vcnt_funcn <- function(prmr, filt_seq) { | |
vcount_pattern <- vcountPattern(prmr, DNAStringSet(filt_seq), fixed = FALSE) | |
return(sum(vcount_pattern)) | |
} | |
ITS1f <- "CTTGGTCATTTAGAGGAAGTAA" ## Define your primer sequence here | |
ITS4 <- "GCTGCGTTCTTCATCGATGC" | |
ITS1f.form <- prmr_funcn(ITS1f) | |
ITS4.form <- prmr_funcn(ITS4) | |
primer.form <- c(ITS1f.form, ITS4.form) | |
Mock.fnFs <- sort(list.files(path, pattern = "R1.fastq.gz", full.names = TRUE)) | |
Mock.fnRs <- sort(list.files(path, pattern = "R2.fastq.gz", full.names = TRUE)) | |
mock.names <- sapply(strsplit(basename(Mock.fnFs), "_"), "[", 1) | |
mock.names ## Lists the names of all the sequence files | |
# Remove Ns | |
Mock.fnFs.filtN <- file.path(path, "filtN", paste0(mock.names, "_R1_filtN.fastq.gz")) | |
Mock.fnRs.filtN <- file.path(path, "filtN", paste0(mock.names, "_R2_filtN.fastq.gz")) | |
filterAndTrim(Mock.fnFs, Mock.fnFs.filtN, Mock.fnRs, Mock.fnRs.filtN, maxN = 0, | |
multithread = TRUE) | |
#Here we read in the Ns removed FASTQ file using the readFastq function from the ShortRead package and then store it as a character vector. | |
Mock.R1.filtN <- as.character(sread(readFastq(Mock.fnFs[1]))) | |
Mock.R2.filtN <- as.character(sread(readFastq(Mock.fnRs[1]))) | |
#The outputs from applying the vnct_funcn show the presence of the primers on the FASTQ files. | |
# Checking and counting the presence of the primer set in the forward read | |
sapply(primer.form, vcnt_funcn, filt_seq = Mock.R1.filtN) | |
# Checking and counting the presence of the primer set in the reverse read | |
sapply(primer.form, vcnt_funcn, filt_seq = Mock.R2.filtN) | |
dir.create(file.path(path, "cutadapt_trimmed")) | |
path.cutadapt <-"/Volumes/Informatics/telatin/qibx/ITS/cutadapt" | |
Mock.fnFs.cutadapt <- file.path(path.cutadapt, paste0(mock.names, "_R1_trimmed.fastq.gz")) | |
Mock.fnRs.cutadapt <- file.path(path.cutadapt, paste0(mock.names, "_R2_trimmed.fastq.gz")) | |
trim.cmd <- "-n 2 -m 50" | |
primers <- c("-g CTTGGTCATTTAGAGGAAGTAA", "-a GCATCGATGAAGAACGCAGC", "-G GCTGCGTTCTTCATCGATGC", | |
"-A TTACTTCCTCTAAATGACCAAG") | |
cutadapt.path <-"/Users/telatina/miniconda3/envs/cutadapt/bin/cutadapt" | |
for (i in 1:length(mock.names)) { | |
system2(cutadapt.path, args = c(primers, trim.cmd, "-o", Mock.fnFs.cutadapt[i], | |
"-p", Mock.fnRs.cutadapt[i], Mock.fnFs.filtN[i], Mock.fnRs.filtN[i])) | |
} | |
Mock.R1.trimmed <- as.character(sread(readFastq(Mock.fnFs.cutadapt[1]))) | |
Mock.R2.trimmed <- as.character(sread(readFastq(Mock.fnRs.cutadapt[2]))) | |
# Checking and counting the presence of the primer set in the forward read | |
sapply(primer.form, vcnt_funcn, filt_seq = Mock.R1.trimmed) | |
# Checking and counting the presence of the primer set in the reverse read | |
sapply(primer.form, vcnt_funcn, filt_seq = Mock.R2.trimmed) | |
# Forward and reverse fastq filenames have the format: | |
fnFs <- sort(list.files(path.cutadapt, pattern = "R1_trimmed.fastq", full.names = TRUE)) | |
fnRs <- sort(list.files(path.cutadapt, pattern = "R2_trimmed.fastq", full.names = TRUE)) | |
sample.names <- sapply(strsplit(basename(fnFs), "_"), "[", 1) | |
sample.names | |
plotQualityProfile(fnFs[1:2]) | |
filtFs <- file.path(path.cutadapt, "filtered", paste0(sample.names, "_F_filt.fastq.gz")) | |
filtRs <- file.path(path.cutadapt, "filtered", paste0(sample.names, "_R_filt.fastq.gz")) | |
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, maxN = 0, maxEE = c(2, 2), | |
truncQ = 2, rm.phix = TRUE, compress = TRUE, multithread = TRUE) | |
errF <- learnErrors(filtFs, multithread = TRUE) | |
errR <- learnErrors(filtRs, multithread = TRUE) | |
plotErrors(errF, nominalQ = TRUE) | |
derepFs <- derepFastq(filtFs, verbose = TRUE) | |
derepRs <- derepFastq(filtRs, verbose = TRUE) | |
names(derepFs) <- sample.names | |
names(derepRs) <- sample.names | |
dadaFs <- dada(derepFs, err = errF, multithread = TRUE) | |
dadaRs <- dada(derepRs, err = errR, multithread = TRUE) | |
dadaFs[1] | |
mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs, verbose=TRUE) | |
head(mergers[[1]]) | |
seqtab <- makeSequenceTable(mergers) | |
seqtab.nochim <- removeBimeraDenovo(seqtab, method="consensus", multithread=TRUE, verbose=TRUE) | |
getN <- function(x) sum(getUniques(x)) | |
track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, | |
getN), rowSums(seqtab.nochim)) | |
# If processing a single sample, remove the sapply calls: e.g. replace | |
# sapply(dadaFs, getN) with getN(dadaFs) | |
colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", | |
"nonchim") | |
rownames(track) <- sample.names | |
UNITE <- "~/git/dadaist2/refs/uniref.fa.gz" | |
taxa <- assignTaxonomy(seqtab.nochim, UNITE, | |
multithread = TRUE, tryRC = TRUE) | |
taxa.print <- taxa # Removing sequence rownames for display only | |
rownames(taxa.print) <- NULL | |
head(taxa.print) | |
write.table(taxa.print, | |
file.path("~/git/dadaist2/qibx_ITS/taxonomy_R.txt"), | |
row.names=TRUE, | |
quote=FALSE | |
) | |
seqtab.nochim2 <- t(seqtab.nochim) # QIIME has OTUs as rows | |
col.names <- basename(filtFs) | |
feature_table_header = '#OTU ID'; | |
col.names[[1]] <- paste0(feature_table_header,"\t", col.names[[1]]) | |
write.table(seqtab.nochim2, file.path("~/git/dadaist2/qibx_ITS/table_R.txt"), sep="\t", | |
row.names=TRUE, col.names=col.names,quote=FALSE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment