Last active
June 27, 2018 00:29
-
-
Save chasemc/b185c441c4165ef480eaef7f304c560f to your computer and use it in GitHub Desktop.
Messy little script to run local BLAST against 16S database
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "16S" | |
author: "Chase Clark" | |
date: "May 15, 2018" | |
output: html_document | |
--- | |
```{r eval=FALSE} | |
# download file | |
#download.file(url = "ftp://ftp.ncbi.nlm.nih.gov/blast/db/16SMicrobial.tar.gz", destfile = "16SMicrobial.tar.gz", mode = 'wb') | |
# extract files from zipped file | |
#untar("16SMicrobial.tar.gz", exdir = "data/16S/blast_16SMicrobial_db") | |
``` | |
```{r eval=FALSE} | |
Sys.setenv(PATH = paste(Sys.getenv("PATH"), "C:/Program Files/NCBI/blast-2.7.1/bin", sep= .Platform$path.sep)) | |
bacteria16Sdatabase <- blast(db = "data/16S/blast_16SMicrobial_db/16SMicrobial") # create rBLAST database connection | |
sequences <- Biostrings::readDNAStringSet("data/16S/16S_fasta.fas", "fasta") # read in fasta | |
multiBLAST <- function(seqs, database = bacteria16Sdatabase, keep = 0.95){ | |
seq_pred <- NULL | |
# use rBLAST and add sequence to subsequent dataframe | |
for (i in seq_along(seqs)){ | |
predict(database, seqs[i]) %>% # run blastn | |
dplyr::top_n(5) %>% | |
dplyr::mutate_at(., c('QueryID', 'SubjectID'), as.character) %>% # change to character | |
dplyr::bind_rows(seq_pred, .) -> | |
seq_pred | |
} | |
# return dataframe | |
return(seq_pred) | |
} | |
blastResults <- multiBLAST(sequences) # run multiBLAST function defined above | |
blastResults <- as_tibble(blastResults) | |
saveRDS(blastResults,"data/16S/LocalBlast/blastResults.rds") | |
``` | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment