ramongallego · January 17, 2020 21:14
diff --git a/Query_gb_and_trim.r b/Query_gb_and_trim.r
 library(tidyverse)
 library(insect)
 library(rentrez)
 set_entrez_key (YOUR_KEY_HERE)
 library(here)

 worlds.taxonomy <- read_rds(here("data", "all.taxonomy.rds")) 

 # make your own taxonomy db using the command
 # worlds.taxonomy <- taxonomy(), then save it for later use

 #############
 #############

 # The easiest way is to specify everything in the query, so you don't have to wrangle with the results too much

 ## Example 1: 16S from Chordata mitochondrial genomes

 query <- "Chordata[ORGN]+AND+16S[TITL]"

 Full.search <- searchGB(query,taxIDs = T, sequences = T, prompt=F )

 Fwd_primer <- "GYAATCACTTGTCTTTTAAATGAAGACC"
 Rev_primer <- "GGATTGCGCTGTTATCCCTA"

 Trimmed_fragments <- virtualPCR(Full.search, up = Fwd_primer, down = Rev_primer,
                             trimprimers = TRUE,minamplen = 250, maxamplen = 400, cores = "autodetect")
                             
 ## Now let's assume that you found new sequences that you want to add - maybe full mt genomes that
 ## might have been missed in the first query

 new.query <- "Chordata[ORGN]+AND+13000:18000[SLEN]+mitochondrion[TITL]"

 search_mits <- searchGB(new.query,taxIDs = T, sequences = T, prompt=F)

 Trimmed_mtos <- virtualPCR(search_mits, up = Fwd_primer, down = Rev_primer,
                             trimprimers = TRUE,minamplen = 250, maxamplen = 400, cores = "autodetect")
                    
 ## How to add both lists?

 #### Case 1: Everything went fine (Nothing is repeated in both lists?)

 All.seqs <- append(Trimmed_fragments,Trimmed_mtos)

 ### Case 2: we have to subset, pluck and so on.

 ### Remove sequences without taxID

 Seq.names.16S <- tibble(seq.name = names(Trimmed_fragments),
                    
                    Search = "16S" )

 Seq.names.16S %>%

  separate(seq.name, into = c("Accession", "taxID"), sep = "\\|", remove = F) %>% 
  
  left_join(worlds.taxonomy %>% 
              mutate(taxID = as.character(taxID))) %>% 
  filter(is.na(name)) %>%
  pull (seq.name) -> to.throw # Keep the names of the sequences to remove
  
  Seq.names.16S %>%
  
  filter (! seq.name %in% to.throw ) %>% 
  
  pull (seq.name) -> to.keep
  
  
 ## Now select only the seqs we want
 
 Trimmed_fragments [to.keep] -> seqs.to.keep
	library(tidyverse)
	library(insect)
	library(rentrez)
	set_entrez_key (YOUR_KEY_HERE)
	library(here)

	worlds.taxonomy <- read_rds(here("data", "all.taxonomy.rds"))

	# make your own taxonomy db using the command
	# worlds.taxonomy <- taxonomy(), then save it for later use

	#############
	#############

	# The easiest way is to specify everything in the query, so you don't have to wrangle with the results too much

	## Example 1: 16S from Chordata mitochondrial genomes

	query <- "Chordata[ORGN]+AND+16S[TITL]"

	Full.search <- searchGB(query,taxIDs = T, sequences = T, prompt=F )

	Fwd_primer <- "GYAATCACTTGTCTTTTAAATGAAGACC"
	Rev_primer <- "GGATTGCGCTGTTATCCCTA"

	Trimmed_fragments <- virtualPCR(Full.search, up = Fwd_primer, down = Rev_primer,
	trimprimers = TRUE,minamplen = 250, maxamplen = 400, cores = "autodetect")

	## Now let's assume that you found new sequences that you want to add - maybe full mt genomes that
	## might have been missed in the first query

	new.query <- "Chordata[ORGN]+AND+13000:18000[SLEN]+mitochondrion[TITL]"

	search_mits <- searchGB(new.query,taxIDs = T, sequences = T, prompt=F)

	Trimmed_mtos <- virtualPCR(search_mits, up = Fwd_primer, down = Rev_primer,
	trimprimers = TRUE,minamplen = 250, maxamplen = 400, cores = "autodetect")

	## How to add both lists?

	#### Case 1: Everything went fine (Nothing is repeated in both lists?)

	All.seqs <- append(Trimmed_fragments,Trimmed_mtos)

	### Case 2: we have to subset, pluck and so on.

	### Remove sequences without taxID

	Seq.names.16S <- tibble(seq.name = names(Trimmed_fragments),

	Search = "16S" )

	Seq.names.16S %>%

	separate(seq.name, into = c("Accession", "taxID"), sep = "\\\|", remove = F) %>%

	left_join(worlds.taxonomy %>%
	mutate(taxID = as.character(taxID))) %>%
	filter(is.na(name)) %>%
	pull (seq.name) -> to.throw # Keep the names of the sequences to remove

	Seq.names.16S %>%

	filter (! seq.name %in% to.throw ) %>%

	pull (seq.name) -> to.keep


	## Now select only the seqs we want

	Trimmed_fragments [to.keep] -> seqs.to.keep
No results found