#Demo for com call
library(XML)
# Load the development copy of rentrez from a local checkout. The call is
# namespace-qualified because this script never attaches devtools itself,
# so a bare load_all() only works if something else attached it first.
devtools::load_all("~/src/rentrez")
## Loading rentrez
# Search the taxonomy database with the term below (at most 80 hits), then
# fetch the matching records as parsed XML.
classes <- entrez_search(
  db = "taxonomy",
  term = "Animals[SBTR] AND Class[RANK]",
  retmax = 80
)
tax_recs <- entrez_fetch(
  db = "taxonomy",
  id = classes$ids,
  rettype = "xml",
  parsed = TRUE
)
# The parsed result is an external pointer rather than an ordinary R list.
typeof(tax_recs)
## [1] "externalptr"
# Look up the scientific name of the ancestor of a given rank in a taxon
# record.
#
# rec:  an XML node for a single <Taxon> record; the XPath query is
#       evaluated relative to this node.
# rank: the <Rank> text to match inside the record's LineageEx,
#       e.g. "phylum".
#
# Returns the ScientificName value(s) of the matching lineage entry, or NA
# when the lineage has no entry of that rank.
get_taxon <- function(rec, rank) {
  xp <- paste0("LineageEx/Taxon/Rank[.='", rank, "']/../ScientificName")
  res <- xpathSApply(rec, xp, xmlValue)
  # A query with no matches may come back as a zero-length list rather than
  # NULL, so test the length: that covers both cases, whereas is.null()
  # would let an empty list through and break callers that combine the
  # result with other names.
  if (length(res) == 0) {
    return(NA)
  }
  res
}
# Build the (Phylum, Class) name pair for one taxonomy record.
#
# rec: an XML node for a single <Taxon> record.
#
# Returns a vector named "Phylum" and "Class": the phylum comes from the
# record's lineage via get_taxon(), the class is the record's own
# ScientificName.
extract_parents <- function(rec) {
  phylum_name <- get_taxon(rec, "phylum")
  class_name <- xpathSApply(rec, "ScientificName", xmlValue)
  name_pair <- c(phylum_name, class_name)
  structure(name_pair, names = c("Phylum", "Class"))
}
# Select every <Taxon> node directly under <TaxaSet> and tabulate the
# phylum and class of each, one row per record.
taxa <- tax_recs["/TaxaSet/Taxon"]
parent_names <- t(sapply(taxa, extract_parents))
tax_df <- as.data.frame(parent_names, stringsAsFactors = FALSE)
# Oho -- NCBI search hasn't caught up with ncbi tax!
# Overwrite the class name in row 14 by hand.
tax_df[14, 2] <- "Craniforma"
head(tax_df, 14)
## Phylum Class
## 1 Chaetognatha Sagittoidea
## 2 Chordata Cladistia
## 3 Mollusca Monoplacophora
## 4 Mollusca Solenogastres
## 5 <NA> Micrognathozoa
## 6 Nematomorpha Nectonematoida
## 7 Annelida Branchiobdellae
## 8 Chordata Actinopteri
## 9 Acanthocephala Polyacanthocephala
## 10 Platyhelminthes Turbellaria
## 11 Nematoda Chromadorea
## 12 Nematoda Enoplea
## 13 Brachiopoda Rhynchonellata
## 14 Brachiopoda Craniforma
# Run one SRA search per class, using the class name tagged with [ORGN] as
# the query, and keep the hit count of each search.
queries <- paste0(tax_df[["Class"]], "[ORGN]")
nseq <- sapply(queries, function(query) {
  entrez_search(db = "sra", term = query)$count
})
# Convert via character so the counts are stored as plain numbers.
tax_df$nseq <- as.numeric(as.character(nseq))
head(tax_df)
## Phylum Class nseq
## 1 Chaetognatha Sagittoidea 0
## 2 Chordata Cladistia 0
## 3 Mollusca Monoplacophora 2
## 4 Mollusca Solenogastres 5
## 5 <NA> Micrognathozoa 0
## 6 Nematomorpha Nectonematoida 0
# Plot the table as a treemap indexed by phylum, then class. Rectangle
# sizes are taken from nseq and colours from Phylum; the legend is hidden.
treemap::treemap(
  tax_df,
  index = c("Phylum", "Class"),
  vSize = "nseq",
  vColor = "Phylum",
  type = "categorical",
  border.col = c("white", "black"),
  position.legend = "none"
)