Skip to content

Instantly share code, notes, and snippets.

@dwinter
Created May 27, 2015 22:09
Show Gist options
  • Save dwinter/706f62079b9394ba8f1d to your computer and use it in GitHub Desktop.
Save dwinter/706f62079b9394ba8f1d to your computer and use it in GitHub Desktop.
Genomic effort by animal class

#Demo for com call

# Attach XML; xpathSApply() and xmlValue() are called unqualified below.
library(XML)
# Load rentrez from a local source checkout. The call is namespace-qualified
# so the script does not depend on devtools already being attached.
devtools::load_all("~/src/rentrez")
## Loading rentrez

# Search the taxonomy database with the animal-subtree / class-rank query.
# retmax = 80 caps the number of IDs requested -- presumably enough to cover
# every animal class; verify against classes$count.
classes <- entrez_search(
    db = "taxonomy",
    term = "Animals[SBTR] AND Class[RANK]",
    retmax = 80
)

# Fetch the records for the IDs found above, asking for parsed XML.
tax_recs <- entrez_fetch(
    db = "taxonomy",
    id = classes$ids,
    rettype = "xml",
    parsed = TRUE
)
typeof(tax_recs)
## [1] "externalptr"
#' Look up the scientific name of a record's lineage entry at a given rank
#'
#' Runs an XPath query relative to `rec` that selects `LineageEx/Taxon`
#' entries whose `Rank` text equals `rank`, and returns the text of their
#' `ScientificName` child.
#'
#' @param rec XML node to query; passed straight to `xpathSApply()`.
#' @param rank Rank label to match exactly, e.g. "phylum".
#' @return The matching scientific name(s), or `NA` when nothing matches.
get_taxon <- function(rec, rank) {
    xp <- paste0("LineageEx/Taxon/Rank[.='", rank, "']/../ScientificName")
    res <- xpathSApply(rec, xp, xmlValue)
    # An unmatched query can come back as NULL or as a zero-length list
    # (which of the two appears to depend on the XML/libxml2 version -- TODO
    # confirm), so test the length instead of relying on is.null() alone.
    if (length(res) == 0) {
        return(NA_character_)
    }
    res
}

#' Build the (Phylum, Class) pair for one taxonomy record
#'
#' The class name is the text of the record's own `ScientificName` child;
#' the phylum name is whatever `get_taxon()` returns for rank "phylum".
#'
#' @param rec XML node for a single record.
#' @return The two values combined with `c()`, named "Phylum" and "Class".
extract_parents <- function(rec) {
    class_name <- xpathSApply(rec, "ScientificName", xmlValue)
    phylum_name <- get_taxon(rec, "phylum")
    pair <- c(phylum_name, class_name)
    names(pair) <- c("Phylum", "Class")
    pair
}
# Select every Taxon node directly under the TaxaSet root of the fetched XML.
taxa <- tax_recs["/TaxaSet/Taxon"]
# One (Phylum, Class) pair per record, transposed so each record is a row.
# vapply() pins the per-record result to two strings and fails loudly if a
# record yields anything else, where sapply() would silently return a list.
tax_df <- as.data.frame(
    t(vapply(taxa, extract_parents, character(2))),
    stringsAsFactors = FALSE
)
# Manual override of the Class value in row 14. NOTE(review): the row index is
# hard-coded, so this presumably depends on the order of the search results --
# verify before re-running.
tax_df[14, 2] <- "Craniforma" #Oho -- NCBI search hasn't caught up with ncbi tax!
head(tax_df, 14)
##             Phylum              Class
## 1     Chaetognatha        Sagittoidea
## 2         Chordata          Cladistia
## 3         Mollusca     Monoplacophora
## 4         Mollusca      Solenogastres
## 5             <NA>     Micrognathozoa
## 6     Nematomorpha     Nectonematoida
## 7         Annelida    Branchiobdellae
## 8         Chordata        Actinopteri
## 9   Acanthocephala Polyacanthocephala
## 10 Platyhelminthes        Turbellaria
## 11        Nematoda        Chromadorea
## 12        Nematoda            Enoplea
## 13     Brachiopoda     Rhynchonellata
## 14     Brachiopoda         Craniforma
# One SRA search term per class: the class name with the [ORGN] field tag.
queries <- paste0(tax_df$Class, "[ORGN]")
# Run one SRA search per term and keep only its hit count. The count is
# converted via character to numeric per query (as the original did on the
# whole vector), so vapply() can enforce exactly one number per search.
nseq <- vapply(
    queries,
    function(q) {
        hits <- entrez_search(db = "sra", term = q)$count
        as.numeric(as.character(hits))
    },
    numeric(1)
)
# Drop the query-string names so the column is a plain numeric vector.
tax_df$nseq <- unname(nseq)
head(tax_df)
##         Phylum          Class nseq
## 1 Chaetognatha    Sagittoidea    0
## 2     Chordata      Cladistia    0
## 3     Mollusca Monoplacophora    2
## 4     Mollusca  Solenogastres    5
## 5         <NA> Micrognathozoa    0
## 6 Nematomorpha Nectonematoida    0
# Plot tax_df as a treemap indexed by Phylum then Class, with nseq as the size
# variable and Phylum as the (categorical) colour variable; the legend is
# switched off and the two border colours are white and black.
treemap::treemap(
    tax_df,
    index = c("Phylum", "Class"),
    vSize = "nseq",
    vColor = "Phylum",
    type = "categorical",
    position.legend = "none",
    border.col = c("white", "black")
)

plot of chunk plot

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment