Skip to content

Instantly share code, notes, and snippets.

@brantfaircloth
Created January 29, 2011 18:53
Show Gist options
  • Save brantfaircloth/802094 to your computer and use it in GitHub Desktop.
Save brantfaircloth/802094 to your computer and use it in GitHub Desktop.
Look up NCBI taxonomy for species by name
import sys
from Bio import Entrez
def get_taxid(species):
    """Return the NCBI Taxonomy ID of the first esearch hit for a species.

    To get data from NCBI Taxonomy we need the taxid. We get it by passing
    the species name to esearch against the "taxonomy" database.

    Args:
        species: Species name, e.g. "Erodium carvifolium".

    Returns:
        The first entry of the ``IdList`` field of the parsed search result.

    Raises:
        IndexError: If the search returned an empty ``IdList``.
    """
    # Strip *before* substituting spaces; otherwise surrounding whitespace
    # has already been turned into "+" and strip() can no longer remove it.
    term = species.strip().replace(" ", "+")
    handle = Entrez.esearch(term=term, db="taxonomy", retmode="xml")
    try:
        record = Entrez.read(handle)
    finally:
        # Release the underlying connection even if parsing fails.
        handle.close()
    id_list = record['IdList']
    if not id_list:
        # Same exception type the bare [0] lookup raised, with a useful message.
        raise IndexError("no NCBI taxonomy entry found for %r" % (species,))
    return id_list[0]
def get_tax_data(taxid):
    """Fetch and parse the NCBI Taxonomy record for a taxid.

    Args:
        taxid: Taxonomy identifier passed to efetch as ``id``.

    Returns:
        The result of ``Entrez.read`` on the efetch XML response.
    """
    handle = Entrez.efetch(id=taxid, db="taxonomy", retmode="xml")
    try:
        return Entrez.read(handle)
    finally:
        # Release the underlying connection even if parsing fails.
        handle.close()
# Fill in your e-mail address here before running; the script refuses to
# query Entrez while this is empty.
Entrez.email = ""
if not Entrez.email:
    # Parenthesised form works as a statement on Python 2 and as a
    # function call on Python 3.
    print("you must add your email address")
    sys.exit(2)

# Resolve the species name to a taxid, then fetch its taxonomy record.
taxid = get_taxid("Erodium carvifolium")
data = get_tax_data(taxid)

# Map rank -> scientific name for just the family and order entries of the
# first record's extended lineage.
lineage = {
    taxon['Rank']: taxon['ScientificName']
    for taxon in data[0]['LineageEx']
    if taxon['Rank'] in ('family', 'order')
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment