Skip to content

Instantly share code, notes, and snippets.

@sminot
Created August 1, 2018 21:48
Show Gist options
  • Select an option

  • Save sminot/8654dee03c46d1f6a9cd98653eb0469e to your computer and use it in GitHub Desktop.

Select an option

Save sminot/8654dee03c46d1f6a9cd98653eb0469e to your computer and use it in GitHub Desktop.
Get the name of an organism, given an assembly ID
import json

from Bio import Entrez
import xmltodict
# Exceptions raised when a parsed response lacks the expected nesting:
# a missing key, an empty list, or None / a list where a dict was expected.
_MISSING_FIELD_ERRORS = (KeyError, IndexError, TypeError)


def _read_entrez_xml(handle):
    """Parse the XML body of an Entrez response handle, always closing it."""
    try:
        # read() gives the same text as "".join(readlines()) for text
        # handles and also works unchanged if the handle yields bytes.
        return xmltodict.parse(handle.read())
    finally:
        handle.close()


def _lookup_failure(assembly_name, response):
    """Print the failure notice and build the error carrying the response."""
    print("Problem fetching " + assembly_name)
    # AssertionError is kept so callers that relied on the original
    # `assert False` still catch the same type; raising it explicitly
    # means the check is no longer stripped under `python -O`.
    return AssertionError(json.dumps(response, indent=4))


def get_name_from_assembly_id(assembly_name):
    """Return the organism name for an NCBI assembly accession.

    Three Entrez requests are made in sequence: search the "assembly"
    database for ``assembly_name``, follow the ``assembly_biosample`` link
    to the BioSample record, then fetch that record and read its organism
    taxonomy name.

    Args:
        assembly_name: Search term for the assembly database, e.g.
            ``"GCF_000332875"``.

    Returns:
        A string of the form ``"<organism name> (<assembly_name>)"``.

    Raises:
        AssertionError: If any response lacks the expected fields. The
            message is the offending response dumped as indented JSON.
    """
    search_result = _read_entrez_xml(
        Entrez.esearch("assembly", term=assembly_name)
    )
    try:
        assembly_id = search_result["eSearchResult"]["IdList"]["Id"]
    except _MISSING_FIELD_ERRORS as err:
        raise _lookup_failure(assembly_name, search_result) from err

    # Link to BioSample
    link_result = _read_entrez_xml(
        Entrez.elink(
            db_from="assembly",
            id=assembly_id,
            linkname="assembly_biosample",
        )
    )
    try:
        link_set = link_result["eLinkResult"]["LinkSet"]
        # Several LinkSet elements are parsed into a list; use the first.
        if isinstance(link_set, list):
            link_set = link_set[0]
        biosample_id = link_set["LinkSetDb"]["Link"]["Id"]
    except _MISSING_FIELD_ERRORS as err:
        raise _lookup_failure(assembly_name, link_result) from err

    # Fetch from BioSample
    fetch_result = _read_entrez_xml(
        Entrez.efetch(db="biosample", id=biosample_id)
    )
    try:
        biosample = fetch_result["BioSampleSet"]["BioSample"]
        org_name = biosample["Description"]["Organism"]["@taxonomy_name"]
    except _MISSING_FIELD_ERRORS as err:
        raise _lookup_failure(assembly_name, fetch_result) from err

    return "{} ({})".format(org_name, assembly_name)
# Example invocation: look up the organism name for assembly "GCF_000332875".
# The returned string is neither stored nor printed here.
get_name_from_assembly_id("GCF_000332875")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment