Created
September 3, 2011 04:24
-
-
Save dwinter/1190555 to your computer and use it in GitHub Desktop.
An online fungal foray
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search NCBI's nucleotide database for ectomycorrhizal root-tip sequences
# from New Zealand and keep the matching record IDs in `ids`.
from Bio import Entrez

#Let NCBI know who you are in case you do something stupid :)
# NOTE(review): the value below is not a usable address (it looks like a
# redaction placeholder) -- set your own contact e-mail before running.
Entrez.email = '[email protected]'

search_s = '"ectomycorrhizal root tip" AND "New Zealand"'
handle = Entrez.esearch(db='nucleotide', term=search_s, retmax=100)
try:
    # Entrez.read parses the XML reply; 'IdList' holds the matching IDs.
    ids = Entrez.read(handle)['IdList']
finally:
    # Release the connection even if parsing the reply fails.
    handle.close()

# Interactive peek at the first five IDs (a no-op when run as a script).
ids[:5]
#gives us ['157086858', '157086857', '157086856', '157086855', '157086854']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the GenBank records for every ID in `ids` and set up a parser
# over the response. `Entrez` and `ids` come from the search step.
from Bio import SeqIO

rec_handle = Entrez.efetch(db='nuccore', rettype='gb', id = ', '.join(ids))
# NOTE(review): SeqIO.parse is documented as returning an iterator, so
# rec_handle is presumably read only as `recs` is consumed -- keep it open
# until the records have been looped over.
recs = SeqIO.parse(rec_handle, 'gb')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Group the taxonomy of every fetched record by the host recorded in its
# 'source' feature. `recs` is the record iterator built in the fetch step.
from collections import defaultdict

# host name -> list of ':'-joined taxonomy strings, one entry per record
host_dict = defaultdict(list)
for record in recs:
    for feature in record.features:
        if feature.type == 'source':
            try:
                host = feature.qualifiers['host'][0]
                taxonomy = ':'.join(record.annotations['taxonomy'])
                host_dict[host].append(taxonomy)
                #can only be one host, so don't need to look other features
                break
            except KeyError:
                #no 'host' in source, will have to dig into these manually
                # NOTE(review): a missing 'taxonomy' annotation would also
                # land here and be reported as a missing host.
                # Parenthesised so the same line works on Python 2 and 3.
                print('No host for {0}!'.format(record.id))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#they're all silver beech! Let's count unique taxonomies for that species
# `host_dict` and `defaultdict` come from the grouping step.
# taxonomy string -> number of records carrying that taxonomy
frequencies = defaultdict(int)
for tax_string in host_dict['Nothofagus menziesii']:
    frequencies[tax_string] += 1

# Interactive peek at the counts (a no-op when run as a script).
frequencies.values()
#gives us:
#[15, 5, 24, 6, 3, 28, 2]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment