Last active
August 25, 2023 06:13
-
-
Save jmberros/fdcdc97d101c77d873e52a3a962035b5 to your computer and use it in GitHub Desktop.
Get the country field for the given nuccore accession numbers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Bio import Entrez

# Read the accessions from a file
accessions_file = 'accessions.txt'
with open(accessions_file) as f:
    # One accession per line. Surrounding whitespace is stripped and blank
    # lines (including the one produced by a trailing newline) are skipped so
    # that no empty ID is sent to Entrez.
    ids = [line.strip() for line in f if line.strip()]

# Fetch the entries from Entrez
Entrez.email = 'you@example.com'  # Placeholder: insert your email here
handle = Entrez.efetch('nuccore', id=ids, retmode='xml')
try:
    # Parse the XML reply into nested dict/list-like records.
    response = Entrez.read(handle)
finally:
    # Always release the underlying connection, even if parsing fails.
    handle.close()
# Parse the entries to get the country
def extract_countries(entry):
    """Yield each ``country`` qualifier value from a record's source features.

    Args:
        entry: A mapping for one parsed record. Its 'GBSeq_feature-table'
            item is read as a list of features; each feature has a
            'GBFeature_key' and, optionally, a 'GBFeature_quals' list of
            qualifiers with 'GBQualifier_name' / 'GBQualifier_value' items.

    Yields:
        The 'GBQualifier_value' of every qualifier named 'country' found in
        features whose key is 'source', in document order.

    A record without a feature table, or a source feature without
    qualifiers, yields nothing instead of raising ``KeyError``.
    """
    # NOTE(review): newer records may carry this information under a
    # `geo_loc_name` qualifier instead of `country` -- confirm whether that
    # name should be matched as well.
    for feature in entry.get('GBSeq_feature-table', []):
        if feature['GBFeature_key'] != 'source':
            continue
        for qualifier in feature.get('GBFeature_quals', []):
            if qualifier['GBQualifier_name'] == 'country':
                yield qualifier['GBQualifier_value']
# Print one "accession,country" line for every country found in each record.
for entry in response:
    accession = entry['GBSeq_primary-accession']
    for country in extract_countries(entry):
        print(accession, country, sep=',')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
super cool script! Thank you so much!!!