Created
October 13, 2022 04:56
-
-
Save audy/d8f30921d0a2ac9d23192dd99cc6a395 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict

import taxonomy
# Build the taxonomy once, at import time, from the dump in ``ncbi_taxdump/``.
tax = taxonomy.Taxonomy.from_ncbi("ncbi_taxdump/")

# Ranks listed in order; graft_lineage_to_taxonomy assigns a new node the
# entry that follows its parent's rank in this list.
FULL_RANKS = [
    "superkingdom",
    "phylum",
    "class",
    "order",
    "family",
    "genus",
    "species",
]
def get_scalar(d: dict, key: str):
    """Return the single value stored under ``key``, or ``None``.

    ``d`` maps keys to sequences of values. The lone element is returned
    only when ``key`` is present and its sequence has exactly one element;
    a missing key, an empty sequence, or an ambiguous (multi-element)
    sequence all yield ``None``.

    The lookup goes through ``dict.get`` so that probing a ``defaultdict``
    never inserts an empty entry for ``key``.
    """
    vals = d.get(key)
    if vals is not None and len(vals) == 1:
        return vals[0]
    return None
def graft_lineage_to_taxonomy(tax, lineage_names, name_to_nodes=None):
    """Graft the missing part of a lineage onto ``tax``.

    This will mutate your taxonomy!

    ``lineage_names`` is walked in order. A name that resolves to exactly
    one known node is reused as the parent for whatever follows. Any other
    name (unknown, or shared by several nodes) gets a fresh node created
    under the current parent, with the rank that follows the parent's rank
    in ``FULL_RANKS``.

    Args:
        tax: Taxonomy to extend. It is iterated for its ids, indexed by id,
            and modified through ``add_node`` / ``edit_node``.
        lineage_names: Names ordered from ancestor to descendant.
        name_to_nodes: Optional name -> list-of-nodes index to reuse across
            calls. When omitted, an index is built from ``tax``. Nodes
            created here are appended to whichever index is in use.

    Returns:
        The last node created, or ``None`` when nothing had to be added.

    Raises:
        AssertionError: a node must be created before any existing ancestor
            has been seen ("orphan node").
        ValueError: the parent's rank is not listed in ``FULL_RANKS``.
        IndexError: the parent already has the last rank in ``FULL_RANKS``.
    """
    build_index = name_to_nodes is None
    if build_index:
        name_to_nodes = defaultdict(list)

    # New ids start at 1e9, or just past the largest existing id if that is
    # higher. Using max() also skips an existing id of exactly 1e9 (left by
    # an earlier graft), which the previous strict ``>`` comparison reused.
    starting_tax_id = int(1e9)
    for tax_id in tax:
        if build_index:
            node = tax[tax_id]
            name_to_nodes[node.name].append(node)
        starting_tax_id = max(starting_tax_id, int(tax_id) + 1)

    parent_node = None
    new_node = None
    for name in lineage_names:
        node = get_scalar(name_to_nodes, name)
        if node is not None:
            # Name is already in the taxonomy; descend without adding.
            parent_node = node
            continue

        # Need to create a new node. Raised explicitly (not via ``assert``)
        # so the check survives ``python -O``.
        if parent_node is None:
            raise AssertionError("orphan node")

        rank = FULL_RANKS[FULL_RANKS.index(parent_node.rank) + 1]
        new_id = str(starting_tax_id)
        tax.add_node(parent_node.id, new_id)
        tax.edit_node(new_id, name=name, rank=rank)
        new_node = tax[new_id]
        print(f"added new node {new_node}")
        # setdefault keeps this working when the caller passed a plain dict.
        name_to_nodes.setdefault(name, []).append(new_node)
        parent_node = new_node
        starting_tax_id += 1

    return new_node
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment