Skip to content

Instantly share code, notes, and snippets.

@ngcrawford
Created October 6, 2011 15:51
Show Gist options
  • Save ngcrawford/1267758 to your computer and use it in GitHub Desktop.
Save ngcrawford/1267758 to your computer and use it in GitHub Desktop.
Dendropy Tree/Subtree test
def topology_test(tree, query_tree, symmetric_distance=False):
    """Test whether the query tree is present in, or identical to, the tree.

    The taxon labels of 'query_tree' are used to find the node of 'tree' that is
    their most recent common ancestor (MRCA). The clade below that node is
    rebuilt as a separate tree and compared with 'query_tree' by symmetric
    difference.

    If 'symmetric_distance' is set to True, the symmetric distance of the query
    tree to the relevant taxa in the tree is returned. Otherwise True is
    returned when that distance is 0 and False when it is not.

    Both trees should be tree objects and contain identical taxon labels.

    Examples:
    --------
    >>> tree = dendropy.Tree()
    >>> query_tree = dendropy.Tree()
    >>> tree.read_from_string('(((A,C),D),B,(E,F))', schema='newick')
    >>> query_tree.read_from_string('(E,F)', schema='newick')
    >>> topology_test(tree, query_tree)
    True
    >>> query_tree.read_from_string('(D,(E,F))', schema='newick')
    >>> topology_test(tree, query_tree)
    False
    """
    # there's probably a better/faster way to do this with bit_masks
    query_labels = query_tree.taxon_set.labels()
    label_mask = tree.taxon_set.get_taxa_bitmask(labels=query_labels)
    mrca_node = tree.mrca(split_bitmask=label_mask)

    # Round-trip the MRCA clade through a newick string to obtain it as a
    # standalone tree object that can be compared with the query.
    clade_newick = mrca_node.as_newick_string()
    candidate = dendropy.Tree()
    candidate.read_from_string(clade_newick, schema='newick')

    distance = candidate.symmetric_difference(query_tree)

    # The explicit '== True' is kept so that exactly the same argument values
    # select distance mode as before.
    if symmetric_distance == True:
        return distance
    return distance == 0
# ADDITIONAL USEFUL FUNCTIONS:
def reroot_trees(trees, root):
    """Reroot each tree of a treelist at the same tip/label.

    'trees' is an iterable of trees and 'root' is the taxon label to root at.
    For every tree the node carrying that label is looked up and
    reroot_at_edge() is called on the tree itself with that node's edge.

    Returns a new dendropy.TreeList holding the same tree objects that were
    passed in (no copies are made here), in their original order.
    """
    rerooted = dendropy.TreeList()
    for current in trees:
        tip = current.find_node_with_taxon_label(root)
        current.reroot_at_edge(tip.edge, update_splits=False)
        rerooted.append(current)
    return rerooted
def subtree_percentage(trees, sub_tree):
    """Return the fraction of trees containing a particular sub tree.

    'trees' is a sized collection of trees (len() is taken of it) and
    'sub_tree' is the query tree to look for. A tree counts as a match when
    topology_test() reports a symmetric distance of 0 between 'sub_tree' and
    the corresponding part of that tree.

    Despite the function's name, the value returned is a float between 0.0
    and 1.0, not a number between 0 and 100. An empty 'trees' collection
    yields 0.0 rather than a division-by-zero error.
    """
    total = len(trees)
    if total == 0:
        # Nothing to search, so nothing can match.
        return 0.0

    matches = sum(
        1 for tree in trees
        if topology_test(tree, sub_tree, symmetric_distance=True) == 0
    )
    # Explicit float conversion keeps true division under Python 2 as well.
    return float(matches) / float(total)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment