Created
October 6, 2011 15:51
-
-
Save ngcrawford/1267758 to your computer and use it in GitHub Desktop.
Dendropy Tree/Subtree test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def topology_test(tree, query_tree, symmetric_distance=False):
    """Test whether ``query_tree`` matches the corresponding subtree of ``tree``.

    The taxa of ``query_tree`` are looked up in ``tree``, the node that is
    their most recent common ancestor (MRCA) is located, and the subtree
    rooted at that node is compared against ``query_tree`` using the
    symmetric difference.

    Both trees should be dendropy tree objects and contain identical taxon
    labels.

    Parameters
    ----------
    tree : dendropy.Tree
        The tree to search in.
    query_tree : dendropy.Tree
        The (sub)tree topology to look for.
    symmetric_distance : bool, optional
        If true, return the symmetric difference between the MRCA subtree
        and ``query_tree`` instead of a boolean. Defaults to False.

    Returns
    -------
    The symmetric difference when ``symmetric_distance`` is true; otherwise
    True if that difference is 0 and False if it is not.

    Examples:
    --------
    >>> tree = dendropy.Tree()
    >>> query_tree = dendropy.Tree()
    >>> tree.read_from_string('(((A,C),D),B,(E,F))', schema='newick')
    >>> query_tree.read_from_string('(E,F)', schema='newick')
    >>> print(topology_test(tree, query_tree))
    True
    >>> query_tree.read_from_string('(D,(E,F))', schema='newick')
    >>> print(topology_test(tree, query_tree))
    False
    """
    # there's probably a better/faster way to do this with bit_masks
    query_labels = query_tree.taxon_set.labels()
    sub_tree_bitmask = tree.taxon_set.get_taxa_bitmask(labels=query_labels)
    sub_tree_mrca_node = tree.mrca(split_bitmask=sub_tree_bitmask)

    # Round-trip the MRCA node through a newick string so the subtree becomes
    # a standalone Tree object that can be compared with the query tree.
    sub_tree_newick = sub_tree_mrca_node.as_newick_string()
    sub_tree_ = dendropy.Tree()
    sub_tree_.read_from_string(sub_tree_newick, schema='newick')

    sd = sub_tree_.symmetric_difference(query_tree)
    if symmetric_distance:
        return sd
    return sd == 0
# ADDITIONAL USEFUL FUNCTIONS:
def reroot_trees(trees, root):
    """Reroot every tree in ``trees`` at the tip labelled ``root``.

    For each tree, the node carrying the taxon label ``root`` is looked up
    and the tree is rerooted at that node's edge. The trees are modified in
    place; the very same tree objects are collected, in their original
    order, into a new ``dendropy.TreeList`` which is returned.
    """
    rerooted = dendropy.TreeList()
    for current in trees:
        tip = current.find_node_with_taxon_label(root)
        current.reroot_at_edge(tip.edge, update_splits=False)
        rerooted.append(current)
    return rerooted
def subtree_percentage(trees, sub_tree):
    """Return the fraction of ``trees`` that contain ``sub_tree``.

    A tree counts as a match when ``topology_test`` reports a symmetric
    difference of 0 between ``sub_tree`` and the corresponding subtree.

    Parameters
    ----------
    trees : sized iterable of trees (e.g. a tree list)
        The trees to search; ``len(trees)`` must be supported.
    sub_tree : tree
        The subtree topology to look for in each tree.

    Returns
    -------
    float
        The proportion of matching trees, between 0.0 and 1.0 (multiply by
        100 for a percentage). An empty ``trees`` yields 0.0.
    """
    total = len(trees)
    if total == 0:
        # Nothing to search: avoid dividing by zero.
        return 0.0
    matches = sum(
        1
        for tree in trees
        if topology_test(tree, sub_tree, symmetric_distance=True) == 0
    )
    return float(matches) / float(total)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment