Created
June 7, 2011 20:26
-
-
Save mtholder/1013074 to your computer and use it in GitHub Desktop.
Uses dendropy to calculate a similarity score for the trees in analyses based on split support
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import dendropy | |
from copy import copy | |
threshold = float(sys.argv[1]) | |
filename_list = sys.argv[2:] | |
SCRIPT_NAME = 'conflict_viz' | |
def debug(*valist): | |
msg = ' '.join([str(i) for i in valist]) | |
sys.stderr.write(SCRIPT_NAME + ': ' + msg + '\n') | |
status = debug | |
warn = status | |
def die(*valist): | |
warn(*valist) | |
sys.exit(1) | |
dataset = dendropy.DataSet() | |
dataset.attach_taxon_set() | |
analysis_list = [] | |
for n, filename in enumerate(filename_list): | |
debug('reading', filename) | |
sys.stdout.write(str(n+1) + ' = ' + filename + '\n') | |
dataset.read_from_path(filename, schema='NEXUS') | |
# TODO don't use a DataSet, read each tree list separately sez Jeet | |
assert (n + 1 == len(dataset.tree_lists)) | |
tree_list = dataset.tree_lists[-1] | |
tree_w_support = tree_list[0] | |
tree_w_support.update_splits() | |
split_list = [] | |
for int_nd in tree_w_support.preorder_node_iter(filter_fn=lambda x: x.is_internal()): | |
if int_nd is not tree_w_support.seed_node: | |
support = float(int_nd.label) | |
if support > threshold: | |
split_list.append((support, int_nd.edge.split_bitmask)) | |
split_list.sort() | |
analysis_list.append({ | |
'denominator':sum(i[0] for i in split_list), | |
'split_list':split_list, | |
'tree':tree_w_support | |
}) | |
class Stat: | |
ELIMINATE, DOWNWEIGHTED, COMPLETE_DOWNWEIGHTED = range(3) | |
mode = Stat.ELIMINATE | |
full_leaf_mask = dataset.taxon_sets[0].all_taxa_bitmask() | |
sys.stdout.write('\t'.join([''] + [str(i + 1) for i in range(len(analysis_list))]) + '\n') | |
for row_n, row in enumerate(analysis_list): | |
sys.stdout.write(str(row_n + 1)) | |
denominator = row['denominator'] | |
num = len(row['split_list']) | |
for col_n, col in enumerate(analysis_list): | |
if col_n == row_n: | |
sys.stdout.write('\t' + str(denominator) + '/' + str(num)) | |
else: | |
row_copy = [[i[0], i[1]] for i in row['split_list']] | |
col_copy = [[i[0], i[1]] for i in col['split_list']] | |
row_tree = row['tree'] | |
col_tree = col['tree'] | |
union_leaves_mask = row_tree.seed_node.edge.split_bitmask | col_tree.seed_node.edge.split_bitmask | |
numerator = 0.0 | |
for rs in row_copy: | |
row_split = rs[1] | |
for cs in col_copy: | |
col_split = cs[1] | |
if not dendropy.treesplit.is_compatible(row_split, col_split, union_leaves_mask): | |
# splits in conflict. Lower the "score" for the row_split | |
if mode == Stat.ELIMINATE: | |
rs[0] = 0.0 | |
break | |
elif mode == Stat.DOWNWEIGHTED: | |
rs[0] *= (1 - cs[0]) | |
break | |
elif mode == Stat.COMPLETE_DOWNWEIGHTED: | |
rs[0] *= (1 - cs[0]) | |
numerator += rs[0] | |
sys.stdout.write('\t' + str(numerator/denominator)) | |
sys.stdout.write('\n') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment