Created
August 6, 2010 21:55
-
-
Save jeetsukumaran/512075 to your computer and use it in GitHub Desktop.
Processing BEAST Trees --- Node heights and lengths (mean, median, 95% HPD, and range)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
import re
import sys

import dendropy as dpy
# Matches one `key=value` item of a BEAST node comment. Group 1 is the key,
# group 2 is either a brace-delimited multi-value (`{a,b}`) or a single
# value, and group 3 is the terminating comma or end of string.
BEAST_NODE_INFO_PATTERN = re.compile(r'(.+?)=(\{.+?,.+?\}|.+?)(,|$)')

# Fields that `parse_beast_tree_node_info` guarantees on every node when
# `create_field_if_missing` is set (absent ones are filled with None).
BEAST_SUMMARY_TREE_NODE_FIELDS = [
    'height',
    'height_median',
    'height_95hpd',
    'height_range',
    'length',
    'length_median',
    'length_95hpd',
    'length_range',
    'posterior',
]

# Annotation keys that are not legal Python identifiers, mapped to the
# attribute names used in their place.
BEAST_SUMMARY_FIELDS_TO_ATTR_MAP = {
    'height_95%_HPD': 'height_95hpd',
    'length_95%_HPD': 'length_95hpd',
}
def _coerce_beast_value(raw, value_type):
    """Convert one raw annotation value string into its Python form.

    A brace-delimited value (`{a,b,...}`) becomes a list of its
    comma-separated items (an empty `{}` becomes an empty list); anything
    else is treated as a single value. When `value_type` is not None it is
    applied to every item.
    """
    if raw.startswith('{'):
        inner = raw[1:-1]
        # ''.split(',') yields [''], which would make float('') blow up.
        items = inner.split(',') if inner else []
        if value_type is None:
            return items
        return [value_type(item) for item in items]
    if value_type is None:
        return raw
    return value_type(raw)


def parse_beast_tree_node_info(tree,
        set_node_attributes=True,
        value_type=float,
        create_field_if_missing=True,
        ignore_missing=False):
    """
    Parse the comment token associated with each node of a BEAST summary
    tree and store the result on the node as `beast_info`, a dictionary of
    key-value pairs.

    Only the first comment of each node is read. A leading '&' is removed
    if present, and the remainder is split into `key=value` items using
    `BEAST_NODE_INFO_PATTERN`. Keys listed in
    `BEAST_SUMMARY_FIELDS_TO_ATTR_MAP` are renamed; all other keys are used
    as given (stripped of surrounding whitespace). Multiple values (e.g.,
    as given for HPD intervals and ranges) are stored as lists.

    Parameters:
        tree: object providing `postorder_node_iter()`, whose nodes have a
            `comments` attribute (None or a sequence of strings).
        set_node_attributes: if True, every entry of `beast_info` is also
            set as an attribute of the node via `setattr`.
        value_type: if not None, a callable applied to every value (and to
            every item of a multi-value).
        create_field_if_missing: if True, any name in
            `BEAST_SUMMARY_TREE_NODE_FIELDS` that was not found in the
            comment is added with the value None.
        ignore_missing: if True, a node without comments is accepted and
            gets an empty `beast_info` (before missing fields are filled
            in); if False, such a node raises ValueError.

    Returns:
        The same `tree` object, with its nodes modified in place.

    Raises:
        ValueError: if a node has no comments and `ignore_missing` is
            False. Any exception raised by `value_type` on a value it
            cannot convert (ValueError for the default `float`) propagates
            unchanged.
    """
    for nd in tree.postorder_node_iter():
        beast_info = {}
        if not nd.comments:
            if not ignore_missing:
                raise ValueError("No comments found associated with node '%s'" % (str(nd)))
        else:
            # populate info dictionary
            node_comment = nd.comments[0]
            # Only drop the first character when it really is the '&'
            # marker; otherwise the first key would lose a letter.
            if node_comment.startswith('&'):
                node_comment = node_comment[1:]
            for match_group in BEAST_NODE_INFO_PATTERN.findall(node_comment):
                key, val = match_group[:2]
                # Tolerate ", " between items.
                key = key.strip()
                key = BEAST_SUMMARY_FIELDS_TO_ATTR_MAP.get(key, key)
                beast_info[key] = _coerce_beast_value(val, value_type)

        # create missing fields
        if create_field_if_missing:
            for k in BEAST_SUMMARY_TREE_NODE_FIELDS:
                beast_info.setdefault(k, None)

        # assign to node
        nd.beast_info = beast_info

        # set attributes
        if set_node_attributes:
            for k, v in beast_info.items():
                setattr(nd, k, v)
    return tree
if __name__ == "__main__":
    # Demo: parse a BEAST summary tree and print the summary fields of
    # every node. An optional first command-line argument overrides the
    # default input file.
    if len(sys.argv) > 1:
        tree_path = sys.argv[1]
    else:
        tree_path = 'pythonidae.beast-summary.tre'
    tree = dpy.Tree.get_from_path(tree_path, 'nexus')
    tree = parse_beast_tree_node_info(tree)
    for nd in tree:
        print('---')
        print('Node: %s' % nd.oid)
        # Same fields, in the same order, as the module-level field list.
        for field in BEAST_SUMMARY_TREE_NODE_FIELDS:
            print('%s = %s' % (field, getattr(nd, field)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment