Skip to content

Instantly share code, notes, and snippets.

@mtholder
Created December 28, 2019 19:14
Show Gist options
  • Select an option

  • Save mtholder/3588c927224a1ce8184805cd5b2bb8d7 to your computer and use it in GitHub Desktop.

Select an option

Save mtholder/3588c927224a1ce8184805cd5b2bb8d7 to your computer and use it in GitHub Desktop.
Suppresses nodes from an OT tree if they do not have an ID listed in a separate file. Takes the Newick tree filepath and a file with IDs (one per line) as arguments.
#!/bin/env python3
import sys
import dendropy
import re
# Captures the digits that follow the last "ott" in a label; the label must
# end with those digits.
id_extractor = re.compile(r'.*ott([0-9]+)$')


def normalize_id(raw):
    """Return the bare ID text found on one line of the ID file.

    Surrounding whitespace, an optional case-insensitive ``ott`` prefix and
    any trailing commas are removed.  A blank line yields ``''``.
    """
    # Strip first so that leading whitespace cannot hide the "ott" prefix.
    token = raw.strip()
    if token.lower().startswith('ott'):
        token = token[3:]
    # Trailing commas may be mixed with whitespace (e.g. "123 ,").
    return token.strip().rstrip(', \t\r\n')


def parse_retained_ids(lines):
    """Build the frozenset of IDs to retain from an iterable of text lines.

    Each line is cleaned with ``normalize_id``; lines that are empty after
    cleaning are skipped.
    """
    cleaned = (normalize_id(line) for line in lines)
    return frozenset(token for token in cleaned if token)


def find_nodes_to_suppress(tree, to_retain_ids):
    """Return the nodes of ``tree`` whose edges should be collapsed.

    A node is flagged when it has no taxon, or when the ID extracted from
    its taxon label is not in ``to_retain_ids``.  Each flagged labeled node
    is reported on stderr.  The process exits with an error message if a
    label does not match ``id_extractor`` or if a node without a taxon has
    no children.
    """
    flagged = []
    for node in tree.postorder_node_iter():
        if node.taxon:
            label = node.taxon.label
            m = id_extractor.match(label)
            if not m:
                sys.exit('name not matching pattern: {}'.format(label))
            if m.group(1) not in to_retain_ids:
                sys.stderr.write('flagging for suppression due to non-retained ID: {}\n'.format(label))
                flagged.append(node)
        else:
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if node.num_child_nodes() == 0:
                sys.exit('node without a taxon has no children')
            flagged.append(node)
    return flagged


def main(argv):
    """Prune the tree named in ``argv[1]`` using the ID file in ``argv[2]``.

    Reads the Newick tree, collapses the edge of every flagged node and
    writes the resulting tree to stdout in Newick format.
    """
    if len(argv) < 3:
        sys.exit('expecting 2 arguments: newick-tree-filepath and file-with-ids-one-per-line')
    tree_filepath = argv[1]
    id_filepath = argv[2]
    tree = dendropy.Tree.get(path=tree_filepath, schema='newick', suppress_internal_node_taxa=False)
    # Get the list of IDs to retain; ``with`` closes the file promptly.
    with open(id_filepath, 'r') as id_file:
        to_retain_ids = parse_retained_ids(id_file)
    for node in find_nodes_to_suppress(tree, to_retain_ids):
        # NOTE(review): labeled leaves can be flagged too; dendropy may refuse
        # to collapse a terminal edge -- confirm every tip ID is expected to
        # be present in the retain list.
        if node.edge:
            node.edge.collapse()
    tree.write_to_stream(sys.stdout, schema="newick")


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment