Skip to content

Instantly share code, notes, and snippets.

@mtholder
Created July 6, 2017 02:47
Show Gist options
  • Select an option

  • Save mtholder/470a36b14a903983d989d1e08b0af4f8 to your computer and use it in GitHub Desktop.

Select an option

Save mtholder/470a36b14a903983d989d1e08b0af4f8 to your computer and use it in GitHub Desktop.
Brittle script for converting TNT tree output to a NEXUS trees block. I'm not sure what the TNT name-quoting rules are, so it reads the first whitespace-delimited word of each line in the xread block to get the taxon names.
#!/usr/bin/env python
import sys
import os
import re
# Patterns matching lines that begin with the TNT "tread" / "xread" keywords.
# NOTE(review): xread_pat is not referenced by the code visible in this file
# (the xread checks use str.startswith); it is kept in case other code uses it.
tread_pat = re.compile('^tread')
xread_pat = re.compile('^xread')
# Exit with a usage message rather than an IndexError traceback when the
# tree-file argument is missing.
if len(sys.argv) < 2:
    sys.exit('Expecting the path to a TNT tree file as the first argument.\n')
treefilename = sys.argv[1]
# Prefix of a tread line whose quoted title names the data file; the file name
# is whatever follows this prefix, up to the closing quote.
ext_data_pref = "tread 'tree(s) from TNT, for data in "
def read_taxa_from_xread(line_iter):
    """Read the taxon names from the body of a TNT ``xread`` block.

    ``line_iter`` must be an iterator positioned on the line that follows the
    ``xread`` keyword.  The last space-separated field of that line is parsed
    as the number of taxa.  Every following line, up to a line that starts
    with ``;``, contributes its first whitespace-delimited word as a name.

    Returns a list whose length is the declared number of taxa; slots for
    which no line was seen before the ``;`` are left as None.  Returns None
    when the input is exhausted before a ``;`` line is found.

    Raises ValueError when the dimensions line does not end in an integer, or
    when more lines than the declared number of taxa precede the ``;``.
    """
    # The builtin next() works for both Python 2.6+ and Python 3 iterators,
    # unlike the Python-2-only ``.next()`` method.
    nc_nt_line = next(line_iter).strip()
    try:
        nt = int(nc_nt_line.split(' ')[-1])
    except ValueError:
        raise ValueError('Expecting # chars then # of taxa on a line after xread found "{}"'.format(nc_nt_line))
    taxa = [None] * nt
    for ind, line in enumerate(line_iter):
        if line.startswith(';'):
            return taxa
        # Validate the count BEFORE storing: indexing taxa[nt] would raise an
        # IndexError and hide the more informative error below.
        if ind >= nt:
            raise ValueError('Expecting a ; after {} lines after xread, but did not find one'.format(nt))
        taxa[ind] = line.split()[0]
def scan_for_taxa_list(line_iter):
    """Advance ``line_iter`` to the first ``xread`` line and parse its taxa.

    Lines are consumed until one starts with ``xread``; the rest of the
    iterator is then handed to ``read_taxa_from_xread`` and its result is
    returned.  Returns None when no line starts with ``xread``.
    """
    # any() stops consuming as soon as the xread line has been read, leaving
    # the iterator positioned on the line that follows it.
    found_xread = any(row.startswith('xread') for row in line_iter)
    if not found_xread:
        return None
    return read_taxa_from_xread(line_iter)
def nexus_tokenize(s):
    """Return ``s`` wrapped in single quotes, with embedded quotes doubled."""
    escaped = s.replace("'", "''")
    return "'" + escaped + "'"
def write_taxa_block_with_zero_based_translate(taxa_list, out):
    """Write a NEXUS TAXA block and the opening of a TREES block to ``out``.

    The TAXA block lists every label of ``taxa_list`` in quoted form.  The
    TREES block is opened and given a Translate command that maps each label's
    position in ``taxa_list`` (counting from 0) to the quoted label.  The
    TREES block is left open so that tree statements can follow.
    """
    emit = out.write
    emit('BEGIN TAXA;\n Dimensions NTax = {} ;\n TaxLabels\n'.format(len(taxa_list)))
    for name in taxa_list:
        emit(' {}\n'.format(nexus_tokenize(name)))
    emit(' ;\nEND;\n\nBEGIN TREES;\n Translate\n')
    # Translate entries are separated by (not terminated with) ",\n"
    entries = [' {} {} '.format(num, nexus_tokenize(name))
               for num, name in enumerate(taxa_list)]
    emit(',\n'.join(entries))
    emit('\n ;\n')
def _tnt_tree_to_newick(tnt_tree):
    """Return a TNT parenthetical tree with commas inserted between siblings."""
    ts = []
    prev = None
    for c in tnt_tree:
        if not c.isspace():
            if prev is not None and prev.isspace():
                # Whitespace separates siblings, but not directly after an
                # opening or before a closing parenthesis.
                if c != ')' and ts and ts[-1] != '(':
                    ts.append(',')
            elif prev == ')' and c == '(':
                # Adjacent subtrees written without whitespace: ")(" -> "),("
                ts.append(',')
            ts.append(c)
        prev = c
    return ''.join(ts)


def tranlate_trees(line_src, out):
    """Convert the tree lines of a TNT ``tread`` block to NEXUS Tree statements.

    ``line_src`` yields the lines that follow the ``tread`` line; one tree is
    expected per line.  A tree line ends with ``*`` when more trees follow, or
    with ``;`` when it is the last tree of the block.  Each tree is written to
    ``out`` as `` Tree tree<N> = [&U] <newick>;`` with N counting from 1.
    Reading stops at the last tree, at a line starting with ``;``, or at any
    other line ending in ``;`` (such as a following command).  ``END;`` is
    always written to close the TREES block.  Blank lines are ignored.

    Raises ValueError for a non-blank line that ends in neither ``*`` nor
    ``;``.
    """
    tree_count = 0
    for line in line_src:
        l_strip = line.strip()
        if not l_strip:
            continue
        if l_strip.startswith(';'):
            break
        is_last = l_strip.endswith(';')
        if is_last and not l_strip.startswith('('):
            # A ;-terminated line that is not a tree ends the block
            break
        if not (is_last or l_strip.endswith('*')):
            raise ValueError('Expecting each tree line to end with * or ; but found "{}"'.format(l_strip))
        tree_count += 1
        newick = _tnt_tree_to_newick(l_strip[:-1])
        out.write(' Tree tree{} = [&U] {};\n'.format(tree_count, newick))
        if is_last:
            # The ; terminator belongs to this tree, so emit it before closing
            break
    out.write('END;\n')
# Script body: scan the tree file for a taxon list (from an xread block in the
# same file, or from the data file named on the tread line), then emit a NEXUS
# file with a TAXA block and a TREES block on stdout.
out = sys.stdout
taxa_list = None
# Plain text mode is used instead of 'rU': the 'U' flag was removed in
# Python 3.11, and Python 3 text mode already normalizes line endings.
with open(treefilename) as inp:
    ls = iter(inp)
    for line in ls:
        if tread_pat.match(line):
            if line.startswith(ext_data_pref):
                header = line.rstrip('\r\n')
                if not header.endswith("'"):
                    sys.exit('Expecting the tread line to end with a closing quote, but found "{}"\n'.format(header))
                # The data file name sits between the prefix and the closing quote
                data_fn = header[len(ext_data_pref):-1]
                if not os.path.exists(data_fn):
                    sys.exit('Tree file refers to data file "{}", but the file was not found'.format(data_fn))
                # Context manager so the data file is closed once it is parsed
                with open(data_fn) as data_inp:
                    taxa_list = scan_for_taxa_list(iter(data_inp))
            if taxa_list is None:
                sys.exit('Expecting data file refrence in tread or xread before tread.\n')
            out.write('#NEXUS\n')
            write_taxa_block_with_zero_based_translate(taxa_list, out)
            # The same iterator is passed on so the trees are read from the
            # lines directly after the tread line.
            tranlate_trees(ls, out)
            break
        elif line.startswith('xread'):
            taxa_list = read_taxa_from_xread(ls)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment