Created
July 6, 2017 02:47
-
-
Save mtholder/470a36b14a903983d989d1e08b0af4f8 to your computer and use it in GitHub Desktop.
Brittle script for converting TNT output to a NEXUS trees block. I'm not sure what the TNT name-quoting rules are, so it reads the first whitespace-delimited word on each line of the xread block to get the taxon names.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import sys | |
| import os | |
| import re | |
# Patterns matching a line that begins with the TNT `tread` or `xread`
# command word.
tread_pat = re.compile(r'^tread')
xread_pat = re.compile(r'^xread')

# The TNT tree file to convert is taken from the first command-line argument.
# Exit with a readable message instead of an IndexError traceback when the
# argument is missing.
if len(sys.argv) < 2:
    sys.exit('Expecting the path to a TNT tree file as the first argument.\n')
treefilename = sys.argv[1]

# Leading text of a `tread` line whose quoted comment ends with the name of
# the data file the trees were built from.
ext_data_pref = "tread 'tree(s) from TNT, for data in "
def read_taxa_from_xread(line_iter):
    """Read the taxon names that follow an ``xread`` line.

    The first line taken from *line_iter* must end with the number of taxa
    (the error message describes it as "# chars then # of taxa").  Each
    following non-blank line, up to the first line that starts with ``;``,
    contributes its first whitespace-delimited word as a taxon name.

    Args:
        line_iter: iterator over the lines that come after the ``xread``
            line.  It is advanced past the terminating ``;`` line.

    Returns:
        A list whose length is the declared number of taxa.  Positions for
        which no name line was found before the ``;`` are left as ``None``.

    Raises:
        ValueError: if the dimensions line is missing or does not end in an
            integer, if more name lines than declared taxa precede the
            ``;``, or if the input ends before a ``;`` line is seen.
    """
    # The built-in next() works on Python 2.6+ and 3; the .next() method
    # exists only on Python 2 iterators.  The '' default turns an exhausted
    # iterator into the same "bad dimensions line" error reported below.
    nc_nt_line = next(line_iter, '').strip()
    try:
        nt = int(nc_nt_line.split()[-1])
    except (ValueError, IndexError):
        raise ValueError('Expecting # chars then # of taxa on a line after xread found "{}"'.format(nc_nt_line))
    taxa = [None] * nt
    num_read = 0
    for line in line_iter:
        if line.startswith(';'):
            return taxa
        words = line.split()
        if not words:
            # A blank line carries no taxon name; skip it without counting it.
            continue
        # Check the bound *before* storing so that too many rows produce the
        # intended ValueError rather than an IndexError.
        if num_read >= nt:
            raise ValueError('Expecting a ; after {} lines after xread, but did not find one'.format(nt))
        taxa[num_read] = words[0]
        num_read += 1
    # Input ran out without the terminating ';'.
    raise ValueError('Expecting a ; after {} lines after xread, but did not find one'.format(nt))
def scan_for_taxa_list(line_iter):
    """Find the first ``xread`` line and return the taxa listed after it.

    Lines are consumed from *line_iter* until one starts with ``xread``; the
    rest of the iterator is then handed to ``read_taxa_from_xread``.  When no
    such line exists the iterator is exhausted and ``None`` is returned.
    """
    for candidate in line_iter:
        if not candidate.startswith('xread'):
            continue
        return read_taxa_from_xread(line_iter)
    return None
def nexus_tokenize(s):
    """Return *s* wrapped in single quotes, with each embedded ``'`` doubled."""
    escaped = s.replace("'", "''")
    return "'" + escaped + "'"
def write_taxa_block_with_zero_based_translate(taxa_list, out):
    """Write a NEXUS TAXA block and the opening of a TREES block to *out*.

    The TAXA block lists every label of *taxa_list* as a quoted token.  The
    TREES block is opened with a Translate table that maps each label's
    zero-based position in *taxa_list* to that label; the TREES block is left
    open so that tree statements can be appended by the caller.
    """
    num_taxa = len(taxa_list)
    out.write('BEGIN TAXA;\n Dimensions NTax = {} ;\n TaxLabels\n'.format(num_taxa))
    for name in taxa_list:
        out.write(' {}\n'.format(nexus_tokenize(name)))
    out.write(' ;\nEND;\n\nBEGIN TREES;\n Translate\n')
    for number, name in enumerate(taxa_list):
        # Entries are comma-separated, so every entry but the first is
        # preceded by the separator.
        separator = ',\n' if number > 0 else ''
        out.write(separator + ' {} {} '.format(number, nexus_tokenize(name)))
    out.write('\n ;\n')
def _tnt_tree_to_newick(tree_text):
    """Return *tree_text* with commas inserted where sibling subtrees meet.

    A comma is added before a token that follows a space (unless the token is
    a closing parenthesis) and between a ``)`` that is directly followed by a
    ``(``.  Space characters themselves are dropped.
    """
    pieces = []
    prev = None
    for c in tree_text:
        if c != ' ':
            if prev == ' ':
                # A space separated this token from the previous one; only a
                # closing parenthesis does not start a new sibling.
                if c != ')':
                    pieces.append(',')
            elif prev == ')' and c == '(':
                pieces.append(',')
            pieces.append(c)
        prev = c
    return ''.join(pieces)


def tranlate_trees(line_src, out):
    """Write one NEXUS ``Tree`` statement per tree line, then ``END;``.

    Each non-blank line of *line_src* is expected to hold one parenthetical
    tree followed by ``*``.  A line that starts or ends with ``;`` terminates
    the block; if such a line also carries a tree in front of the ``;`` (a
    final tree sharing its line with the terminator), that tree is written
    before the block is closed rather than being dropped.

    Args:
        line_src: iterable of lines following the ``tread`` line.
        out: object with a ``write`` method receiving the NEXUS text.

    Raises:
        ValueError: if a non-blank line ends in neither ``*`` nor ``;``.
    """
    tree_template = ' Tree tree{} = [&U] {};\n'
    num_written = 0
    for line in line_src:
        l_strip = line.strip()
        if not l_strip:
            # Blank lines hold no tree; ignore them.
            continue
        if l_strip.startswith(';') or l_strip.endswith(';'):
            last_tree = l_strip[:-1].strip() if l_strip.endswith(';') else ''
            # Only text that opens with '(' is a tree; anything else before
            # the ';' (for example another command) just ends the block.
            if last_tree.startswith('('):
                num_written += 1
                out.write(tree_template.format(num_written, _tnt_tree_to_newick(last_tree)))
            out.write('END;\n')
            return
        if not l_strip.endswith('*'):
            raise ValueError('Expecting each tree line to end with * or ; but found "{}"'.format(l_strip))
        num_written += 1
        out.write(tree_template.format(num_written, _tnt_tree_to_newick(l_strip[:-1])))
    # Input ended without a ';' line: still close the TREES block so the
    # emitted NEXUS is well formed.
    out.write('END;\n')
# Driver: scan the tree file for the taxon names (either from an ``xread``
# block in the same file or from the data file named in the ``tread``
# comment), then emit the trees as NEXUS on standard output.
out = sys.stdout
taxa_list = None
# The 'U' mode flag is rejected by Python 3.11+, where universal newlines are
# already the default for text mode; keep it only on Python 2.
read_mode = 'rU' if sys.version_info[0] < 3 else 'r'
with open(treefilename, read_mode) as inp:
    ls = iter(inp)
    for line in ls:
        if tread_pat.match(line):
            if line.startswith(ext_data_pref):
                comment = line.rstrip('\r\n')
                if not comment.endswith("'"):
                    sys.exit('Expecting the tread comment to end with a single quote, but found "{}"\n'.format(comment))
                # Text between the known prefix and the closing quote is the
                # data file name.
                data_fn = comment[len(ext_data_pref):-1]
                if not os.path.exists(data_fn):
                    sys.exit('Tree file refers to data file "{}", but the file was not found'.format(data_fn))
                # Context manager so the data file is closed once scanned.
                with open(data_fn, read_mode) as data_inp:
                    taxa_list = scan_for_taxa_list(iter(data_inp))
            if taxa_list is None:
                sys.exit('Expecting data file refrence in tread or xread before tread.\n')
            out.write('#NEXUS\n')
            write_taxa_block_with_zero_based_translate(taxa_list, out)
            # The remaining lines of the tree file are the trees themselves.
            tranlate_trees(ls, out)
            break
        elif line.startswith('xread'):
            taxa_list = read_taxa_from_xread(ls)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment