Created
June 15, 2011 15:58
-
-
Save mtholder/1027406 to your computer and use it in GitHub Desktop.
Takes a MrBayes .t file and writes (to standard output) a .p file whose parameters are the edge lengths of the terminal edges.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
'''Takes a MrBayes .t file and writes (to standard out) a .p file with
the parameters being the edge lengths for each of the terminal edges.

This .p file can be used with Tracer http://tree.bio.ed.ac.uk/software/tracer/
or other MCMC diagnostics tools.

Example invocation with redirection to a file called term_edges_myrun.p :

    python terminal_edges_to_dot_p.py myrun.t > term_edges_myrun.p

If no file name is given on the command line, the trees are read from
standard input.
'''
import sys
import re

# A line of the translate block: leading whitespace, the leaf number, a
# name, and a trailing ',' (more entries follow) or ';' (last entry).
TRANS_PAT = re.compile(r'^\s+(\d+)\s.*([,;])\s*\n')

# A sampled tree line; group 1 is the replicate/generation label.
TREE_PAT = re.compile(r'^\s*tree rep\.(\d+)')


def detect_leaf_count(lines):
    '''Scan ``lines`` for the translate block and return ``(ntax, line)``.

    ``lines`` must be an iterator (not merely an iterable) so that the
    caller can keep reading from the position where this function stopped.

    ``ntax`` is the number on the last consecutive line matching
    ``TRANS_PAT``.  ``line`` is the last line consumed: the first line
    after the translate block, or the final translate line when the input
    ends inside the block.

    Raises ``ValueError`` when no translate entry is found at all.
    '''
    last_match = None
    line = None
    # Skip the preamble up to the first translate entry.
    for line in lines:
        last_match = TRANS_PAT.match(line)
        if last_match:
            break
    if last_match is None:
        raise ValueError('no translate entry found')
    # Consume the rest of the block, remembering the most recent entry.
    for line in lines:
        current = TRANS_PAT.match(line)
        if not current:
            break
        last_match = current
    return int(last_match.group(1)), line


def build_edge_patterns(ntax):
    '''Return one compiled regex per leaf (1..ntax) capturing its edge length.

    Each pattern looks for ``<leaf>:<length>`` preceded by '(' or ',' and
    followed by ')' or ','.  The length character class includes '+' so
    that exponents written with an explicit sign (e.g. ``1.5e+00``) are
    accepted as well as negative ones.
    '''
    return [re.compile('[(,]' + str(leaf) + r':([-+0-9.eE]+)[),]')
            for leaf in range(1, ntax + 1)]


def write_terminal_edges(inp, outp, err):
    '''Convert the tree lines read from ``inp`` into .p rows on ``outp``.

    ``inp`` is any iterable of text lines, ``outp`` and ``err`` are
    file-like objects with a ``write`` method.  A tab-separated header
    (``Gen``, ``edge1`` .. ``edgeN``) is written first, followed by one row
    per tree line.  Non-blank lines that are not trees are reported on
    ``err`` and skipped.

    Calls ``sys.exit`` with a message (raising ``SystemExit``) when the
    number of taxa cannot be detected, when a leaf's edge is missing from a
    tree, or when a captured edge length is not a valid number.
    '''
    lines = iter(inp)
    try:
        ntax, line = detect_leaf_count(lines)
    except ValueError:
        sys.exit('Error in detecting the number of taxa\n')
    err.write(str(ntax) + ' leaves detected\n')

    edge_len_pats = build_edge_patterns(ntax)
    header = ['Gen'] + ['edge' + str(leaf) for leaf in range(1, ntax + 1)]
    outp.write('\t'.join(header) + '\n')

    # ``line`` already holds the first line after the translate block, so
    # it is processed before anything further is pulled from the iterator.
    while line is not None:
        tree_match = TREE_PAT.match(line)
        if tree_match:
            rep_str = tree_match.group(1)
            row_str = []
            for leaf, pat in enumerate(edge_len_pats, 1):
                edge_match = pat.search(line)
                if not edge_match:
                    sys.exit('Edge pattern not found for leaf ' + str(leaf)
                             + ' in rep.' + rep_str)
                f_str = edge_match.group(1)
                # The character class is permissive (it would accept
                # "1e" or "--"), so confirm the text really is a number.
                try:
                    float(f_str)
                except ValueError:
                    sys.exit('Invalid edge length "' + f_str + '" for leaf '
                             + str(leaf) + ' in rep.' + rep_str)
                row_str.append(f_str)
            # The row is written only once it is complete, so a failure
            # above never leaves a partial row on the output.
            outp.write(rep_str + '\t' + '\t'.join(row_str) + '\n')
        else:
            rsl = line.rstrip()
            if rsl:
                err.write('Skipping line "' + rsl[:10] + '"...\n')
        line = next(lines, None)


def main(argv=None):
    '''Command-line entry point; returns the process exit status.

    ``argv`` defaults to ``sys.argv``.  ``argv[1]``, when present, names
    the input file; otherwise standard input is read.
    '''
    if argv is None:
        argv = sys.argv
    if len(argv) > 1:
        fn = argv[1]
        try:
            # Python 3 text mode already translates every newline
            # convention, which is what the removed 'rU' flag asked for.
            inp = open(fn)
        except (IOError, OSError):
            sys.exit('Error opening ' + fn + '\n')
    else:
        inp = sys.stdin
    try:
        write_terminal_edges(inp, sys.stdout, sys.stderr)
    finally:
        if inp is not sys.stdin:
            inp.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment