Created
March 15, 2017 14:48
-
-
Save jhcepas/5fea1a726a9e57fb00a03629efe8a968 to your computer and use it in GitHub Desktop.
Sort a FASTA alignment by the order of branches in a phylogenetic tree.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Print a FASTA alignment reordered to follow the leaves of a tree.

Usage:
    python <this script> ALIGNMENT.fasta TREE.nw > sorted.fasta

The alignment is loaded with ete3's ``SeqGroup`` and the tree with ete3's
``Tree``.  For every leaf visited while iterating over the tree, one FASTA
record (``>name`` line followed by the sequence) is written to standard
output, using the leaf name to look the sequence up in the alignment.
"""
import re
import sys

from ete3 import SeqGroup, Tree

if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: %s ALIGNMENT.fasta TREE.nw" % sys.argv[0])

alg_file = sys.argv[1]  # in fasta format
tree_file = sys.argv[2]  # in newick format

alg = SeqGroup(alg_file)

# Register a Newick-safe alias for every alignment name so that leaf names
# read from the tree can be matched even when the original FASTA names
# contained characters that are illegal in Newick.  The original names stay
# registered as well.
# Iterate over a snapshot (list) of the items: inserting keys into a dict
# while iterating its live view raises RuntimeError on Python 3.
for name, seq_id in list(alg.name2id.items()):
    # converts illegal newick chars from alg names.
    # Comment out the next two lines if not necessary
    safe_name = re.sub(r'[:,();]', '_', name)
    alg.name2id[safe_name] = seq_id

tree = Tree(tree_file)
for leaf in tree:
    # Single-argument print(...) with %-formatting behaves identically on
    # Python 2 and Python 3.
    # NOTE(review): a leaf whose name is missing from the alignment will
    # presumably raise from get_seq -- confirm that is the desired behavior.
    print(">%s\n%s" % (leaf.name, alg.get_seq(leaf.name)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment