Skip to content

Instantly share code, notes, and snippets.

@jhcepas
Last active November 18, 2016 10:39
Show Gist options
  • Save jhcepas/98127f745ee77f3f525af2361ef7a3ad to your computer and use it in GitHub Desktop.
Save jhcepas/98127f745ee77f3f525af2361ef7a3ad to your computer and use it in GitHub Desktop.
Some tweaks and ideas to render ancestral sequences aligned to leaf sequences using ETE3. The resulting image is not perfect, but it is good enough for data exploration.
from collections import defaultdict
from ete3 import PhyloTree, TreeStyle, SeqMotifFace, TextFace, RectFace, AttrFace
# Example alignment in FASTA format, passed to link_to_alignment() by
# get_example_tree().
#   * Entries named after a single tip (e.g. ">Hsa_001") are leaf sequences.
#   * Entries whose name joins several tip names with '&' are the ancestral
#     sequences that get_example_tree() looks up for its phantom "consensus"
#     branches.
# NOTE(review): the 'X' runs presumably mark unresolved ancestral positions,
# and the lowercase 'xxxx' in the last entry looks unintentional -- confirm
# before relying on either.
alg = """
>Dme_001
MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEAL--YYASQTDDIKDRREEAH
>Dme_002
MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH
>Cfa_001
MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
>Mms_001
MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
>Hsa_001
MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH
>Ptr_002
MAEIPDATIQ-FMALTNVSHNIAVQY--EFGDLNEALNSY--YQTDDQKDRREEAH
>Mmu_002
MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
>Hsa_002
MAEAPDETIQQFM-LTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
>Ptr_001
MAEIPDATIQ-FMALTNVSHNIAVQY--EFGDLNEALNSY--YQTDDQKDRREEAH
>Mmu_001
MAEIPDTTIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
>Ptr_001&Hsa_001&Mmu_001
MAEIPDTTIQXXXALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
>Ptr_001&Hsa_001
MAEIXXXXXXXXXXXXXXXXXXXXXXXXXXXXLNEALNSYYASQTDDQPDRREEAH
>Cfa_001&Hsa_002&Ptr_001&Ptr_002&Hsa_001&Mmu_001&Mms_001&Mmu_002
MAEIPDTTIQXXXALTNVSHNIAVQYLXXXXXXXXXXXXXXXXxxxxQPDRREEAH
"""
def get_example_tree():
    """Build the example gene tree and the TreeStyle used to draw it.

    A phantom leaf tagged ``'consensus'`` is added under every internal node
    returned by ``get_descendants()``. Each phantom leaf is named after the
    alignment entry that lists exactly the tips below that node, so that
    ``link_to_alignment(alg)`` can attach the ancestral sequence to it.
    Real leaves show their name and sequence; phantom leaves show only their
    (boxed) sequence, when one was found.

    Returns:
        tuple: ``(t, ts)`` -- the annotated ``PhyloTree`` and the
        ``TreeStyle`` (including a column-number header) to render it with.
    """
    gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
    t = PhyloTree(gene_tree_nw)
    ts = TreeStyle()
    # Disable the default PhyloTree layout; every face is added by hand below.
    ts.layout_fn = lambda x: True
    ts.show_leaf_name = False

    # Add phantom branches to the internal nodes. Those phantom branches
    # carry the ancestral sequences so they are drawn aligned to the leaves.
    node2tips = t.get_cached_content(store_attr='name')
    header_by_tips = _alignment_names_by_tips(alg)
    for node in t.get_descendants():
        if node.is_leaf():
            continue
        tips = node2tips[node]
        new_branch = node.add_child()
        new_branch.tag = 'consensus'
        # The cached tip names are not guaranteed to come back in the order
        # used by the alignment headers, so look the header up by its *set*
        # of tip names. Fall back to a plain '&'-join when the alignment has
        # no entry for this clade (the node then simply gets no sequence).
        new_branch.name = header_by_tips.get(frozenset(tips), '&'.join(tips))
        print(new_branch.name)
        new_branch.sequence = None
        node.children.reverse()

    t.ladderize()  # Improves readability of ancestral seq nodes.
    t.link_to_alignment(alg)

    alg_length = 0
    for node in t.traverse():
        node.img_style["size"] = 0
        if not node.is_leaf():
            continue

        if getattr(node, "tag", None) == 'consensus':
            # Hide the phantom branch and do not render its name.
            # Alternatively, the branch could be shown with a custom name
            # like 'consensus_seq_XXX'.
            node.img_style['hz_line_color'] = 'white'
            if getattr(node, "sequence", None):
                f = SeqMotifFace(node.sequence, seq_format="seq", width=6, height=15)
                f.margin_top = 1
                f.margin_bottom = 1
                f.border.color = 'black'
                f.border.type = 0
                f.border.width = 2
                f.background.color = "white"
                node.add_face(f, column=0, position="aligned")
        else:
            node.add_face(AttrFace('name'), column=0, position='branch-right')
            if getattr(node, "sequence", None):
                # Only real leaves determine how many header columns to draw.
                alg_length = max(alg_length, len(node.sequence))
                f = SeqMotifFace(node.sequence, seq_format="seq", width=6)
                node.add_face(f, column=0, position="aligned")

    ts.draw_aligned_faces_as_table = False
    # One small labelled box per alignment column, used as a position ruler.
    for colnum in range(alg_length):
        col_f = RectFace(10, 10, fgcolor=None, bgcolor=None,
                         label={"text":str(colnum), "fonttype":"Courier", "color":"black", "fontsize":6})
        ts.aligned_header.add_face(col_f, column=colnum)
    return t, ts


def _alignment_names_by_tips(fasta_text):
    """Map each FASTA header's set of '&'-separated names to the header text."""
    names = {}
    for line in fasta_text.splitlines():
        if line.startswith('>'):
            header = line[1:].strip()
            names[frozenset(header.split('&'))] = header
    return names
if __name__ == "__main__":
    # Script entry point: build the example tree and display it using the
    # TreeStyle returned alongside it.
    t, ts = get_example_tree()
    t.show(tree_style=ts)
@jhcepas
Copy link
Author

jhcepas commented Nov 18, 2016

Note that this is an example dataset. It is not meant to represent anything other than an illustration of the layout.

ancestral_seqs

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment