We assume that trees are provided in the newick-tree format. Conveniently, in Python there is a package for handling newick trees.
Basically we just use ete2 to store the tree structure for iterating and visualization. The newick trees produced by phylobayes are not directly following any standard type of newick tree that ete2 understands so if we load directly there is a loss of information. Specifically, the transitions at internal nodes will be ignored.
To fix this there is a function called fix_tree that reads the tree in to get the basic structure then it iterates over the newick formatted string to both confirm the structure and repopulate the missing informtion.
Before we can fix the tree we must standardize it first. For example we need to make the following types of substitutions
_Q:0.0367044:Q) --> _Q:0.0367044)
_Q:0.044464:Q:0.0512411:K) --> _Q:0.044464)
In [1]:
import os
from ete2 import Tree,TreeStyle
from ete2 import faces, AttrFace
from vertebratesLib import *
def standardize_tree(nwTree):
pattern1 = "[\w|:]+.+?[,|\)|\(]"
parsedTree = nwTree
for r in re.finditer(pattern1,nwTree):
matched = nwTree[r.start(0):r.end(0)]
parsed = re.sub(":[A-Z]","",matched)
multiples = re.findall(":\d\.[\de-]+",parsed)
transitions = "".join(multiples)
if len(multiples) > 1:
parsed = parsed.replace(transitions,multiples[0])
parsedTree = parsedTree.replace(matched,parsed)
return re.sub("\)_[A-Z]:",")",parsedTree)[:-3]+";"
## standardize a phylobayes tree
pbtree = "(Acipenser__R:0.137306:R:0.167816:K,((Takifugu_r_K:0.623751:K,Danio_reri_K:0.428613:K)_K:0.319631:K,((Lepisosteu_K:0.00243856:K,Lepisost00_K:0.00389519:K)_K:0.24244:K,Amia_calva_K:0.252829:K)_K:0.0341578:K)_K:0.156204:K,(Polypterus_K:0.538725:K,((((Neotrygon__K:0.199854:K,(Raja_clava_K:0.0276807:K,Leucoraja__K:0.0338708:K)_K:0.142449:K)_K:0.123911:K,((Ginglymost_K:0.0614048:K,Chiloscyll_K:0.063792:K)_K:0.0894881:K,(Scyliorhin_K:0.126886:K,Carcharodo_K:0.0816871:K)_K:0.0261212:K)_K:0.100872:K)_K:0.138297:K,Callorhinc_K:0.406368:K)_K:0.596451:K,(Latimeria__Q:0.435962:Q,((Neoceratod_E:0.139027:E,((Protopteru_E:0.0118172:E,Protopte00_E:0.00600774:E)_E:0.125265:E,Lepidosire_E:0.129048:E)_E:0.283337:E)_E:0.185699:E:0.230877:Q,(((Rhinatrema_K:0.168308:K,((Typhlonect_K:0.00595736:K,Typhlone00_K:0.0172491:K)_K:0.154939:K,Geotrypete_K:0.173365:K)_K:0.0837134:K)_K:0.277616:K,(((Discogloss_K:0.211532:K,Bombina_ma_K:0.247467:K)_K:0.0796761:K,((Pipa_pipa__K:0.246234:K,(Xenopus_tr_K:0.236198:K,Hymenochir_K:0.300396:K)_K:0.0241566:K)_K:0.103826:K,(Megophrys__K:0.422485:K,((Rana_chens_K:0.096737:K,(Pelophylax_K:0.0493814:K,Pelophyl00_K:0.0377842:K)_K:0.0159202:K)_K:0.212804:K,(Cyclorana__K:0.0911112:K,(Espadarana_R:0.119242:R:0.018515:K,Atelopus_z_S:0.0687643:S:0.0657081:K)_K:0.0180531:K)_K:0.146541:K)_K:0.133463:K)_K:0.0325331:K)_K:0.0320521:K)_K:0.45317:K,((Hynobius_c_K:0.13165:K,Andrias_da_K:0.116493:K)_K:0.0732691:K,(Siren_lace_K:0.293752:K,(Proteus_an_K:0.252888:K,((Salamandra_K:0.0876578:K,(Pleurodele_K:0.0771019:K,(Notophthal_K:0.0586901:K,(Cynops_pyr_K:0.0403819:K,Calotriton_K:0.048577:K)_K:0.0333907:K)_K:0.0108502:K)_K:0.0327197:K)_K:0.131402:K,Ambystoma__K:0.199927:K)_K:0.0356739:K)_K:0.0444576:K)_K:0.0182886:K)_K:0.293596:K)_K:0.0448927:K)_K:0.00925933:K:0.0960703:Q,((Ornithorhy_E:0.228367:E:0.0919702:Q,((Monodelphi_Q:0.116685:Q,(Sarcophilu_K:0.0956754:K,Macropus_e_K:0.0967417:K)_K:0.018078:K:0.0103883:Q)_Q:0.173422:Q,((Loxodonta__Q:0.134972:Q,Dasypus_no_Q:0.160758:Q)_Q:0.00642451:Q,((Mus_muscul_Q:0.227068:Q,Homo_sapie_Q:0.100987:Q)_Q:0.0152245:Q,(Felis_catu_Q:0.057015:Q,Canis_fami_Q:0.0722375:Q)_Q:0.0483643:Q)_Q:0.0151335:Q)_Q:0.235403:Q)_Q:0.0967933:Q)_Q:0.289468:Q,((Sphenodon__Q:0.263159:Q,((Tarentola__Q:0.138711:Q,Eublephari_Q:0.126597:Q)_Q:0.134156:Q,((Scincella__Q:0.129828:Q,((Saproscinc_E:0.0436589:E,Lamprophol_E:0.039153:E)_E:0.0132249:E,Carlia_rub_E:0.0421727:E)_E:0.0298716:E:0.0919685:Q)_Q:0.144222:Q,((Tupinambis_Q:0.263447:Q,Podarcis_s_Q:0.166879:Q)_Q:0.0318745:Q,(((((Thamnophis_Q:0.0743196:Q,(Pantheroph_Q:0.0216722:Q,Opheodrys__Q:0.0297485:Q)_Q:0.0184298:Q)_Q:0.0120419:Q,(Ophiophagu_Q:0.0287086:Q,Micrurus_f_Q:0.0346685:Q)_Q:0.0394398:Q)_Q:0.0306892:Q,(Echis_colo_Q:0.0582188:Q,(Sistrurus__Q:0.0146283:Q,Crotalus_a_Q:0.0105047:Q)_Q:0.0452256:Q)_Q:0.0417915:Q)_Q:0.0890153:Q,(Python_reg_Q:0.0643131:Q,Boa_constr_Q:0.0698609:Q)_Q:0.0173203:Q)_Q:0.244803:Q,(Elgaria_mu_Q:0.162227:Q,((Pogona_vit_Q:0.142077:Q,Chamaeleo__Q:0.204871:Q)_Q:0.0904862:Q,((Iguana_igu_Q:0.0899908:Q,Basiliscus_Q:0.0856177:Q)_Q:0.00740167:Q,(Sceloporus_Q:0.118961:Q,Anolis_car_Q:0.182233:Q)_Q:0.00884581:Q)_Q:0.0816258:Q)_Q:0.0306025:Q)_Q:0.00670356:Q)_Q:0.0222824:Q)_Q:0.0403438:Q)_Q:0.025957:Q)_Q:0.231231:Q)_Q:0.0674595:Q,(((Phrynops_h_Q:0.0852888:Q,Pelusios_c_Q:0.111423:Q)_Q:0.0256123:Q,(Pelodiscus_Q:0.119918:Q,(Sternother_Q:0.0824974:Q,(((Trachemys__Q:0.0144476:Q,Emys_orbic_Q:0.0187597:Q)_Q:0.0159572:Q,(Chinemys_r_Q:0.021249:Q,Chelonoidi_Q:0.0343593:Q)_Q:0.0145747:Q)_Q:0.00699482:Q,Caretta_ca_Q:0.0473377:Q)_Q:0.00586399:Q)_Q:0.0272828:Q)_Q:0.0283806:Q)_Q:0.0828567:Q,(((Struthio_c_Q:0.0456057:Q,Dromaius_n_Q:0.0442109:Q)_Q:0.040561:Q,(Taeniopygi_Q:0.2042:Q,((Meleagris__Q:0.0381837:Q,Gallus_gal_Q:0.0307035:Q)_Q:0.106136:Q,Anas_platy_Q:0.0913882:Q)_Q:0.0298145:Q)_Q:0.0373143:Q)_Q:0.189056:Q,(Crocodylus_Q:0.0349965:Q,(Caiman_cro_Q:0.0451652:Q,Alligator__Q:0.0189561:Q)_Q:0.0222454:Q)_Q:0.204443:Q)_Q:0.0457489:Q)_Q:0.0369577:Q)_Q:0.126106:Q)_Q:0.238162:Q)_Q:0.306579:Q)_Q:0.0367044:Q)_Q:0.044464:Q:0.0512411:K)_K:0.322251:K)_K:0.0235979:K)_K;"
nwtree = standardize_tree(pbtree)
#print(pbtree)
#print(nwree)
## fix phylobayes tree (standardize, add internal nodes and extract extra info)
fixedTree,treeSummary = fix_tree(pbtree)
for key in ['aa','pairs','transitions','dist','parent']:
print("%s - %s"%(key, treeSummary["Danio_reri_K"][key]))
print("...parent N4- %s"%treeSummary["N4"]['pairs'])
## plot the tree
def my_layout(node):
if node.is_leaf():
name_face = AttrFace("name",fsize=25)
faces.add_face_to_node(name_face, node, column=0, position="branch-right")
else:
name_face = AttrFace("name", fsize=20, fgcolor="red")
faces.add_face_to_node(name_face, node, column=0, position="branch-right")
ts = TreeStyle()
ts.show_leaf_name = False
ts.show_branch_length = True
ts.show_branch_support = True
ts.scale = 180
ts.layout_fn = my_layout
out = fixedTree.render("./figures/simple-tree.png", units="mm", tree_style=ts,dpi=400)
In [10]:
from IPython.display import Image
Image(filename='./figures/simple-tree.png')
Out[10]:
In [ ]:
When a tree is loaded from external sources, a pointer to the top-most node is returned. This is called the tree root, and it will exist even if the tree is conceptually considered as unrooted. This is, the root node can be considered as the master node, since it represents the whole tree structure. Unrooted trees can be identified as trees in which master root node has more than two children
In [18]:
from ete2 import PhyloTree, TreeStyle
alg = """
>Dme_001
MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEAL--YYASQTDDIKDRREEAH
>Dme_002
MAEIPDATIQQFMALTNVSHNIAVQY--EFGDLNEALNSYYAYQTDDQKDRREEAH
>Cfa_001
MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
>Mms_001
MAEAPDETIQQFMALTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
>Hsa_001
MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH
>Ptr_002
MAEIPDATIQ-FMALTNVSHNIAVQY--EFGDLNEALNSY--YQTDDQKDRREEAH
>Mmu_002
MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
>Hsa_002
MAEAPDETIQQFM-LTNVSHNIAVQYLSEFGDLNEAL--------------REEAH
>Mmu_001
MAEIPDETIQQFMALT---HNIAVQYLSEFGDLNEALNSYYASQTDDIKDRREEAH
>Ptr_001
MAEIPDATIQ-FMALTNVSHNIAVQY--EFGDLNEALNSY--YQTDDQKDRREEAH
>Mmu_003
MAEIPDATIQ---ALTNVSHNIAVQYLSEFGDLNEALNSYYASQTDDQPDRREEAH
"""
def get_example_tree():
# Performs a tree reconciliation analysis
gene_tree_nw = '((Dme_001,Dme_002),(((Cfa_001,Mms_001),((Hsa_001,Ptr_001),Mmu_001)),(Ptr_002,(Hsa_002,Mmu_002))));'
species_tree_nw = "((((Hsa, Ptr), Mmu), (Mms, Cfa)), Dme);"
genetree = PhyloTree(gene_tree_nw)
sptree = PhyloTree(species_tree_nw)
recon_tree, events = genetree.reconcile(sptree)
recon_tree.link_to_alignment(alg)
return recon_tree, TreeStyle()
# Visualize the reconciled tree
t, ts = get_example_tree()
out = t.render("./figures/phylotree-example.png", w=750)
In [19]:
Image(filename='./figures/phylotree-example.png')
Out[19]:
In [ ]: