In [1]:
import dendropy

In [2]:
# Load the tree stored in NEXUS format in 'my_ebola.nex'; later cells traverse it
# starting from ebola_raxml.seed_node.
ebola_raxml = dendropy.Tree.get_from_path('my_ebola.nex', 'nexus')

In [3]:
def compute_level(node, level=0):
    """Print each taxon-bearing node with two depth values side by side.

    The subtree below ``node`` is walked recursively; children are handled
    before the node itself. For every node whose ``taxon`` is not ``None`` a
    line ``"<taxon>: <node.level()> <level>"`` is printed, where the last
    number is the depth counted by this recursion.

    Args:
        node: object exposing ``child_nodes()``, ``taxon`` and ``level()``.
        level: depth assigned to ``node``; each child receives ``level + 1``.
    """
    depth_below = level + 1
    for descendant in node.child_nodes():
        compute_level(descendant, depth_below)

    taxon = node.taxon
    if taxon is None:
        # Nodes without a taxon are traversed but produce no output.
        return
    print("%s: %d %d" % (taxon, node.level(), level))

# Start at the tree's seed node with the default level of 0; the two numbers
# printed per taxon are node.level() and the recursively counted level.
compute_level(ebola_raxml.seed_node)


BDBV_KC545395 18891 bp: 1 1
SUDV_EU338380 18891 bp: 9 9
SUDV_KC242783 18891 bp: 10 10
SUDV_FJ968794 18891 bp: 10 10
SUDV_KC589025 18891 bp: 9 9
SUDV_AY729654 18891 bp: 10 10
SUDV_JN638998 18891 bp: 10 10
RESTV_FJ621584 18891 bp: 8 8
RESTV_JX477165 18891 bp: 10 10
RESTV_FJ621583 18891 bp: 10 10
RESTV_JX477166 18891 bp: 11 11
RESTV_AB050936 18891 bp: 11 11
RESTV_FJ621585 18891 bp: 10 10
EBOV_2014_KM034561 18891 bp: 10 10
EBOV_2014_KM034562 18891 bp: 10 10
EBOV_2014_KM034557 18891 bp: 12 12
EBOV_2014_KM034558 18891 bp: 14 14
EBOV_2014_KM034556 18891 bp: 14 14
EBOV_2014_KM034560 18891 bp: 13 13
EBOV_2014_KM233113 18891 bp: 13 13
EBOV_2014_KM233114 18891 bp: 13 13
EBOV_2014_KM233116 18891 bp: 13 13
EBOV_2014_KM233115 18891 bp: 14 14
EBOV_2014_KM233117 18891 bp: 15 15
EBOV_2014_KM233118 18891 bp: 15 15
EBOV_2014_KM034559 18891 bp: 10 10
EBOV_2014_KM034563 18891 bp: 8 8
EBOV_1976_KC242801 18891 bp: 10 10
EBOV_1976_AF272001 18891 bp: 10 10
EBOV_1995_KC242796 18891 bp: 10 10
EBOV_1995_KC242799 18891 bp: 10 10
EBOV_2007_KC242788 18891 bp: 10 10
EBOV_2007_KC242787 18891 bp: 10 10
EBOV_2007_KC242784 18891 bp: 10 10
EBOV_2007_KC242785 18891 bp: 12 12
EBOV_2007_KC242790 18891 bp: 12 12
EBOV_2007_KC242786 18891 bp: 12 12
EBOV_2007_KC242789 18891 bp: 12 12
TAFV_FJ217162 18891 bp: 5 5
BDBV_FJ217161 18891 bp: 4 4
BDBV_KC545396 18891 bp: 3 3
BDBV_KC545394 18891 bp: 2 2
BDBV_KC545393 18891 bp: 1 1

In [4]:
def compute_height(node):
    """Compute, print and return the height of ``node``.

    A node without children has height 0; any other node has height one more
    than the largest height among its children. Children are processed before
    the node itself, and for every node a line
    ``"<taxon or 'Internal'>: <height> <node.level()>"`` is printed.

    Args:
        node: object exposing ``child_nodes()``, ``taxon`` and ``level()``.

    Returns:
        The height of ``node`` as an integer.
    """
    kids = node.child_nodes()
    if kids:
        height = 1 + max(compute_height(kid) for kid in kids)
    else:
        height = 0
    # Nodes with a falsy taxon are reported under the placeholder name.
    label = node.taxon or 'Internal'
    print("%s: %d %d" % (label, height, node.level()))
    return height

# Height of the whole tree, measured from the seed node; the returned value is
# what the notebook shows as this cell's Out[...] result.
compute_height(ebola_raxml.seed_node)


BDBV_KC545395 18891 bp: 0 1
SUDV_EU338380 18891 bp: 0 9
SUDV_KC242783 18891 bp: 0 10
SUDV_FJ968794 18891 bp: 0 10
Internal: 1 9
Internal: 2 8
SUDV_KC589025 18891 bp: 0 9
SUDV_AY729654 18891 bp: 0 10
SUDV_JN638998 18891 bp: 0 10
Internal: 1 9
Internal: 2 8
Internal: 3 7
RESTV_FJ621584 18891 bp: 0 8
RESTV_JX477165 18891 bp: 0 10
RESTV_FJ621583 18891 bp: 0 10
Internal: 1 9
RESTV_JX477166 18891 bp: 0 11
RESTV_AB050936 18891 bp: 0 11
Internal: 1 10
RESTV_FJ621585 18891 bp: 0 10
Internal: 2 9
Internal: 3 8
Internal: 4 7
Internal: 5 6
EBOV_2014_KM034561 18891 bp: 0 10
EBOV_2014_KM034562 18891 bp: 0 10
Internal: 1 9
EBOV_2014_KM034557 18891 bp: 0 12
EBOV_2014_KM034558 18891 bp: 0 14
EBOV_2014_KM034556 18891 bp: 0 14
Internal: 1 13
EBOV_2014_KM034560 18891 bp: 0 13
Internal: 2 12
Internal: 3 11
EBOV_2014_KM233113 18891 bp: 0 13
EBOV_2014_KM233114 18891 bp: 0 13
Internal: 1 12
EBOV_2014_KM233116 18891 bp: 0 13
EBOV_2014_KM233115 18891 bp: 0 14
EBOV_2014_KM233117 18891 bp: 0 15
EBOV_2014_KM233118 18891 bp: 0 15
Internal: 1 14
Internal: 2 13
Internal: 3 12
Internal: 4 11
Internal: 5 10
EBOV_2014_KM034559 18891 bp: 0 10
Internal: 6 9
Internal: 7 8
EBOV_2014_KM034563 18891 bp: 0 8
Internal: 8 7
EBOV_1976_KC242801 18891 bp: 0 10
EBOV_1976_AF272001 18891 bp: 0 10
Internal: 1 9
EBOV_1995_KC242796 18891 bp: 0 10
EBOV_1995_KC242799 18891 bp: 0 10
Internal: 1 9
Internal: 2 8
EBOV_2007_KC242788 18891 bp: 0 10
EBOV_2007_KC242787 18891 bp: 0 10
Internal: 1 9
EBOV_2007_KC242784 18891 bp: 0 10
EBOV_2007_KC242785 18891 bp: 0 12
EBOV_2007_KC242790 18891 bp: 0 12
Internal: 1 11
EBOV_2007_KC242786 18891 bp: 0 12
EBOV_2007_KC242789 18891 bp: 0 12
Internal: 1 11
Internal: 2 10
Internal: 3 9
Internal: 4 8
Internal: 5 7
Internal: 9 6
Internal: 10 5
TAFV_FJ217162 18891 bp: 0 5
Internal: 11 4
BDBV_FJ217161 18891 bp: 0 4
Internal: 12 3
BDBV_KC545396 18891 bp: 0 3
Internal: 13 2
BDBV_KC545394 18891 bp: 0 2
Internal: 14 1
BDBV_KC545393 18891 bp: 0 1
Internal: 15 0
Out[4]:
15

In [5]:
def compute_nofs(node):
    """Print the number of direct children of every node below ``node``.

    Children are processed before the node itself. For every node a line
    ``"<taxon or 'Internal'>: <number of children> <node.level()>"`` is
    printed.

    Args:
        node: object exposing ``child_nodes()``, ``taxon`` and ``level()``.
    """
    children = node.child_nodes()
    nofs = len(children)
    # Recurse with an explicit loop: a bare ``map(...)`` whose result is
    # discarded is lazy on Python 3 and would never visit the children.
    for child in children:
        compute_nofs(child)
    desc = node.taxon or 'Internal'
    print("%s: %d %d" % (desc, nofs, node.level()))

# Report the child count of every node, starting from the tree's seed node.
compute_nofs(ebola_raxml.seed_node)


BDBV_KC545395 18891 bp: 0 1
SUDV_EU338380 18891 bp: 0 9
SUDV_KC242783 18891 bp: 0 10
SUDV_FJ968794 18891 bp: 0 10
Internal: 2 9
Internal: 2 8
SUDV_KC589025 18891 bp: 0 9
SUDV_AY729654 18891 bp: 0 10
SUDV_JN638998 18891 bp: 0 10
Internal: 2 9
Internal: 2 8
Internal: 2 7
RESTV_FJ621584 18891 bp: 0 8
RESTV_JX477165 18891 bp: 0 10
RESTV_FJ621583 18891 bp: 0 10
Internal: 2 9
RESTV_JX477166 18891 bp: 0 11
RESTV_AB050936 18891 bp: 0 11
Internal: 2 10
RESTV_FJ621585 18891 bp: 0 10
Internal: 2 9
Internal: 2 8
Internal: 2 7
Internal: 2 6
EBOV_2014_KM034561 18891 bp: 0 10
EBOV_2014_KM034562 18891 bp: 0 10
Internal: 2 9
EBOV_2014_KM034557 18891 bp: 0 12
EBOV_2014_KM034558 18891 bp: 0 14
EBOV_2014_KM034556 18891 bp: 0 14
Internal: 2 13
EBOV_2014_KM034560 18891 bp: 0 13
Internal: 2 12
Internal: 2 11
EBOV_2014_KM233113 18891 bp: 0 13
EBOV_2014_KM233114 18891 bp: 0 13
Internal: 2 12
EBOV_2014_KM233116 18891 bp: 0 13
EBOV_2014_KM233115 18891 bp: 0 14
EBOV_2014_KM233117 18891 bp: 0 15
EBOV_2014_KM233118 18891 bp: 0 15
Internal: 2 14
Internal: 2 13
Internal: 2 12
Internal: 2 11
Internal: 2 10
EBOV_2014_KM034559 18891 bp: 0 10
Internal: 2 9
Internal: 2 8
EBOV_2014_KM034563 18891 bp: 0 8
Internal: 2 7
EBOV_1976_KC242801 18891 bp: 0 10
EBOV_1976_AF272001 18891 bp: 0 10
Internal: 2 9
EBOV_1995_KC242796 18891 bp: 0 10
EBOV_1995_KC242799 18891 bp: 0 10
Internal: 2 9
Internal: 2 8
EBOV_2007_KC242788 18891 bp: 0 10
EBOV_2007_KC242787 18891 bp: 0 10
Internal: 2 9
EBOV_2007_KC242784 18891 bp: 0 10
EBOV_2007_KC242785 18891 bp: 0 12
EBOV_2007_KC242790 18891 bp: 0 12
Internal: 2 11
EBOV_2007_KC242786 18891 bp: 0 12
EBOV_2007_KC242789 18891 bp: 0 12
Internal: 2 11
Internal: 2 10
Internal: 2 9
Internal: 2 8
Internal: 2 7
Internal: 2 6
Internal: 2 5
TAFV_FJ217162 18891 bp: 0 5
Internal: 2 4
BDBV_FJ217161 18891 bp: 0 4
Internal: 2 3
BDBV_KC545396 18891 bp: 0 3
Internal: 2 2
BDBV_KC545394 18891 bp: 0 2
Internal: 2 1
BDBV_KC545393 18891 bp: 0 1
Internal: 3 0

In [6]:
def print_nodes(node):
    """Print the taxon-bearing nodes below ``node``, children first.

    The recursion descends into every child before handling the node itself,
    and each node whose ``taxon`` is not ``None`` is printed as
    ``"<taxon> (<node.level()>)"``.

    Args:
        node: object exposing ``child_nodes()``, ``taxon`` and ``level()``.
    """
    for descendant in node.child_nodes():
        print_nodes(descendant)

    taxon = node.taxon
    if taxon is None:
        # Nothing to report for nodes that carry no taxon.
        return
    print('%s (%d)' % (taxon, node.level()))

# Depth-first listing of the taxa, starting from the tree's seed node.
print_nodes(ebola_raxml.seed_node)


BDBV_KC545395 18891 bp (1)
SUDV_EU338380 18891 bp (9)
SUDV_KC242783 18891 bp (10)
SUDV_FJ968794 18891 bp (10)
SUDV_KC589025 18891 bp (9)
SUDV_AY729654 18891 bp (10)
SUDV_JN638998 18891 bp (10)
RESTV_FJ621584 18891 bp (8)
RESTV_JX477165 18891 bp (10)
RESTV_FJ621583 18891 bp (10)
RESTV_JX477166 18891 bp (11)
RESTV_AB050936 18891 bp (11)
RESTV_FJ621585 18891 bp (10)
EBOV_2014_KM034561 18891 bp (10)
EBOV_2014_KM034562 18891 bp (10)
EBOV_2014_KM034557 18891 bp (12)
EBOV_2014_KM034558 18891 bp (14)
EBOV_2014_KM034556 18891 bp (14)
EBOV_2014_KM034560 18891 bp (13)
EBOV_2014_KM233113 18891 bp (13)
EBOV_2014_KM233114 18891 bp (13)
EBOV_2014_KM233116 18891 bp (13)
EBOV_2014_KM233115 18891 bp (14)
EBOV_2014_KM233117 18891 bp (15)
EBOV_2014_KM233118 18891 bp (15)
EBOV_2014_KM034559 18891 bp (10)
EBOV_2014_KM034563 18891 bp (8)
EBOV_1976_KC242801 18891 bp (10)
EBOV_1976_AF272001 18891 bp (10)
EBOV_1995_KC242796 18891 bp (10)
EBOV_1995_KC242799 18891 bp (10)
EBOV_2007_KC242788 18891 bp (10)
EBOV_2007_KC242787 18891 bp (10)
EBOV_2007_KC242784 18891 bp (10)
EBOV_2007_KC242785 18891 bp (12)
EBOV_2007_KC242790 18891 bp (12)
EBOV_2007_KC242786 18891 bp (12)
EBOV_2007_KC242789 18891 bp (12)
TAFV_FJ217162 18891 bp (5)
BDBV_FJ217161 18891 bp (4)
BDBV_KC545396 18891 bp (3)
BDBV_KC545394 18891 bp (2)
BDBV_KC545393 18891 bp (1)

In [7]:
from collections import deque

def print_breadth(tree):
    """Print the taxon-bearing nodes of ``tree`` in breadth-first order.

    Nodes are taken from a FIFO queue seeded with ``tree.seed_node``. A node
    whose ``taxon`` is not ``None`` is printed as ``"<taxon> (<level>)"`` and
    is not expanded further; a node without a taxon contributes its children
    to the end of the queue instead.

    Args:
        tree: object exposing ``seed_node``; nodes expose ``child_nodes()``,
            ``taxon`` and ``level()``.
    """
    pending = deque([tree.seed_node])
    while pending:
        current = pending.popleft()
        taxon = current.taxon
        if taxon is None:
            # Only taxon-less nodes are expanded; their children wait their turn.
            pending.extend(current.child_nodes())
            continue
        print('%s (%d)' % (taxon, current.level()))

# Breadth-first listing of the taxa; unlike the other helpers this one takes
# the tree itself and reads its seed_node internally.
print_breadth(ebola_raxml)


BDBV_KC545395 18891 bp (1)
BDBV_KC545393 18891 bp (1)
BDBV_KC545394 18891 bp (2)
BDBV_KC545396 18891 bp (3)
BDBV_FJ217161 18891 bp (4)
TAFV_FJ217162 18891 bp (5)
RESTV_FJ621584 18891 bp (8)
EBOV_2014_KM034563 18891 bp (8)
SUDV_EU338380 18891 bp (9)
SUDV_KC589025 18891 bp (9)
SUDV_KC242783 18891 bp (10)
SUDV_FJ968794 18891 bp (10)
SUDV_AY729654 18891 bp (10)
SUDV_JN638998 18891 bp (10)
RESTV_JX477165 18891 bp (10)
RESTV_FJ621583 18891 bp (10)
RESTV_FJ621585 18891 bp (10)
EBOV_2014_KM034561 18891 bp (10)
EBOV_2014_KM034562 18891 bp (10)
EBOV_2014_KM034559 18891 bp (10)
EBOV_1976_KC242801 18891 bp (10)
EBOV_1976_AF272001 18891 bp (10)
EBOV_1995_KC242796 18891 bp (10)
EBOV_1995_KC242799 18891 bp (10)
EBOV_2007_KC242788 18891 bp (10)
EBOV_2007_KC242787 18891 bp (10)
EBOV_2007_KC242784 18891 bp (10)
RESTV_JX477166 18891 bp (11)
RESTV_AB050936 18891 bp (11)
EBOV_2014_KM034557 18891 bp (12)
EBOV_2007_KC242785 18891 bp (12)
EBOV_2007_KC242790 18891 bp (12)
EBOV_2007_KC242786 18891 bp (12)
EBOV_2007_KC242789 18891 bp (12)
EBOV_2014_KM034560 18891 bp (13)
EBOV_2014_KM233113 18891 bp (13)
EBOV_2014_KM233114 18891 bp (13)
EBOV_2014_KM233116 18891 bp (13)
EBOV_2014_KM034558 18891 bp (14)
EBOV_2014_KM034556 18891 bp (14)
EBOV_2014_KM233115 18891 bp (14)
EBOV_2014_KM233117 18891 bp (15)
EBOV_2014_KM233118 18891 bp (15)

In [8]:
#remove
#def clean_comments(node, depth=0):
#    for child in node.child_nodes():
#        clean_comments(child, depth + 1)
#    if node.taxon is None:
#        node.comments = None

#clean_comments(ebola_raxml.seed_node)
#ebola_raxml.write_to_path('ebola_clean.nex', 'nexus')
#ebov_2014_raxml = dendropy.Tree.get_from_path('my_ebov_2014.nex', 'nexus')
#clean_comments(ebov_2014_raxml.seed_node)
#ebov_2014_raxml.write_to_path('ebov_2014_clean.nex', 'nexus')

In [10]:
from copy import deepcopy
# Work on a deep copy: simplify_tree below reassigns node taxa and child lists
# in place, and ebola_raxml should stay as loaded.
simple_ebola = deepcopy(ebola_raxml)

def simplify_tree(node):
    """Collapse each clade whose leaves share one label prefix into a single node.

    The prefix of a leaf is taken from its taxon label: the text before the
    first space is split on ``_`` and the first token is used. For ``EBOV``
    labels the second token is appended as well, so
    ``"EBOV_2014_KM034561 18891 bp"`` yields ``"EBOV2014"``; an ``EBOV`` label
    with no second token falls back to plain ``"EBOV"`` instead of raising
    ``IndexError``.

    If all leaves below ``node`` have the same prefix, the prefix set and the
    leaf count are printed, ``node`` gets a new ``dendropy.Taxon`` labelled
    with that prefix, and its child list is replaced by an empty one.
    Otherwise the function recurses into each child. The tree is modified in
    place.

    Args:
        node: node exposing ``leaf_nodes()``, ``child_nodes()``,
            ``set_child_nodes()`` and a writable ``taxon``; every leaf's
            taxon must have a ``label`` string.
    """
    # Collect the leaves once; the list is reused for the count printed below.
    leaves = node.leaf_nodes()
    prefs = set()
    for leaf in leaves:
        my_toks = leaf.taxon.label.split(' ')[0].split('_')
        if my_toks[0] == 'EBOV' and len(my_toks) > 1:
            # EBOV entries are grouped per second token (e.g. the year).
            prefs.add('EBOV' + my_toks[1])
        else:
            prefs.add(my_toks[0])
    if len(prefs) == 1:
        print(prefs, len(leaves))
        node.taxon = dendropy.Taxon(label=next(iter(prefs)))
        node.set_child_nodes([])
    else:
        for child in node.child_nodes():
            simplify_tree(child)

# Collapse single-prefix clades in the copy (in place), ladderize the result,
# and save it in NEXUS format as 'ebola_simple.nex'.
simplify_tree(simple_ebola.seed_node)
simple_ebola.ladderize()
simple_ebola.write_to_path('ebola_simple.nex', 'nexus')


(set(['BDBV']), 1)
(set(['SUDV']), 6)
(set(['RESTV']), 6)
(set(['EBOV2014']), 14)
(set(['EBOV1976']), 2)
(set(['EBOV1995']), 2)
(set(['EBOV2007']), 7)
(set(['TAFV']), 1)
(set(['BDBV']), 1)
(set(['BDBV']), 1)
(set(['BDBV']), 1)
(set(['BDBV']), 1)

In [ ]: