Notebook to get a quick estimate of the number of nodes that differ between two trees.


In [1]:
import dendropy
from dendropy.utility.fileutils import find_files
import numpy as np

Read in data.


In [2]:
taxa = dendropy.TaxonSet()

ours = dendropy.Tree.get_from_path('../best.phy', 'newick', taxon_set=taxa)

Calculate Robinson-Foulds distance, and tree length.


In [4]:
non_ml = dendropy.Tree.get_from_path('../Trees/MLE/ExaML_result.SquamataPyron.MLE.2b', 'newick', taxon_set = taxa)
print ours.length()
print non_ml.length()
print ours.symmetric_difference(non_ml)


124641.641853
204.36802613
1002

In [ ]:
taxa = dendropy.TaxonSet()

pb = dendropy.Tree.get_from_path('../Trees/TotalOptimization/Ranked/2598364', 'nexus', taxon_set=taxa)

In [7]:
rfs = [tree.symmetric_difference(pb_o) for tree in uotrees]

In [8]:
rfs


Out[8]:
[2444, 2434, 2440, 2430, 2444, 2406, 2415, 2424, 2444, 2434, 2428]

In [3]:
olist = find_files(top='garli_opt/', filename_filter='*.tre')
print olist


['/home/april/projectfiles/squamates/garli_opt/1.tre', '/home/april/projectfiles/squamates/garli_opt/4.tre', '/home/april/projectfiles/squamates/garli_opt/5.tre', '/home/april/projectfiles/squamates/garli_opt/6.tre', '/home/april/projectfiles/squamates/garli_opt/MLE.tre']

In [ ]:
otrees = [dendropy.Tree.get_from_path(filename, "nexus") for filename in olist]

Calculate RF score, pairwise over all trees in sample.


In [ ]:
n = len(uotrees)
udiffarray = np.zeros((n,n))

for i, ele1 in enumerate(uotrees):
    for j, ele2 in enumerate(uotrees):
        if j >= i:
            break # Since the matrix is symmetrical we don't need to
                  # calculate everything
        difference = ele1.symmetric_difference(ele2) 
        udiffarray[i, j] = difference
        udiffarray[j, i] = difference

In [ ]:
diffarray

In [38]:
diffarray


Out[38]:
array([[    0.,   866.,   974.,   886.,  1002.,   884.,   984.,   938.,
          946.],
       [  866.,     0.,   716.,   686.,   632.,   604.,   658.,   700.,
          654.],
       [  974.,   716.,     0.,   718.,   682.,   696.,   794.,   756.,
          784.],
       [  886.,   686.,   718.,     0.,   800.,   692.,   696.,   758.,
          762.],
       [ 1002.,   632.,   682.,   800.,     0.,   618.,   808.,   810.,
          674.],
       [  884.,   604.,   696.,   692.,   618.,     0.,   754.,   696.,
          686.],
       [  984.,   658.,   794.,   696.,   808.,   754.,     0.,   720.,
          714.],
       [  938.,   700.,   756.,   758.,   810.,   696.,   720.,     0.,
          772.],
       [  946.,   654.,   784.,   762.,   674.,   686.,   714.,   772.,
            0.]])

In [36]:
o_tl = [tree.length() for tree in otrees]
print o_tl


[267.81482687115044, 204.3680261301375, 218.91695960755754, 205.61858615952585, 215.44145413795812, 203.79452107889662, 203.37676965100104, 205.6211342413188, 203.61742308190165]

In [39]:
uo_tl = [mle.length() for mle in uotrees]
print uo_tl


[267.81482687115044, 204.3680261301375, 218.91695960755754, 205.61858615952585, 215.44145413795812, 203.79452107889662, 203.37676965100104, 205.6211342413188, 203.61742308190165]

In [ ]: