# Comparison of chronograms obtained based on different molecules, different models or rate evolution, different tree models

First, let's open the chronograms.

``````

In [18]:

import sys
from ete3 import Tree, TreeStyle, NodeStyle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import scipy
import re

try:
f=open(file, 'r')
except IOError:
print ("Unknown file: "+file)
sys.exit()
line = ""
treeStrings = list()
for l in f:
if "tree TREE1 = [&R]" in l:
line = l.replace("tree TREE1 = [&R]", "")
tree = re.sub('\[&index=\d+([,\w=\d,%\.\{\}])*\]', "", line)#[&index=102,posterior=1.000000,ccp=1.000000,height_95%_HPD={0.025722,0.071446}]
#print(tree)
return Tree(tree)

#t23SUCLN = Tree("23S/output/UCLNMAP.nhx")

``````

### All trees have been read, let's get their node heights, and compare them.

``````

In [25]:

def getNodeHeights( t ):
node2Height = dict()
id2Height = dict()
for node in t.traverse("postorder"):
if node not in node2Height:
node2Height[node] = 0.0
id2Height[node.name] = 0.0
if node.up:
if node.up.name =='':
leaves = node.up.get_leaves()
name=""
for l in leaves:
name += l.name
node.up.name=name
node2Height[node.up] = node2Height[node] + node.dist
id2Height[str(node.up.name)] = node2Height[node] + node.dist
# print node.name + " : " + str(node2Height[node])
#return node2Height,id2Height
return id2Height

heights23S = list(getNodeHeights(t23SUCLN).values())
heights16S23S = list(getNodeHeights(t16S23SUCLN).values())
heights16S = list(getNodeHeights(t16SUCLN).values())

``````

Some plots:

``````

In [32]:

def plotAndComputeCorrelation(x,y,namex, namey, logyn, logxn, limx=None, limy=None):
print("Pearson correlation coefficient and p-value: "+ str(scipy.stats.pearsonr(x, y)))
#Plotting:
plt.plot(x, y, 'bo')
plt.xlabel(namex, fontsize=15)
plt.ylabel(namey, fontsize=15)
#plt.legend(['data'], loc='upper left')
if logyn:
plt.yscale('log')
if logxn:
plt.xscale('log')
if not limx == None:
plt.xlim(limx)
if not limy == None:
plt.ylim(limy)

``````
``````

In [33]:

%matplotlib inline

plotAndComputeCorrelation(heights23S,heights16S23S,'Node ages 23S', 'Node ages 16S23S', False, False)

``````
``````

Pearson correlation coefficient and p-value: (0.95641673011316142, 2.0533545940852737e-109)

``````
``````

In [34]:

%matplotlib inline

plotAndComputeCorrelation(heights16S,heights16S23S,'Node ages 16S', 'Node ages 16S23S', False, False)

``````
``````

Pearson correlation coefficient and p-value: (0.99404879697582493, 1.6733761455743422e-195)

``````
``````

In [37]:

%matplotlib inline

plotAndComputeCorrelation(heights16S,heights23S,'Node ages 16S', 'Node ages 23S', False, False)

``````
``````

Pearson correlation coefficient and p-value: (0.93987606769313892, 9.8173929785950193e-96)

``````

Let's zoom in a little bit.

``````

In [36]:

limx=list()
limx.append(0.0)
limx.append(0.4)
limy=list()
limy.append(0.0)
limy.append(0.4)

%matplotlib inline

plotAndComputeCorrelation(heights23S,heights16S23S,'Node ages 23S', 'Node ages 16S23S', False, False, limx, limy)

``````
``````

Pearson correlation coefficient and p-value: (0.95641673011316142, 2.0533545940852737e-109)

``````
``````

In [38]:

%matplotlib inline

plotAndComputeCorrelation(heights16S,heights16S23S,'Node ages 16S', 'Node ages 16S23S', False, False, limx, limy)

``````
``````

Pearson correlation coefficient and p-value: (0.99404879697582493, 1.6733761455743422e-195)

``````
``````

In [39]:

%matplotlib inline

plotAndComputeCorrelation(heights16S,heights23S,'Node ages 16S', 'Node ages 23S', False, False, limx, limy)

``````
``````

Pearson correlation coefficient and p-value: (0.93987606769313892, 9.8173929785950193e-96)

``````
``````

In [ ]:

``````