First, let's open the chronograms.
In [18]:
import sys
from ete3 import Tree, TreeStyle, NodeStyle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import scipy
import re
def readMAPChronogramFromRBOutput (file):
try:
f=open(file, 'r')
except IOError:
print ("Unknown file: "+file)
sys.exit()
line = ""
treeStrings = list()
for l in f:
if "tree TREE1 = [&R]" in l:
line = l.replace("tree TREE1 = [&R]", "")
tree = re.sub('\[&index=\d+([,\w=\d,%\.\{\}])*\]', "", line)#[&index=102,posterior=1.000000,ccp=1.000000,height_95%_HPD={0.025722,0.071446}]
#print(tree)
return Tree(tree)
#t23SUCLN = Tree("23S/output/UCLNMAP.nhx")
t23SUCLN = readMAPChronogramFromRBOutput("23S/output/UCLNMAP.tre")
t16S23SUCLN = readMAPChronogramFromRBOutput("16S23S/output/UCLNMAP.tre")
t16SUCLN = readMAPChronogramFromRBOutput("16S/output/UCLNMAP.tre")
In [25]:
def getNodeHeights( t ):
node2Height = dict()
id2Height = dict()
for node in t.traverse("postorder"):
if node not in node2Height:
node2Height[node] = 0.0
id2Height[node.name] = 0.0
if node.up:
if node.up.name =='':
leaves = node.up.get_leaves()
name=""
for l in leaves:
name += l.name
node.up.name=name
node2Height[node.up] = node2Height[node] + node.dist
id2Height[str(node.up.name)] = node2Height[node] + node.dist
# print node.name + " : " + str(node2Height[node])
#return node2Height,id2Height
return id2Height
heights23S = list(getNodeHeights(t23SUCLN).values())
heights16S23S = list(getNodeHeights(t16S23SUCLN).values())
heights16S = list(getNodeHeights(t16SUCLN).values())
Some plots:
In [32]:
def plotAndComputeCorrelation(x,y,namex, namey, logyn, logxn, limx=None, limy=None):
print("Pearson correlation coefficient and p-value: "+ str(scipy.stats.pearsonr(x, y)))
#Plotting:
plt.plot(x, y, 'bo')
plt.xlabel(namex, fontsize=15)
plt.ylabel(namey, fontsize=15)
#plt.legend(['data'], loc='upper left')
if logyn:
plt.yscale('log')
if logxn:
plt.xscale('log')
if not limx == None:
plt.xlim(limx)
if not limy == None:
plt.ylim(limy)
In [33]:
%matplotlib inline
plotAndComputeCorrelation(heights23S,heights16S23S,'Node ages 23S', 'Node ages 16S23S', False, False)
In [34]:
%matplotlib inline
plotAndComputeCorrelation(heights16S,heights16S23S,'Node ages 16S', 'Node ages 16S23S', False, False)
In [37]:
%matplotlib inline
plotAndComputeCorrelation(heights16S,heights23S,'Node ages 16S', 'Node ages 23S', False, False)
Let's zoom in a little bit.
In [36]:
limx=list()
limx.append(0.0)
limx.append(0.4)
limy=list()
limy.append(0.0)
limy.append(0.4)
%matplotlib inline
plotAndComputeCorrelation(heights23S,heights16S23S,'Node ages 23S', 'Node ages 16S23S', False, False, limx, limy)
In [38]:
%matplotlib inline
plotAndComputeCorrelation(heights16S,heights16S23S,'Node ages 16S', 'Node ages 16S23S', False, False, limx, limy)
In [39]:
%matplotlib inline
plotAndComputeCorrelation(heights16S,heights23S,'Node ages 16S', 'Node ages 23S', False, False, limx, limy)
In [ ]: