In [1]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import matplotlib
import seaborn as sns
import scipy
from IPython.display import HTML
In [2]:
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')
Out[2]:
The simulation was performed on 10 different trees, used to generate alignments. For each tree, we gathered up to 5 calibration points (0, 1, 3, 5), and up to 5 constraints (0, 1, 3, 5). We ran the MCMC in each combination of conditions (=16 combinations), in 5 replicates, changing the nature of the calibrations and constraints. So in total we have $10~trees*16~conditions*5~replicates=800~points$.
In [52]:
colNames = ["treeId","numCalib","numCons","numReplicate","correlation", "rmsd", "correlation_bl", "rmsd_bl", "numNodes","numInHPD","fracInHPD","percent0","percent25","percent50","percent75","percent100"]
d = pd.read_csv ("resultAllTrees.txt", sep="\t", header=None, names=colNames)
In [53]:
d.describe()
Out[53]:
In [54]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["correlation_bl"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["correlation_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='Correlation in branch lengths')
Out[54]:
In [55]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["rmsd_bl"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["rmsd_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='RMSD in branch lengths')
Out[55]:
In [56]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["rmsd"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["rmsd"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='RMSD in node ages')
Out[56]:
In [57]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["correlation"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["correlation"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='Correlation with true dates')
Out[57]:
In [58]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["fracInHPD"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["fracInHPD"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='Fraction of dates in 95% HPD')
Out[58]:
In [59]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCons"], y=d["percent50"], inner=None)
ax = sns.swarmplot(d["numCons"], y=d["percent50"], color="white", edgecolor="gray")
ax.set(xlabel='Number of constraints', ylabel='Median size of the 95% HPD')
Out[59]:
In [60]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["rmsd_bl"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["rmsd_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='RMSD in branch lengths')
Out[60]:
In [61]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["correlation_bl"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["correlation_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='Correlation in branch lengths')
Out[61]:
In [62]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["rmsd"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["rmsd"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='RMSD in node ages')
Out[62]:
In [63]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["correlation"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["correlation"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='Correlation with true dates')
Out[63]:
In [64]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["fracInHPD"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["fracInHPD"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='Fraction in 95% HPD')
Out[64]:
In [65]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["numCalib"], y=d["percent50"], inner=None)
ax = sns.swarmplot(d["numCalib"], y=d["percent50"], color="white", edgecolor="gray")
ax.set(xlabel='Number of calibrations', ylabel='Median size of the 95% HPD')
Out[65]:
In [66]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="rmsd_bl", x="numCalib", data=d)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average rmsd in branch lengths')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[66]:
In [67]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="correlation_bl", x="numCalib", data=d)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average correlation in branch lengths')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[67]:
In [68]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="rmsd", x="numCalib", data=d)
#ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average rmsd in node ages')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[68]:
In [69]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="correlation", x="numCalib", data=d)
ax.set_ylim(0.8, 1.0)
ax.set(xlabel='Number of calibrations', ylabel='Average correlation in node ages')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[69]:
In [70]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="fracInHPD", x="numCalib", data=d)
ax.set_ylim(50, 90)
ax.set(xlabel='Number of calibrations', ylabel='Average fraction in 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[70]:
In [71]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent0", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average smallest 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[71]:
In [72]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent25", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average 1st quartile 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[72]:
In [73]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent50", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average median 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[73]:
In [74]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent75", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average 3rd quartile 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[74]:
In [75]:
%matplotlib inline
ax = sns.barplot(hue="numCons", y="percent100", x="numCalib", data=d)
ax.set(xlabel='Number of calibrations', ylabel='Average maximum 95% HPD')
# Put the legend out of the figure
handles, labels = ax.get_legend_handles_labels()
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., handles=handles, labels=["0 constraint", "1 constraint", "3 constraints", "5 constraints", ])
Out[75]:
Constraints and calibrations both improve accuracy. The number of constraints does not seem to have much of an effect on the size of the 95% HPD, contrary to calibrations. However, when using all constraints, resulyts are much improved, although with a lot of variance. It is possible that on some trees convergence was difficult in the fully constrained runs.
First let's order the trees from good to bad according to the correlation in node ages when all constraints are used:
In [106]:
d_sel = d.loc[d['numCons'] == 100]
print(d_sel.sort_values(['correlation'], ascending=False))
In [76]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["rmsd"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["rmsd"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Rmsd in node ages')
Out[76]:
In [28]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["correlation"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["correlation"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Correlation in node ages')
Out[28]:
In [29]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["rmsd_bl"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["rmsd_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Rmsd in branch lengths')
Out[29]:
In [30]:
%matplotlib inline
sns.set_style("whitegrid")
ax = sns.violinplot(d["treeId"], y=d["correlation_bl"], inner=None)
ax = sns.swarmplot(d["treeId"], y=d["correlation_bl"], color="white", edgecolor="gray")
ax.set(xlabel='Tree ID', ylabel='Correlation in branch lengths')
Out[30]: