In [12]:
import NotebookImport
from Age_Models_All_Patients import *
In [13]:
# Per-sample grouping: the True/False result of the 'HIV' prefix test on
# `duration` where that result is non-null, otherwise the value from `labels`.
groups = duration.str.startswith('HIV').combine_first(labels)
# Drop samples whose group is 's2'. The boolean mask is applied with `.loc`
# because the `.ix` indexer was deprecated in pandas 0.20 and removed in 1.0.
groups = groups.loc[groups != 's2']
In [14]:
# Build `k`, the sample index later used to subset `labels`.
# `ti` comes from a star-import; presumably it returns the index labels where
# a boolean Series is True -- confirm against its definition.
k = ti((age > 25) & (age < 68))  # age strictly between 25 and 68
# Remove samples that are labelled 's2' and have gender 'F'.
k = k.difference(ti((labels == 's2') & (gender == 'F')))
# Remove every sample whose duration is anything other than 'Control'.
k = k.difference(ti(duration != 'Control'))
# Remove 's2' samples that have no entry in `duration` at all.
k = k.difference(ti(labels == 's2').difference(duration.index))
In [15]:
# Run the two-step adjustment using the labels of the samples selected in `k`.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
m1_adj, m2_adj, mc_adj = two_step_adjustment(pred, pred_horvath, age, labels.loc[k])
# Name the two per-model outputs; `mc_adj` is not renamed here.
m1_adj.name = 'Hannum model'
m2_adj.name = 'Horvath model'
In [16]:
from Setup.Imports import *
In [17]:
#Do not import
# Compare the two adjusted model predictions against each other.
# plot_regression is a star-imported helper; presumably it draws a scatter
# with a fitted line -- confirm against its definition.
plot_regression(m1_adj, m2_adj)
In [ ]:
# Relative disagreement between the two adjusted predictions, per sample:
# absolute value of (m1_adj - m2_adj) divided by the mean of the two.
diff = m1_adj.sub(m2_adj).div(m1_adj.add(m2_adj).mul(.5)).abs()
In [18]:
#Do not import
# Two side-by-side panels describing the model disagreement `diff`.
fig, axs = subplots(1,2, figsize=(9,4))
# Left panel: histogram of `diff`.
diff.hist(ax=axs[0])
# Right panel: `diff` against `labels`. violin_plot_pandas is a star-imported
# helper; presumably it draws one violin per label value -- confirm.
violin_plot_pandas(labels, diff, ax=axs[1])
In [19]:
from Setup.MethylationAgeModels import hannum_model, horvath_model
It seems that patients for whom the two models disagree significantly are enriched for model probes with poor detection quality. This is plausible, since those probes are inputs to the models.
In [20]:
# Load the 'detection_p' table from the methylation HDF5 store.
detection_p = pd.read_hdf(HDFS_DIR + 'dx_methylation.h5', key='detection_p')
# Disabled filter, kept for reference:
#detection_p = detection_p[detection_p[0] > 10e-5]
# Keep only rows for samples that have a disagreement score in `diff`.
detection_p = detection_p[detection_p['Sample_Name'].isin(diff.index)]
# Of those, keep rows whose `level_0` value (presumably the probe id -- confirm)
# appears in the index of either the Hannum or the Horvath model.
in_model = detection_p[
    detection_p['level_0'].isin(hannum_model.index.union(horvath_model.index))
]
In [21]:
# Number of rows in `in_model` for each sample.
# `.reindex` aligns the counts to every sample in `diff`; samples with no rows
# come back as NaN and are set to 0. This makes explicit what the original
# `.ix[diff.index]` did implicitly (`.ix` was removed in pandas 1.0, and
# list-style lookups with missing labels are an error in current pandas).
v = in_model.groupby('Sample_Name').size().reindex(diff.index).fillna(0)
# Test the association between a disagreement above 0.2 and having at least
# one row in `in_model`. Left as the last expression so the cell displays it.
fisher_exact_test(diff > .2, v > 0)
Out[21]:
In [22]:
# Samples whose relative model disagreement is strictly above 0.2.
# NOTE(review): a value of exactly .2 is in neither this set nor the
# `diff < .2` set used elsewhere in the notebook -- confirm that is intended.
o = ti(diff.gt(.2))
# Displayed as the cell output: how many such samples there are.
len(o)
Out[22]:
In [23]:
# Samples on which the two models agree (relative disagreement below 0.2).
pts = ti(diff < .2)
# Re-run the two-step adjustment using only the labels of the agreeing samples.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
m1_adj_c, m2_adj_c, mc_adj_c = two_step_adjustment(pred, pred_horvath, age, labels.loc[pts])
mc_adj_c.name = 'Consensus Model'
In [24]:
#Do not import
# One panel per label group: age against the consensus-adjusted prediction.
fig, axs = subplots(1,3, figsize=(15,4))
# Panel order is s1, s3, s2 (same as the original three explicit calls).
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
for i, group in enumerate(['s1', 's3', 's2']):
    plot_regression(age.loc[ti(labels == group)], mc_adj_c, ax=axs[i])
In [25]:
# Fit of each adjusted model against age, computed separately for every
# value of `l2` (star-imported; not defined in the visible cells).
# Columns are keyed by (group, model name), so the four models need distinct
# `.name` values or later entries overwrite earlier ones.
# NOTE(review): `mc_adj.name` is not set in the visible cells -- confirm it is
# assigned inside two_step_adjustment.
# `.loc` replaces `.ix`, which was deprecated in pandas 0.20 and removed in 1.0.
dd = pd.DataFrame({(s, m.name): model_fit(m.loc[ti(l2 == s)], age)
                   for s in l2.unique()
                   for m in [m1_adj, m2_adj, mc_adj, mc_adj_c]})
# Reshape the table for display.
dd = dd.unstack().unstack(1).T
dd
Out[25]:
In [26]:
#Do not import
# Three-panel summary figure.
# `.loc` replaces `.ix` throughout (deprecated in pandas 0.20, removed in 1.0).
fig, axs = subplots(1,3, figsize=(12,3.5))
ax = axs[0]
# Panel 1: the two adjusted models against each other, restricted on the
# second argument to samples with disagreement below 0.2 ...
plot_regression(m1_adj, m2_adj.loc[ti(diff < .2)], ax=ax, s=30,
                color='grey', edgecolor='black', alpha=.4)
# ... with the remaining samples overlaid in colors[0]. The mask is `>= .2`
# so that it is the exact complement of `< .2`; with `> .2` a sample at
# exactly .2 would be drawn by neither call.
series_scatter(m1_adj, m2_adj.loc[ti(diff >= .2)], color=colors[0],
               ax=ax, alpha=1, edgecolor='grey', ann=None,
               s=30)
# Panels 2 and 3: age against the consensus model for label groups s1 and s3.
plot_regression(age.loc[ti(labels == 's1')], mc_adj_c, ax=axs[1],
                s=30, color=colors_st[1], alpha=1, edgecolor='grey')
plot_regression(age.loc[ti(labels == 's3')], mc_adj_c, ax=axs[2],
                s=30, color=colors_st[2], alpha=1, edgecolor='grey')
fig.tight_layout()
In [27]:
#Do not import
# Three-panel figure written to FIGDIR + 'f2_top.pdf'.
# `.loc` replaces `.ix` throughout (deprecated in pandas 0.20, removed in 1.0).
fig, axs = subplots(1,3, figsize=(12,3.5))
ax = axs[0]
# Panel 1: the two adjusted models against each other, both clipped to
# [10, 90]; samples with disagreement below 0.2 go to plot_regression ...
plot_regression(m1_adj.clip(10, 90), m2_adj.clip(10, 90).loc[ti(diff < .2)], ax=ax, s=30,
                color='grey', edgecolor='black', alpha=.4)
# ... and the complementary samples (>= 0.2) are overlaid in colors[0].
series_scatter(m1_adj.clip(10, 90), m2_adj.clip(10, 90).loc[ti(diff >= .2)], color=colors[0],
               ax=ax, alpha=1, edgecolor='grey', ann=None,
               s=30)
# Panels 2 and 3: age against the consensus model for label groups s1 and s3.
plot_regression(age.loc[ti(labels == 's1')], mc_adj_c, ax=axs[1],
                s=30, color='grey', edgecolor='black')
plot_regression(age.loc[ti(labels == 's3')], mc_adj_c, ax=axs[2],
                s=30, color='grey', edgecolor='black')
# Give all three panels the same axis limits.
for ax in axs:
    ax.set_xlim(10,95)
    ax.set_ylim(10,95)
fig.tight_layout()
fig.savefig(FIGDIR + 'f2_top.pdf')