In [31]:
import os
if os.getcwd().endswith('Benchmarks'):
os.chdir('..')
In [32]:
import NotebookImport
from Setup.Imports import *
In [33]:
from Setup.MethylationAgeModels import run_horvath_model, run_hannum_model
In [34]:
from Setup.Read_HIV_Data import *
In [35]:
df_meth = pd.read_hdf(HDFS_DIR + 'methylation_norm.h5', 'quant_BMIQ_adj')
In [36]:
store = pd.HDFStore(HDFS_DIR + 'methylation_norm.h5')
In [37]:
labels = store['labels']
labels = labels.set_index('sampleNames')['studyIndex']
l2 = labels.map({'s1':'Hannum','s3':'EPIC','s2':'Primary Cohort'})
In [38]:
df_meth_n = pd.read_hdf(HDFS_DIR + 'methylation_norm.h5', 'BMIQ_Horvath')
n2 = flow_sorted_data.groupby(cell_type, axis=1).mean()
avg = n2[cell_counts.columns].dot(cell_counts.ix[df_meth_n.columns].T)
d2 = df_meth_n.ix[avg.index, df_meth_n.columns].dropna(axis=[0,1], how='all')
cc = avg.mean(1)
df_meth_n = (d2 - avg).add(cc, axis=0).dropna(how='all')
In [39]:
#Do not import
fig, axs = subplots(1,2, figsize=(10,4), sharey=True)
violin_plot_pandas(l2, age, ax=axs[0])
violin_plot_pandas(l2.ix[study.index], age, ax=axs[1])
for ax in axs:
prettify_ax(ax)
ax.set_xlabel('')
fig.tight_layout()
In [40]:
pred = run_hannum_model(df_meth)
pred_horvath = run_horvath_model(df_meth_n)
pred_c = (pred_horvath + pred) / 2
pred_c.name = 'Predicted Age (Combined)'
All Patients
In [41]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
pts = labels.ix[labels != 's2'].index
plot_regression(age.ix[pts], pred, ax=axs[0])
plot_regression(age.ix[pts], pred_horvath.ix[pred.index], ax=axs[1])
plot_regression(age.ix[pts], pred_c.ix[pred.index], ax=axs[2])
In [42]:
pts = labels.index.difference(ti(study.isin(['HIV Short','HIV Long'])))
m1_adj, m2_adj, mc_adj = two_step_adjustment(pred, pred_horvath, age, labels.ix[pts])
In [43]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
plot_regression(age, m1_adj, ax=axs[0], density=True)
plot_regression(age, m2_adj, ax=axs[1], density=True)
plot_regression(age, mc_adj, ax=axs[2], density=True)
Only Hannum Data
In [44]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
plot_regression(age.ix[ti(labels=='s1')], m1_adj, ax=axs[0])
plot_regression(age.ix[ti(labels=='s1')], m2_adj, ax=axs[1])
plot_regression(age.ix[ti(labels=='s1')], mc_adj, ax=axs[2])
EPIC Data
In [45]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
plot_regression(age.ix[ti(labels=='s3')], m1_adj, ax=axs[0])
plot_regression(age.ix[ti(labels=='s3')], m2_adj, ax=axs[1])
plot_regression(age.ix[ti(labels=='s3')], mc_adj, ax=axs[2])
Control samples from our current study
In [46]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
plot_regression(age.ix[ti(labels=='s2')], m1_adj, ax=axs[0])
plot_regression(age.ix[ti(labels=='s2')], m2_adj, ax=axs[1])
plot_regression(age.ix[ti(labels=='s2')], mc_adj, ax=axs[2])
In [47]:
dd = pd.DataFrame({(s, m.name): model_fit(m.ix[ti(l2 == s)], age) for s in l2.unique()
for m in [m1_adj, m2_adj, mc_adj]})
dd = dd.unstack().unstack(1).T
dd
Out[47]:
Long term HIV infected patients
In [48]:
in_set = pd.Series(pred.index.isin(study.index), pred.index)
exclude_pts = ti(study.isin(['HIV Short','HIV Long']))
in_set = in_set.ix[in_set.index.difference(exclude_pts)]
In [49]:
in_set.value_counts()
Out[49]:
Looking at the diffence in age advancement from the patients in the set to those withheld.
In [50]:
pts = in_set.index
m1_adj, m2_adj, mc_adj = two_step_adjustment(pred, pred_horvath, age, labels.ix[pts])
In [51]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4), sharey=True)
violin_plot_pandas(in_set, (m1_adj - age) / age, ax=axs[0])
violin_plot_pandas(in_set, (m2_adj - age) / age, ax=axs[1])
violin_plot_pandas(in_set, (mc_adj - age) / age, ax=axs[2])
for ax in axs:
prettify_ax(ax)
ax.set_ylabel('Age Advancement')
In [52]:
pts = ti(in_set)
m1_adj, m2_adj, mc_adj = two_step_adjustment(pred, pred_horvath, age, labels.ix[pts])
In [53]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
plot_regression(age.ix[ti(labels=='s3')], m1_adj, ax=axs[0])
plot_regression(age.ix[ti(labels=='s3')], m2_adj, ax=axs[1])
plot_regression(age.ix[ti(labels=='s3')], mc_adj, ax=axs[2])
In [54]:
pts = ti(in_set).difference(ti(labels == 's2'))
m1_adj, m2_adj, mc_adj = two_step_adjustment(pred, pred_horvath, age, labels.ix[pts])
In [55]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
plot_regression(age, m1_adj, ax=axs[0])
plot_regression(age, m2_adj, ax=axs[1])
plot_regression(age, mc_adj, ax=axs[2])
All Patients
In [56]:
pts = ti(in_set == False).difference(ti(labels == 's2'))
m1_adj, m2_adj, mc_adj = two_step_adjustment(pred, pred_horvath, age, labels.ix[pts])
In [57]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
plot_regression(age, m1_adj, ax=axs[0])
plot_regression(age, m2_adj, ax=axs[1])
plot_regression(age, mc_adj, ax=axs[2])
Females 25-68
In [58]:
pts = ti((age >= 25.) & (age <= 68.) & (gender == 'F'))
m1_adj, m2_adj, mc_adj = two_step_adjustment(pred, pred_horvath, age, labels.ix[pts])
In [59]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4))
plot_regression(age, m1_adj, ax=axs[0], density=True)
plot_regression(age, m2_adj, ax=axs[1], density=True)
plot_regression(age, mc_adj, ax=axs[2], density=True)