In [1]:
import NotebookImport
from Setup.Imports import *
In [2]:
from Setup.MethylationAgeModels import *
In [3]:
from Setup.Read_HIV_Data import *
In [4]:
hiv = (duration=='Control').map({False: 'HIV+', True: 'HIV-'})
hiv.name = 'HIV Status'
In [5]:
hiv.value_counts()
Out[5]:
In [6]:
def model_plot(prediction):
fig, axs = subplots(1,2, figsize=(10,4), sharex=True, sharey=True)
plot_regression(age, prediction.ix[ti(hiv=='HIV-')], ax=axs[0])
plot_regression(age, prediction.ix[ti(hiv=='HIV+')], ax=axs[1])
axs[0].set_title('HIV-')
axs[1].set_title('HIV+')
In [7]:
pred = run_hannum_model(df_hiv)
In [8]:
#Do not import
model_plot(pred)
In [9]:
get_error(age, pred, denominator='x', groups=hiv)
Out[9]:
Preforming a linear adjustment on the control data.
In [10]:
reg = linear_regression(age, pred.ix[ti(duration=='Control')])
pred_adj = (pred - reg['intercept']) / reg['slope']
In [11]:
#Do not import
model_plot(pred_adj)
In [12]:
get_error(age, pred_adj, denominator='x', groups=hiv)
Out[12]:
In [13]:
pred_horvath = run_horvath_model(df_hiv_n)
In [14]:
#Do not import
model_plot(pred_horvath)
In [15]:
reg = linear_regression(age, pred_horvath.ix[ti(duration=='Control')])
pred_horvath_adj = (pred_horvath - reg['intercept']) / reg['slope']
In [16]:
#Do not import
model_plot(pred_horvath_adj)
In [17]:
get_error(age, pred_horvath_adj, denominator='x', groups=hiv)
Out[17]:
In [18]:
#Do not import
fig, axs = subplots(1,2, figsize=(9,4), sharex=True, sharey=True)
plot_regression(pred_adj.ix[ti(hiv=='HIV-')], pred_horvath_adj, ax=axs[0])
plot_regression(pred_adj.ix[ti(hiv=='HIV+')], pred_horvath_adj, ax=axs[1])
fig.tight_layout()
In [19]:
get_error(pred_adj, pred_horvath_adj, groups=hiv)
Out[19]:
In [20]:
diff = ((pred_adj - pred_horvath_adj) / ((pred_adj + pred_horvath_adj) * .5)).abs()
diff = diff.groupby(level=0).first()
diff.name = 'Absolute difference in models'
In [21]:
#Do not import
fig, axs = subplots(1,2, figsize=(10,4))
diff.hist(ax=axs[0])
axs[0].set_xlabel(diff.name)
violin_plot_pandas(duration, diff, ax=axs[1])
fig.tight_layout()
In [22]:
in_model = detection_p[detection_p.level_0.isin(hannum_model.index.union(horvath_model.index))]
(diff).ix[in_model.Sample_Name.unique()].dropna().order()
Out[22]:
In [23]:
pt = ti(diff < .2)
In [24]:
o = ti(diff > .2)
o
Out[24]:
In [25]:
#Do not import
fig, axs = subplots(1,2, figsize=(9,4), sharex=True, sharey=True)
plot_regression(pred_adj.ix[ti(hiv=='HIV-')].ix[pt], pred_horvath_adj, ax=axs[0])
plot_regression(pred_adj.ix[ti(hiv=='HIV+')].ix[pt], pred_horvath_adj, ax=axs[1])
series_scatter(pred_adj.ix[ti(hiv=='HIV-')].ix[o], pred_horvath_adj, ax=axs[1],
color=colors[0], ann=None)
series_scatter(pred_adj.ix[ti(hiv=='HIV+')].ix[o], pred_horvath_adj, ax=axs[0],
color=colors[0], ann=None)
fig.tight_layout()
fig.savefig(FIGDIR + 'hiv_model_agreement_filter.png', dpi=300)
In [29]:
#Do not import
v = detection_p.groupby('Sample_Name').size()
v.name = '# probes with poor detection'
series_scatter(diff, v)
In [30]:
detection_p[detection_p.Sample_Name.isin(o)].groupby('Sample_Name').size().order()
Out[30]:
In [31]:
#pt = ti(((pred_adj - pred_horvath_adj)).abs().dropna() < 10)
Without filter.
In [32]:
pred_c = (pred_horvath_adj + pred_adj) / 2
pred_c = pred_c.ix[duration.index]
pred_c.name = 'Predicted Age (Combined)'
reg = linear_regression(age, pred_c.ix[ti(duration=='Control')])
pred_c = (pred_c - reg['intercept']) / reg['slope']
In [33]:
#Do not import
model_plot(pred_c)
In [34]:
get_error(age, pred_c, denominator='x', groups=hiv)
Out[34]:
With QC filter
In [35]:
pred_c = (pred_horvath_adj + pred_adj) / 2
pred_c = pred_c.ix[duration.index].ix[pt]
pred_c.name = 'Predicted Age (Combined)'
reg = linear_regression(age, pred_c.ix[ti(duration=='Control')])
pred_c = (pred_c - reg['intercept']) / reg['slope']
In [36]:
#Do not import
model_plot(pred_c)
In [37]:
get_error(age, pred_c, denominator='x', groups=hiv)
Out[37]:
In [38]:
#Do not import
fig, axs = subplots(1,3, figsize=(15,4), sharex=True, sharey=True)
plot_regression(age, pred_c.ix[ti(duration=='Control')], ax=axs[0])
plot_regression(age, pred_c.ix[ti(duration=='HIV Short')], ax=axs[1])
plot_regression(age, pred_c.ix[ti(duration=='HIV Long')], ax=axs[2])
axs[2].set_xlim(20,75)
axs[2].set_ylim(20,75);
In [39]:
get_error(age, pred_c, denominator='x', groups=duration)
Out[39]:
In [40]:
#Do not import
fig, axs = subplots(2,1, figsize=(5,7))
violin_plot_pandas(duration, pred_c / clinical.age,
order=['Control','HIV Short','HIV Long'],
ax=axs[0])
axs[0].set_ylabel('Age Acceleration (AMAR)')
violin_plot_pandas(duration, pred_c - clinical.age,
order=['Control','HIV Short','HIV Long'],
ax=axs[1])
axs[1].set_ylabel('Age Advance (Predicted - Actual)')
for ax in axs:
prettify_ax(ax)
fig.tight_layout()
Comparing short vs. long term infected HIV+ subjects
In [41]:
kruskal_pandas(duration.ix[ti(hiv == 'HIV+')], (pred_c - age))
Out[41]:
Association of age residuals with HIV status.
In [42]:
kruskal_pandas(hiv, (pred_c - age))
Out[42]:
Mean age resisuals broken down by HIV status. Not that HIV- is ~0 by construction as the full dataset is normalized by the controls.
In [44]:
(pred_c - age).dropna().groupby(hiv).mean()
Out[44]:
In [45]:
p2 = (pred_c - (clinical.age - (clinical['estimated duration hiv (months)'] / 12.)))
p2.name = 'Biological Time Since Onset'
a2 = (clinical['estimated duration hiv (months)'] / 12.)
a2.name = 'Actual Time Since Onset'
a2 = a2.ix[pt].dropna()
p2 = p2.ix[pt].dropna()
In [46]:
#Do not import
fig, ax = subplots(figsize=(5,4))
plot_regression(a2, p2, ax=ax)
fig.tight_layout()
This is just the same linear model as the plot above.
In [103]:
#Do not import
p4 = p2
a2.name = 'chron_age'
p4.name = 'bio_age'
df = process_factors([a2, p4], standardize=False)
fmla = robjects.Formula('bio_age ~ chron_age')
m = robjects.r.lm(fmla, df)
s = robjects.r.summary(m)
print '\n\n'.join(str(s).split('\n\n')[-3:])
In [105]:
#Do not import
print robjects.r.confint(m)
In [106]:
#Do not import
fig, axs = subplots(1,2, figsize=(10,4))
plot_regression(a2.ix[ti(duration=='HIV Short')], p2, ax=axs[0])
axs[0].set_xbound(0,5)
plot_regression(a2.ix[ti(duration=='HIV Long')], p2, ax=axs[1])