HIV Methylation Age: Cell Composition Correlates of Age Advancement

Imports and Helper Functions


In [1]:
import NotebookImport
from Model_Comparison_MF import *


importing IPython notebook from Model_Comparison_MF
importing IPython notebook from Age_Models_All_Patients
importing IPython notebook from Setup/Imports
Populating the interactive namespace from numpy and matplotlib
importing IPython notebook from Setup/MethylationAgeModels
importing IPython notebook from Setup/Read_HIV_Data

In [2]:
# Keep the entries labelled 's1' or 's3' that are also present in mc_adj_c's index.
# (`ti` is an imported helper -- presumably it returns the index of the True
# entries of a boolean series; confirm in Setup/Imports.)
pts = ti(labels.isin(['s1', 's3'])).intersection(mc_adj_c.index)

# Subset the predictions (taken from mc_adj_c) and the ages to that index and drop
# missing values. Note that `age` is rebound here, so every later cell sees the
# subset, not the series that was imported.
pred_c = mc_adj_c.ix[pts].dropna()
age = age.ix[pts].dropna()

Cell Composition


In [3]:
# Screen each row of cell_counts.T against `age` with pearson_pandas.
# The displayed table has columns rho, p and q (q is presumably a
# multiple-testing-adjusted p-value -- confirm in screen_feature).
screen_feature(age, pearson_pandas, cell_counts.T, align=False)


Out[3]:
rho p q
CD8T -0.15 9.25e-07 5.55e-06
NK 0.10 5.38e-04 1.61e-03
Mono 0.06 3.19e-02 6.37e-02
Bcell 0.04 1.36e-01 2.05e-01
Gran -0.02 4.39e-01 5.27e-01
CD4T -0.01 7.16e-01 7.16e-01

In [4]:
# Same screen as for `age`, but against the predicted age `pred_c`
# (derived from mc_adj_c in the subset cell above).
screen_feature(pred_c, pearson_pandas, cell_counts.T, align=False)


Out[4]:
rho p q
CD8T -0.16 1.35e-07 8.13e-07
NK 0.13 1.30e-05 3.91e-05
Mono 0.10 1.01e-03 2.02e-03
Bcell -0.07 1.78e-02 2.67e-02
CD4T -0.05 7.35e-02 8.82e-02
Gran 0.05 1.25e-01 1.25e-01

In [5]:
# Screen the cell-count features against the difference (pred_c - age),
# i.e. predicted minus actual age.
screen_feature((pred_c - age), pearson_pandas, cell_counts.T, align=False)


Out[5]:
rho p q
Bcell -0.20 5.23e-12 3.14e-11
Gran 0.12 3.59e-05 1.08e-04
CD4T -0.08 5.25e-03 8.70e-03
Mono 0.08 5.80e-03 8.70e-03
NK 0.08 7.57e-03 9.08e-03
CD8T -0.06 3.75e-02 3.75e-02

In [6]:
# Response variable: predicted age minus actual age.
residual = pred_c - age
residual.name = 'residual'  # the R formula below refers to this column by name

# Combine the response with the six cell-type columns into the frame handed to R.
df = process_factors(
    [residual,
     cell_counts['NK'], cell_counts['CD4T'], cell_counts['CD8T'],
     cell_counts['Bcell'], cell_counts['Mono'], cell_counts['Gran']],
    standardize=False)

# NOTE(review): the recorded summary reports "1 not defined because of
# singularities" with Gran = NA, so the six cell-type columns are linearly
# dependent given the intercept (presumably they are fractions summing to a
# constant -- confirm). Gran therefore acts as the implicit reference category.
fmla = robjects.Formula('residual ~ NK + CD4T + CD8T + Bcell + Mono + Gran')
m = robjects.r.lm(fmla, df)
s = robjects.r.summary(m)

# Show only the last three blank-line-separated sections of the summary text.
print('\n\n'.join(str(s).split('\n\n')[-3:]))


Coefficients: (1 not defined because of singularities)
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.4800     0.8599   1.721  0.08550 .  
NK            8.9736     3.3440   2.683  0.00739 ** 
CD4T         -5.6350     2.8913  -1.949  0.05155 .  
CD8T         -6.0203     3.9882  -1.510  0.13144    
Bcell       -18.8118     2.9826  -6.307 4.07e-10 ***
Mono          4.0888     7.0568   0.579  0.56243    
Gran              NA         NA      NA       NA    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.104 on 1128 degrees of freedom
Multiple R-squared:  0.0537,	Adjusted R-squared:  0.0495 
F-statistic:  12.8 on 5 and 1128 DF,  p-value: 3.986e-12



In [7]:
# Name the two series so the R formula can refer to them as columns.
# These names persist on the objects and are relied on by the later model cells.
age.name = 'age'
pred_c.name = 'pred_age'

# Unlike the residual model above, `age` is additionally restricted to the
# entries where in_set is True before the frame is assembled.
df = process_factors(
    [age.ix[ti(in_set==True)],
     cell_counts['NK'], cell_counts['CD4T'], cell_counts['CD8T'],
     cell_counts['Bcell'], cell_counts['Mono'], cell_counts['Gran'],
     pred_c],
    standardize=False)

# Regress actual age on predicted age plus the cell-type columns.
# NOTE(review): as in the recorded output, lm cannot estimate Gran (NA) because
# the cell-type columns are linearly dependent given the intercept.
fmla = robjects.Formula('age ~ pred_age + NK + CD4T + CD8T + Bcell + Mono + Gran')
m = robjects.r.lm(fmla, df)
s = robjects.r.summary(m)

# Show only the last three blank-line-separated sections of the summary text.
print('\n\n'.join(str(s).split('\n\n')[-3:]))


Coefficients: (1 not defined because of singularities)
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 13.31146    1.02701  12.961  < 2e-16 ***
pred_age     0.72329    0.01349  53.621  < 2e-16 ***
NK           0.14139    2.88338   0.049  0.96090    
CD4T         5.17463    2.59690   1.993  0.04655 *  
CD8T        -5.48321    3.54943  -1.545  0.12267    
Bcell       16.04825    4.91224   3.267  0.00112 ** 
Mono         3.60495    6.01347   0.599  0.54897    
Gran              NA         NA      NA       NA    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4.338 on 1124 degrees of freedom
Multiple R-squared:  0.7309,	Adjusted R-squared:  0.7294 
F-statistic: 508.8 on 6 and 1124 DF,  p-value: < 2.2e-16



In [8]:
# Rebuild the model frame: age (restricted to in_set), the six cell-type
# columns and pred_c. This is the same construction as in the previous model
# cell and depends on age.name / pred_c.name having been set there.
df = process_factors(
    [age.ix[ti(in_set==True)],
     cell_counts['NK'], cell_counts['CD4T'], cell_counts['CD8T'],
     cell_counts['Bcell'], cell_counts['Mono'], cell_counts['Gran'],
     pred_c],
    standardize=False)

# Reverse direction of the previous fit: predicted age as the response,
# actual age and the cell-type columns as predictors.
# NOTE(review): Gran again comes back NA (aliased) in the recorded summary.
fmla = robjects.Formula('pred_age ~  age + NK + CD4T + CD8T + Bcell + Mono + Gran')
m = robjects.r.lm(fmla, df)
s = robjects.r.summary(m)

# Show only the last three blank-line-separated sections of the summary text.
print('\n\n'.join(str(s).split('\n\n')[-3:]))


Coefficients: (1 not defined because of singularities)
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.76024    1.28973   1.365 0.172585    
age           0.99400    0.01854  53.621  < 2e-16 ***
NK            9.15425    3.36913   2.717 0.006687 ** 
CD4T         -5.46345    3.04535  -1.794 0.073077 .  
CD8T         -6.20882    4.16128  -1.492 0.135968    
Bcell       -19.21910    5.75740  -3.338 0.000871 ***
Mono          4.72759    7.04928   0.671 0.502582    
Gran               NA         NA      NA       NA    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.086 on 1124 degrees of freedom
Multiple R-squared:  0.7346,	Adjusted R-squared:  0.7332 
F-statistic: 518.6 on 6 and 1124 DF,  p-value: < 2.2e-16



In [9]:
# Set the name of `pred` (defined in an imported notebook) to 'pred_age'.
# NOTE(review): the model cell that follows builds its frame from pred_c, which
# already carries the name 'pred_age', and does not reference `pred`. Confirm
# whether `pred` was meant to be used there or whether this line is a leftover.
pred.name = 'pred_age'

In [10]:
# Same model frame as in the two preceding regression cells: all six cell-type
# columns are still passed to process_factors even though only CD8T is used in
# the formula below.
df = process_factors(
    [age.ix[ti(in_set==True)],
     cell_counts['NK'], cell_counts['CD4T'], cell_counts['CD8T'],
     cell_counts['Bcell'], cell_counts['Mono'], cell_counts['Gran'],
     pred_c],
    standardize=False)

# Reduced model: predicted age on actual age and CD8T only.
fmla = robjects.Formula('pred_age ~  age + CD8T')
m = robjects.r.lm(fmla, df)
s = robjects.r.summary(m)

# Show only the last three blank-line-separated sections of the summary text.
print('\n\n'.join(str(s).split('\n\n')[-3:]))


Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.71727    1.09054   0.658   0.5109    
age          0.99978    0.01855  53.901   <2e-16 ***
CD8T        -9.06228    3.96132  -2.288   0.0223 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.147 on 1128 degrees of freedom
Multiple R-squared:  0.7273,	Adjusted R-squared:  0.7268 
F-statistic:  1504 on 2 and 1128 DF,  p-value: < 2.2e-16