Analysis of HIV and Biological Age


In [1]:
import NotebookImport
from HIV_Age_Advancement import *


importing IPython notebook from HIV_Age_Advancement
importing IPython notebook from Setup/Imports
Populating the interactive namespace from numpy and matplotlib
importing IPython notebook from Setup/MethylationAgeModels
importing IPython notebook from Setup/Read_HIV_Data
Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  5.28121    0.91731   5.757 5.77e-08 ***
chron_age    0.97957    0.05903  16.594  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4.805 on 131 degrees of freedom
Multiple R-squared:  0.6776,	Adjusted R-squared:  0.6752 
F-statistic: 275.4 on 1 and 131 DF,  p-value: < 2.2e-16



In [2]:
from Setup.DX_Imports import *


importing IPython notebook from Setup/DX_Imports

Normalize Data


In [3]:
# Restrict df_hiv to the columns labelled by pred_c.index.
# `.ix` has been removed from pandas; `reindex` is the label-based replacement
# that, like `.ix`, yields an all-NaN column for any label missing from df_hiv.
df_hiv2 = df_hiv.reindex(columns=pred_c.index)
# Drop columns that contain no data at all.
df_hiv2 = df_hiv2.dropna(axis=1, how='all')

# NOTE(review): logit_adj is a project helper; presumably an adjusted logit
# transform of the values -- confirm against its definition.
dd = logit_adj(df_hiv2)
# Standardize each row: subtract the row mean and divide by the row
# standard deviation (both taken across columns).
m = dd.mean(axis=1)
s = dd.std(axis=1)
df_norm = dd.subtract(m, axis=0).divide(s, axis=0)

In [4]:
# Boolean mask that is True where the `hiv` label equals 'HIV+'.
hiv_positive_mask = (hiv == 'HIV+')
# NOTE(review): ttest_df is a project helper; presumably a t-test of each row
# of df_norm between the two groups defined by the mask -- confirm.
t_hiv = ttest_df(hiv_positive_mask, df_norm)

In [5]:
# Collapse the N_/S_-prefixed shelf and shore labels into a single 'Shelf' /
# 'Shore' category each; every other value is left unchanged.
island_label_map = {
    'N_Shelf': 'Shelf',
    'S_Shelf': 'Shelf',
    'N_Shore': 'Shore',
    'S_Shore': 'Shore',
}
rti = isl.Relation_to_Island.replace(island_label_map)

Prepare Data for Association Tests


In [9]:
def chunkify_df(df, store, table_name, N=100):
    """
    Split a DataFrame row-wise into N interleaved chunks and write each to HDF5.

    Columns containing any missing value are dropped first.  Chunk ``i`` then
    holds rows ``i, i + N, i + 2N, ...`` (by position) and is written under
    the key ``'<table_name>/chunk_<i>'``.  All N keys are written, even if
    some chunks end up empty.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to split.
    store : str or pandas.HDFStore
        Target passed straight through to ``DataFrame.to_hdf``.
    table_name : str
        Key prefix (group) under which the chunks are stored.
    N : int, default 100
        Number of chunks to write.

    Raises
    ------
    ValueError
        If ``N`` is smaller than 1 (nothing would be written otherwise).
    """
    if N < 1:
        raise ValueError('N must be a positive integer, got {!r}'.format(N))

    complete = df.dropna(axis=1)
    for i in range(N):
        # Positional stride instead of re-selecting by label: `.ix` no longer
        # exists in pandas, and label look-ups would duplicate rows whenever
        # the index contains repeated labels.
        chunk = complete.iloc[i::N]
        chunk.to_hdf(store, key='{}/chunk_{}'.format(table_name, i))

In [10]:
# Location of the HDF5 file used to hand data to later steps.
# NOTE(review): absolute, user-specific path -- consider making it configurable.
store_path = '/cellar/users/agross/Data/tmp/for_parallel.h5'
# Keep `store` bound to the HDFStore handle only (not to the path string).
store = pd.HDFStore(store_path)

In [105]:
# Save pred_c under the 'bio_age_hiv' key.
store['bio_age_hiv'] = pred_c
# Boolean flag (label == 'HIV+') aligned to pred_c's index.  `.ix` has been
# removed from pandas; `reindex` keeps its label-based alignment, and labels
# missing from `hiv` compare as False.
store['HIV'] = hiv.reindex(pred_c.index) == 'HIV+'
# Look the clinical column up once and reuse it for the stored subset.
duration_t = clinical['estimated duration hiv (months)']
# NOTE(review): `ti` is a project helper; presumably it returns the index
# labels where the boolean mask is True -- confirm.
store['duration'] = duration_t.reindex(ti(duration == 'HIV Short'))

In [12]:
# Write df_norm in chunks to the file behind `store`, under keys of the form
# 'primary_cohort/chunk_<i>' (default number of chunks).
chunkify_df(df=df_norm, store=store.filename, table_name='primary_cohort')


/cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas/io/pytables.py:2441: PerformanceWarning: 
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->unicode,key->axis0] [items->None]

  warnings.warn(ws, PerformanceWarning)
/cellar/users/agross/anaconda2/lib/python2.7/site-packages/pandas/io/pytables.py:2441: PerformanceWarning: 
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->unicode,key->block0_items] [items->None]

  warnings.warn(ws, PerformanceWarning)

In [106]:
# Close the HDFStore handle and immediately open it again.
# NOTE(review): presumably this refreshes the handle after chunkify_df wrote
# to the same file through its path (store.filename) -- confirm.
store.close()
store.open()