In [1]:
import NotebookImport
from HIV_Age_Advancement import *
In [2]:
from Setup.DX_Imports import *
In [3]:
# Restrict the data to the columns labelled by pred_c.index, then z-score each row.
# `reindex` replaces the removed `.ix` indexer: labels in pred_c.index that are
# absent from df_hiv become all-NaN columns, which the dropna below discards.
df_hiv2 = df_hiv.reindex(columns=pred_c.index)
df_hiv2 = df_hiv2.dropna(axis=1, how='all')
# NOTE(review): logit_adj comes from the star-imports above; presumably a
# logit-style transform of the values -- confirm against its definition.
dd = logit_adj(df_hiv2)
# Per-row mean and standard deviation (axis=1 aggregates across columns).
m = dd.mean(axis=1)
s = dd.std(axis=1)
# Center and scale every row by its own mean / std.
df_norm = dd.subtract(m, axis=0).divide(s, axis=0)
In [4]:
# Run ttest_df on the row-normalized data using the boolean mask `hiv == 'HIV+'`.
# NOTE(review): ttest_df is provided by the star-imports; presumably a per-row
# two-group t-test -- confirm its argument order and return type.
t_hiv = ttest_df(hiv == 'HIV+', df_norm)
In [5]:
# Collapse the north/south variants of the island-relation labels into a
# single 'Shelf' / 'Shore' category each; all other values are left untouched.
rti = isl.Relation_to_Island.replace({
    'N_Shelf': 'Shelf',
    'S_Shelf': 'Shelf',
    'N_Shore': 'Shore',
    'S_Shore': 'Shore',
})
In [9]:
def chunkify_df(df, store, table_name, N=100):
    """
    Split a DataFrame into N row-interleaved chunks and write each one to HDF5.

    Columns containing any missing value are dropped first. Chunk ``i`` then
    holds every N-th row starting at row position ``i`` and is written under
    the key ``'<table_name>/chunk_<i>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to split; it is not modified in place.
    store : str or pandas.HDFStore
        Target handed straight to ``DataFrame.to_hdf``.
    table_name : str
        Key prefix under which the chunks are stored.
    N : int, default 100
        Number of chunks. When N exceeds the number of rows the surplus
        chunks contain no rows but are still written.
    """
    df = df.dropna(axis=1)
    for i in range(N):
        # Positional striding puts each row in exactly one chunk, even when
        # the index holds repeated labels (label lookup would duplicate them).
        chunk = df.iloc[i::N]
        chunk.to_hdf(store, key='{}/chunk_{}'.format(table_name, i))
In [10]:
# Open the HDF5 store used to hand data to the parallel jobs.
# NOTE(review): absolute, user-specific path -- consider a configurable data dir.
store = pd.HDFStore('/cellar/users/agross/Data/tmp/for_parallel.h5')
In [105]:
# Write the per-sample covariates into the open HDF5 store.
store['bio_age_hiv'] = pred_c
# Boolean HIV+ flag aligned to pred_c's index. `reindex` replaces the removed
# `.ix` indexer; labels missing from `hiv` become NaN and compare as False.
store['HIV'] = hiv.reindex(pred_c.index) == 'HIV+'
# Estimated HIV duration in months for every patient in `clinical`.
duration_t = clinical['estimated duration hiv (months)']
# Only the entries selected by `ti(duration == 'HIV Short')` are stored.
# NOTE(review): `ti` comes from the star-imports; presumably it returns the
# index labels where the mask is True -- confirm.
store['duration'] = duration_t.reindex(ti(duration == 'HIV Short'))
In [12]:
# Write the row-normalized data to the store's file in chunks under
# the 'primary_cohort' key prefix.
chunkify_df(df=df_norm, store=store.filename, table_name='primary_cohort')
In [106]:
# Close the store and immediately reopen it.
# NOTE(review): presumably done to flush pending writes to disk while keeping
# the handle usable for later cells -- confirm intent.
store.close()
store.open()