In [2]:
cd /cellar/users/agross/TCGA_Code/Methlation
In [3]:
import NotebookImport
from HIV_Age_Advancement import *
In [7]:
# Keep only the columns of df_hiv whose labels appear in pred_c's index.
# .loc replaces .ix, which was deprecated in pandas 0.20 and removed in 1.0.
df = df_hiv.loc[:, pred_c.index]
In [14]:
# Row-wise standardisation of the logit_adj-transformed data against the
# columns selected by duration == 'Control'.
dd = logit_adj(df)

# Select the reference columns once instead of repeating the lookup.
# NOTE(review): ti() is assumed to return the labels where the mask is True,
# and those labels are assumed to be columns of dd -- confirm.
# .loc replaces .ix (deprecated in pandas 0.20, removed in 1.0).
controls = dd.loc[:, ti(duration == 'Control')]
m = controls.mean(axis=1)  # per-row mean over the reference columns
s = controls.std(axis=1)   # per-row standard deviation over the same columns

# (value - reference mean) / reference std for every row, then bounded to
# [-7, 7] so extreme scores cannot dominate downstream.
df_norm = dd.subtract(m, axis=0).divide(s, axis=0).clip(-7, 7)
In [15]:
# Sanity check: (n_rows, n_columns) of the normalised matrix.
df_norm.shape
Out[15]:
In [16]:
def chunkify_df(df, store, table_name, N=100):
    """Write a DataFrame to HDF5 as N interleaved row chunks.

    Columns containing any missing value are dropped first. Chunk ``i`` then
    holds the rows at positions ``i, i + N, i + 2N, ...`` and is written under
    the key ``'<table_name>/chunk_<i>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split; it is not modified.
    store : object
        Passed unchanged as the first argument of ``DataFrame.to_hdf``.
    table_name : str
        Prefix of the keys the chunks are written under.
    N : int, default 100
        Number of chunks. If N exceeds the number of rows, the surplus chunks
        are empty but are still written.

    Returns
    -------
    None
    """
    complete = df.dropna(axis=1)
    for i in range(N):
        # Positional slicing takes every N-th row exactly once, even when the
        # index contains repeated labels (a label lookup would return each
        # duplicate several times). It also avoids the removed .ix indexer.
        chunk = complete.iloc[i::N]
        chunk.to_hdf(store, key='{}/chunk_{}'.format(table_name, i))
In [17]:
# Tally of duration categories among the samples that are columns of df_norm.
# .loc replaces .ix (deprecated in pandas 0.20, removed in 1.0).
duration.loc[df_norm.columns].value_counts()
Out[17]:
In [19]:
# Tally of the values in hiv; no subsetting to df_norm's columns is applied here.
hiv.value_counts()
Out[19]:
In [31]:
# HDF5 file that receives the tables written below; the filename suggests it
# feeds a parallel job -- TODO confirm.
# The path gets its own name so `store` is only ever bound to the HDFStore.
store_path = '/cellar/users/agross/Data/tmp/for_parallel.h5'
store = pd.HDFStore(store_path)

# Boolean indicator (True where hiv == 'HIV+') for the labels in pred_c's index.
# .loc replaces .ix (deprecated in pandas 0.20, removed in 1.0).
(hiv == 'HIV+').loc[pred_c.index].to_hdf(store, key='HIV')

# Further tables, currently disabled:
#store['bio_age'] = mc_adj_c
#store['cell_counts'] = cell_counts
#store['age'] = age
#store['gender'] = gender == 'M'
#store['bio_age'] = age_adv.append(age_adv0)
In [28]:
# Write df_norm to the store's file in row chunks under 'hiv_consented/chunk_<i>'.
chunkify_df(
    df_norm,
    store=store.filename,
    table_name='hiv_consented',
)
In [33]:
# Close the store and immediately reopen it; presumably this flushes the
# writes above to disk while keeping `store` usable -- TODO confirm intent.
# NOTE(review): the store is left open when the notebook ends.
store.close()
store.open()
In [ ]: