In [1]:
import os
# When the kernel starts in a directory whose path ends with 'Parallel',
# move up one level.
# NOTE(review): presumably so the project-relative imports in the next cell
# resolve from the repository root — confirm.
if os.getcwd().endswith('Parallel'):
    os.chdir('..')
In [2]:
import NotebookImport
from Benchmarks.Model_Comparison_MF import *
In [3]:
# Keep only the entries of `age` that are strictly greater than 25 and
# strictly less than 68.
# NOTE(review): `ti` comes from the star import above; presumably it returns
# the index labels where the boolean mask is True — confirm.
k = ti((age < 68) & (age > 25))
In [4]:
# Apply `logit_adj` to the columns of `df_meth` selected by `k`.
dd = logit_adj(df_meth.ix[:, k])

# Per-row (axis=1) mean and standard deviation of the adjusted values.
m, s = dd.mean(1), dd.std(1)

# Standardize each row (subtract its mean, divide by its std), then clip the
# result to the closed interval [-7, 7].
df_norm = dd.subtract(m, axis=0).divide(s, axis=0).clip(-7, 7)
In [5]:
def chunkify_df(df, store, table_name, N=100):
    """
    Split the rows of a DataFrame into N interleaved chunks and write each
    chunk to HDF5.

    Columns that contain any missing value are dropped first. Chunk ``i``
    holds the rows at positions ``i, i + N, i + 2N, ...`` and is written
    under the key ``'<table_name>/chunk_<i>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to split.
    store : str or pandas.HDFStore
        Destination, passed unchanged to ``DataFrame.to_hdf``.
    table_name : str
        Key prefix under which the chunks are written.
    N : int, default 100
        Number of chunks. ``to_hdf`` is called once for every ``i`` in
        ``range(N)``, even when the corresponding slice is empty.

    Returns
    -------
    None
    """
    complete = df.dropna(axis=1)
    for i in range(N):
        # Stride by position rather than looking the strided labels back up:
        # a label lookup repeats rows when the index contains duplicates, and
        # the `.ix` indexer it relied on no longer exists in current pandas.
        chunk = complete.iloc[i::N]
        chunk.to_hdf(store, key='{}/chunk_{}'.format(table_name, i))
In [8]:
# Display how many entries fall under each distinct value of `gender`.
gender.value_counts()
Out[8]:
In [9]:
# Tally the `labels` values for the age-filtered keys in `k` that are also
# present among the columns of `df_meth`.
labels.ix[k.intersection(df_meth.columns)].value_counts()
Out[9]:
In [8]:
# Open the HDF5 file that collects the tables written below.
# NOTE(review): absolute, user-specific path — consider making it configurable.
store = pd.HDFStore('/cellar/users/agross/Data/tmp/for_parallel.h5')

# Persist the per-sample tables under fixed keys.
store['labels'] = labels
store['bio_age'] = mc_adj_c
store['cell_counts'] = cell_counts
store['age'] = age
# Stored as a boolean: True where gender equals 'M'.
store['gender'] = gender == 'M'

# Disabled alternative source for the 'bio_age' key:
#store['bio_age'] = age_adv.append(age_adv0)
In [9]:
# Same tally as the earlier label-count cell: `labels` values for the keys in
# `k` that are also columns of `df_meth`.
# NOTE(review): this cell duplicates that one and could be removed.
labels.ix[k.intersection(df_meth.columns)].value_counts()
Out[9]:
In [ ]:
# For each label value, take the columns of `df_norm` whose label matches and
# write them to the store's file in chunks under the paired table name.
for _label, _table in (('s1', 'in_set_s1'), ('s2', 'in_set_s2'), ('s3', 'in_set_s3')):
    chunkify_df(df_norm.ix[:, ti(labels == _label)], store.filename, _table)
In [ ]:
# Close the store handle and immediately reopen it.
# NOTE(review): presumably this refreshes the handle after the chunks were
# written through `store.filename` rather than through `store` — confirm.
store.close()
store.open()