notebook.community

Edit and run

The databases created by pymc contain a lot of information that is not used in our analysis. To make the relevant information available online, we extract it and store it in pandas DataFrames.



In [1]:

    
import pymc
import pandas as pd
import os



In [2]:

    
# Directory holding the pymc HDF5 databases that are loaded below
# (one '<index>.hdf5' file per database).
traces_path = (
    './results/cell_number_data_160420T151934_10000000/'
)



In [3]:

    
# Names of the pymc traces that are copied into each exported DataFrame,
# in the order in which they appear as DataFrame columns.
columns = [
    'GF_left_pop', 'GF_right_pop', 'step_GF',
    'mi_left_pop', 'mi_right_pop', 'step_mi',
    'switchpoint_pop',
    'GF_sigma_inter', 'mi_sigma_inter', 'switchpoint_sigma_inter',
    'deviance',
]



In [4]:

    
# Copy the traces listed in `columns` from each selected pymc database into a
# pandas DataFrame and store it as a compressed HDF5 file under ./traces/.
output_dir = './traces'
# to_hdf does not create missing directories, so make sure the target exists.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# Only these database indices are exported (the reason for this selection is
# not recorded in this notebook).
for i in [0, 3, 4, 6, 8]:
    db = pymc.database.hdf5.load(os.path.join(traces_path, '{}.hdf5'.format(i)))
    try:
        # Build the frame in one step; `columns=` pins the column order.
        df = pd.DataFrame({column: db.trace(column)() for column in columns},
                          columns=columns)
    finally:
        # Release the database file handle even if reading a trace fails.
        db.close()
    # The compression keyword is `complib`; the former spelling `comblib`
    # meant that blosc was never actually selected.
    df.to_hdf(os.path.join(output_dir, '{}.h5'.format(i)), key='traces',
              complib='blosc', complevel=9)