The databases created by pymc contain a lot of information that is not used in our analysis. To make the relevant information available online, we extract the traces we need and store them in pandas DataFrames.


In [1]:
import pymc
import pandas as pd
import os

In [2]:
# Directory containing the pymc HDF5 databases ('<i>.hdf5') that are read below.
traces_path = "./results/cell_number_data_160420T151934_10000000/"

In [3]:
# Names of the pymc traces that are copied into each exported DataFrame,
# in the order in which they appear as DataFrame columns.
columns = [
    'GF_left_pop',
    'GF_right_pop',
    'step_GF',
    'mi_left_pop',
    'mi_right_pop',
    'step_mi',
    'switchpoint_pop',
    'GF_sigma_inter',
    'mi_sigma_inter',
    'switchpoint_sigma_inter',
    'deviance',
]

In [4]:
# For each selected run, load its pymc HDF5 database, keep only the traces
# listed in `columns`, and write them to a compressed pandas HDF5 file.
for i in [0, 3, 4, 6, 8]:
    db = pymc.database.hdf5.load(os.path.join(traces_path, '{}.hdf5'.format(i)))
    try:
        # Build the frame in one step; `columns=columns` pins the column order.
        df = pd.DataFrame({column: db.trace(column)() for column in columns},
                          columns=columns)
    finally:
        # Release the database file handle even if a trace is missing.
        db.close()
    # The keyword is `complib`; the previous spelling `comblib` meant that
    # blosc compression was never actually requested.
    df.to_hdf('./traces/{}.h5'.format(i), 'traces', complib='blosc', complevel=9)