Pandas & HDF5


In [1]:
import os
import os.path

# Location of the HDF5 file used throughout the notebook.
output_fname = 'output/storage.h5'

# Split the path so the parent directory can be created before the store is opened.
fname = os.path.basename(output_fname)
dname = os.path.dirname(output_fname)

# makedirs(exist_ok=True) also creates missing intermediate directories and is a
# no-op when the directory already exists (no check-then-create race).
# Skip it when the path has no directory component: creating '' raises.
if dname:
    os.makedirs(dname, exist_ok=True)

In [2]:
import numpy as np
from pandas import HDFStore,DataFrame

# Open the HDF5 store at <dname>/<fname>, creating the file if needed.
# No mode is passed, so HDFStore falls back to its default (append mode).
store_path = os.path.join(dname, fname)
hdf = HDFStore(store_path)

In [3]:
# Build a 5x3 frame of random values with columns A, B and C.
values = np.random.rand(5, 3)
df = DataFrame(values, columns=('A', 'B', 'C'))

# Write the frame to the store under key 'd1' in table format, with
# data_columns=True so the columns can be referenced in `where` queries.
hdf.put('d1', df, format='table', data_columns=True)

In [4]:
# Read 'd1' back from the store and show its (rows, columns) shape.
print(hdf.get('d1').shape)


(5, 3)

In [5]:
# Append five more random rows (same A/B/C columns) to the table stored at 'd1'.
extra_rows = DataFrame(np.random.rand(5, 3), columns=('A', 'B', 'C'))
hdf.append('d1', extra_rows, format='table', data_columns=True)

# Close the file.
hdf.close()

In [6]:
from pandas import read_hdf

# Read back only columns A and B of 'd1', keeping the rows where A is greater than 0.5.
# The result is a DataFrame, so it gets its own name instead of rebinding `hdf`,
# which the notebook uses for the HDFStore handle. The path reuses `output_fname`
# rather than repeating the literal.
d1_selected = read_hdf(output_fname, 'd1', where=['A>.5'], columns=['A', 'B'])

In [7]:
# Reopen the store and write three random frames under hierarchical keys.
hdf = HDFStore(output_fname)

# (key, shape) pairs, written in this order.
for key, shape in (
    ('tables/t1', (20, 5)),
    ('tables/t2', (10, 3)),
    ('new_tables/t1', (15, 2)),
):
    hdf.put(key, DataFrame(np.random.rand(*shape)))

In [8]:
# Show the store's contents. Since pandas 0.21 the plain repr of an HDFStore
# only reports the class and file path; info() returns the full per-key listing.
print(hdf.info())


<class 'pandas.io.pytables.HDFStore'>
File path: output/storage.h5
/d1                       frame_table  (typ->appendable,nrows->10,ncols->3,indexers->[index],dc->[A,B,C])
/new_tables/t1            frame        (shape->[15,2])                                                   
/tables/t1                frame        (shape->[20,5])                                                   
/tables/t2                frame        (shape->[10,3])