Pandas & HDF5



In [1]:

    
import os
import os.path

# Location of the HDF5 file used throughout this notebook.
output_fname = 'output/storage.h5'

# Split the path once so later cells can reuse the pieces.
fname = os.path.basename(output_fname)
dname = os.path.dirname(output_fname)

# Make sure the target directory exists before the store is opened.
# makedirs(..., exist_ok=True) also creates missing parent directories and
# does not fail when the directory is already there (safe on re-runs).
# The guard skips the call when the path has no directory component,
# because os.makedirs('') raises FileNotFoundError.
if dname:
    os.makedirs(dname, exist_ok=True)



In [2]:

    
import numpy as np
from pandas import HDFStore,DataFrame

# Open the HDF5 file, creating it if it does not exist yet.
# mode='a' (append) is HDFStore's default; it is spelled out here so the
# intent is visible.
store_path = os.path.join(dname, fname)
hdf = HDFStore(store_path, mode='a')



In [3]:

    
# Build a 5x3 frame of random values with columns labelled A, B and C.
column_labels = ('A', 'B', 'C')
random_values = np.random.rand(5, 3)
df = DataFrame(random_values, columns=column_labels)

# Write the frame to the store under the key 'd1'. The 'table' format is
# used (rather than the default fixed format) and data_columns=True is set,
# as in the later append/query cells that operate on this same key.
hdf.put('d1', df, format='table', data_columns=True)



In [4]:

    
# Read the frame stored under 'd1' back from the file and report its
# (rows, columns) shape.
stored_d1 = hdf.get('d1')
print(stored_d1.shape)









    



(5, 3)



In [5]:

    
# Append five more random rows (same A/B/C columns) to the existing 'd1'
# table instead of overwriting it.
more_rows = DataFrame(np.random.rand(5, 3), columns=('A', 'B', 'C'))
hdf.append('d1', more_rows, format='table', data_columns=True)

# Writing is finished for now: close the store to release the file.
hdf.close()



In [6]:

    
from pandas import read_hdf
# Query the stored table directly from disk: select columns A and B,
# keeping only the rows where the value of A is greater than 0.5.
# The path comes from output_fname (defined in the first cell) rather than a
# repeated string literal, and the result is a DataFrame, so it gets its own
# name instead of reusing `hdf`, which denotes the HDFStore handle elsewhere.
d1_filtered = read_hdf(output_fname, 'd1', where=['A>.5'], columns=['A','B'])



In [7]:

    
# Reopen the store and write several frames under hierarchical keys.
# A '/' in the key nests the dataset inside a group of that name.
hdf = HDFStore(output_fname)

# (key, (rows, columns)) for each random frame, written in this order.
frames_to_store = (
    ('tables/t1', (20, 5)),
    ('tables/t2', (10, 3)),
    ('new_tables/t1', (15, 2)),
)
for store_key, (n_rows, n_cols) in frames_to_store:
    hdf.put(store_key, DataFrame(np.random.rand(n_rows, n_cols)))



In [8]:

    
# Show the contents of the store (one line per stored key).
# Since pandas 0.21 the repr of an HDFStore only contains the class and the
# file path; the detailed per-key listing is returned by HDFStore.info().
print(hdf.info())









    



<class 'pandas.io.pytables.HDFStore'>
File path: output/storage.h5
/d1                       frame_table  (typ->appendable,nrows->10,ncols->3,indexers->[index],dc->[A,B,C])
/new_tables/t1            frame        (shape->[15,2])                                                   
/tables/t1                frame        (shape->[20,5])                                                   
/tables/t2                frame        (shape->[10,3])