In [1]:
import os
import timeit
import tables as tb
import h5py
import numpy as np

In [2]:
# Dimensions of the benchmark dataset, used as shape (n, k, l).
n = 1000000  # length of the first axis (the one indexed when reading)
k = 50       # length of the second axis
l = 64       # length of the third axis

In [3]:
def create(filename, n_chunks=20):
    """Create the benchmark HDF5 file with one int16 dataset at '/test'.

    The dataset has shape (n, k, l), taken from the notebook-level
    constants, and is filled block by block along the first axis so the
    full array never has to be held in memory at once.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to write. It is opened in "w" mode.
    n_chunks : int, optional
        Number of blocks the first axis is split into while writing.
        Must be positive. Defaults to 20.
    """
    # Ceiling division so the trailing rows are still written when n is not
    # a multiple of n_chunks; clamp to 1 so range() always gets a valid step.
    rows_per_chunk = max(1, -(-n // n_chunks))
    with h5py.File(filename, "w") as f:
        a = f.create_dataset('/test', dtype=np.int16, shape=(n, k, l))
        for i, start in enumerate(range(0, n, rows_per_chunk)):
            # Progress indicator; this call form is valid on Python 2 and 3.
            print(i)
            stop = min(start + rows_per_chunk, n)
            # np.random.rand yields floats in [0, 1), which all become 0 once
            # stored as int16. Draw integers over the int16 range instead.
            a[start:stop, ...] = np.random.randint(
                -2**15, 2**15, size=(stop - start, k, l)).astype(np.int16)

In [11]:
# Path of the HDF5 file shared by the PyTables and h5py read benchmarks.
filename = 'waveforms.h5'
# Build the file only when nothing exists at that path, so re-running the
# notebook does not regenerate it.
# NOTE(review): this only checks existence, so a partially written file left
# by an interrupted create() call will be reused as-is.
if not os.path.exists(filename):
    create(filename)

In [24]:
def read(a, out, indices=None):
    """Copy selected rows of `a` into `out`, one row per read.

    Parameters
    ----------
    a : array-like
        Source supporting slicing along its first axis (the notebook passes
        an h5py dataset or a PyTables array).
    out : array-like
        Destination; row j receives the row of `a` at the j-th index.
    indices : iterable of int, optional
        Positions along the first axis of `a` to read. When omitted, the
        notebook-level `ind` array is used, so existing `read(a, out)` calls
        behave as before.

    Returns
    -------
    The same `out` object, after it has been filled.
    """
    if indices is None:
        # Backward-compatible default: fall back to the notebook global.
        indices = ind
    for j, i in enumerate(indices):
        # Length-1 slices (rather than scalar indices) are kept on purpose so
        # the access pattern being timed is unchanged.
        out[j:j+1, ...] = a[i:i+1, ...]
    return out

In [51]:
# Draw 100 random positions in [0, n) along the first axis, then let
# np.unique drop duplicates and sort them. The result can therefore hold
# fewer than 100 entries.
ind = np.unique(np.random.randint(low=0, high=n, size=100))

In [52]:
with tb.openFile(filename, "r") as f:
    a = f.root.test
    out = np.empty((len(ind),k,l), dtype=a.dtype)
    %timeit -r1 -n1 read(a, out)


1 loops, best of 1: 331 ms per loop

In [44]:
with h5py.File(filename, "r") as f:
    a = f['/test']
    out = np.empty((len(ind),k,l), dtype=a.dtype)
    %timeit -r1 -n1 read(a, out)


1 loops, best of 1: 452 ms per loop