In [1]:
import numpy as np
import tables
import os

filename = "test.h5"

# We create an array of 32 parallel int16 signals sampled at 20 kHz, appended in 10 one-minute blocks (i.e. 10 minutes of signal in total).
datatype = np.dtype(np.int16)
itemsize = datatype.itemsize
nchannels = 32
freq = 20000.
rowsize = int(freq * 60)  # number of samples in one 1-minute block
nchunks = 10
#chunkshape = (1024, nchannels)
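
A quick sanity check (not one of the original cells) of the total size implied by these parameters, i.e. 10 one-minute blocks of 32 int16 channels at 20 kHz:

print(rowsize * nchunks * nchannels * itemsize / 1e6)  # ~768 MB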

In [2]:
#if not os.path.exists(filename):
# Open HDF5 file for writing.
print("Creating HDF5 file (~4.3 GB).")
fw = tables.openFile(filename, 'w')
atom = tables.Atom.from_dtype(datatype)
ds = fw.createEArray(fw.root, 'raw_data', atom, shape=(0, nchannels), expectedrows=rowsize * nchunks)#, chunkshape=chunkshape)
# We fill the file (~768 MB in total); this takes a few tens of seconds.
for _ in xrange(nchunks):
    # Random values in the int16 range stand in for a real recording.
    X = np.random.randint(low=-32000, high=32000, size=(rowsize, nchannels))
    ds.append(X)
# Close all files.
ds.flush()
fw.close()


Creating HDF5 file (~768 MB).
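
For reference, PyTables 3.x renames the camelCase calls used above (openFile -> open_file, createEArray -> create_earray). A minimal sketch of the same creation step with the newer spellings, reusing the names defined in the first cell:

fw = tables.open_file(filename, 'w')
atom = tables.Atom.from_dtype(datatype)
ds = fw.create_earray(fw.root, 'raw_data', atom, shape=(0, nchannels),
                      expectedrows=rowsize * nchunks)
for _ in range(nchunks):
    # Same random int16-range data as in the cell above.
    X = np.random.randint(low=-32000, high=32000, size=(rowsize, nchannels))
    ds.append(X)
ds.flush()
fw.close()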

In [3]:
# Open the file.
f = tables.openFile(filename, 'r')
ds = f.root.raw_data
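
Since no explicit chunkshape was passed at creation time, PyTables picked one automatically. It can be inspected on the reopened dataset (a quick check, not part of the original run; the exact value depends on the PyTables version):

print(ds.shape)       # (rowsize * nchunks, nchannels) = (12000000, 32)
print(ds.chunkshape)  # HDF5 chunk shape chosen by PyTables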

In [4]:
# Now we read undersampled data, i.e. one sample out of every `step` samples.
step = 1000
print("Getting {0:.1f} KB of data.".format(nchannels * rowsize * nchunks * itemsize / (1024. * float(step))))


Getting 750.0 KB of data.

In [5]:
%timeit -n 1 -r 1 x = ds[:int(freq * 10):step, :]


1 loops, best of 1: 7.6 ms per loop
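
As a point of comparison (not part of the original run), one can time reading the same 10-second span contiguously and subsampling in memory afterwards; whether this beats the strided read above depends on the chunk layout and on OS/HDF5 caching:

%timeit -n 1 -r 1 x = ds[:int(freq * 10), :][::step, :]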

In [6]:
f.close()
