In [1]:
import numpy as np
import tables
import os
filename = "test.h5"
# We create an array holding 32 parallel 1h-long signals sampled at 20 kHz with 16-bit samples, appending the data in 1-minute chunks.
datatype = np.dtype(np.int16)
itemsize = datatype.itemsize
nchannels = 32
freq = 20000.
rowsize = int(freq * 60)  # number of rows in one 1-minute chunk
nchunks = 60  # 60 one-minute chunks, i.e. 1 hour of data
#chunkshape = (1024, nchannels)
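# Sanity check on the numbers above: the total raw data size is
# rows per chunk * chunks * channels * bytes per sample.
nbytes = rowsize * nchunks * nchannels * itemsize
print("Expected data size: {0:.2f} GB.".format(nbytes / 1024. ** 3))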
In [2]:
# Open HDF5 file for writing.
print("Creating HDF5 file (~4.3 GB).")
fw = tables.open_file(filename, 'w')
atom = tables.Atom.from_dtype(datatype)
# Extendable array with 0 rows initially; the data is appended chunk by chunk below.
ds = fw.create_earray(fw.root, 'raw_data', atom, shape=(0, nchannels), expectedrows=rowsize * nchunks)#, chunkshape=chunkshape
# We fill the file (~4.3 GB of data in total); this takes a few tens of seconds.
for _ in range(nchunks):
    X = np.random.randint(low=-32000, high=32000, size=(rowsize, nchannels), dtype=datatype)
    ds.append(X)
# Flush and close the file.
ds.flush()
fw.close()
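# Check the on-disk size; it should be slightly larger than the raw data
# because of HDF5 metadata overhead.
print("File size: {0:.2f} GB.".format(os.path.getsize(filename) / 1024. ** 3))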
In [3]:
# Reopen the file in read-only mode.
f = tables.open_file(filename, 'r')
ds = f.root.raw_data
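# Inspect the array: its shape, and the chunk shape PyTables picked
# automatically (no explicit chunkshape was passed at creation time).
print(ds.shape, ds.chunkshape)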
In [4]:
# Now we try to read undersampled data, i.e. one row every `step` rows.
step = 1000
# Size of the full dataset once undersampled by `step`:
print("Getting {0:.1f} KB of data.".format(nchannels * rowsize * nchunks * itemsize / (1024. * step)))
In [5]:
# Time a strided read of the first 10 seconds (one row every `step` rows).
%timeit -n 1 -r 1 x = ds[:int(freq * 10):step, :]
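# For comparison: strided reads can be slow in HDF5 because each selected
# row may sit in a different chunk. A common workaround (a sketch, not a
# definitive benchmark) is to read the 10-second block contiguously, then
# subsample it in memory with NumPy.
%timeit -n 1 -r 1 x = ds[:int(freq * 10)][::step, :]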
In [6]:
f.close()
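# Optionally remove the ~4.3 GB test file once we are done.
if os.path.exists(filename):
    os.remove(filename)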