In [40]:
import os
import time
import h5py
import numpy as np
In [41]:
# Dimensions of the test array, used as shape (n, k, l):
# n rows along the first axis, each row a (k, l) block.
# With the int16 dtype used by create(), that is n*k*l*2 bytes (~12.8 GB).
n = 1000000
k = 50
l = 128
In [42]:
def create(filename):
    """Create a contiguous int16 dataset '/test' of shape (n, k, l).

    The dataset is filled with random integers in [-32000, 32000) and is
    written in about 50 slabs along the first axis so that the whole array
    never has to be held in memory at once.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to create; an existing file is overwritten
        (the file is opened in "w" mode).

    Notes
    -----
    Reads the notebook-level globals ``n``, ``k`` and ``l``.
    Prints the index of each slab as a progress indicator.
    """
    # Local import: only needed for the progress output, and it keeps the
    # function working on both Python 2 and Python 3 (the former
    # ``print i,`` statement is a SyntaxError on Python 3).
    import sys

    n_chunks = 50
    # Rows per slab; at least 1 so that n < n_chunks still writes data.
    chunk_rows = max(1, n // n_chunks)

    with h5py.File(filename, "w") as f:
        a = f.create_dataset('/test', dtype=np.int16,
                             shape=(n, k, l))
        # Step through the rows by slab; the final slab is clipped to n so
        # that rows beyond a multiple of chunk_rows are written too.
        for i, start in enumerate(range(0, n, chunk_rows)):
            stop = min(start + chunk_rows, n)
            sys.stdout.write("%d " % i)
            sys.stdout.flush()
            # Generate int16 directly rather than the default (wider)
            # integer type to keep the temporary slab small.
            a[start:stop, ...] = np.random.randint(size=(stop - start, k, l),
                                                   low=-32000,
                                                   high=32000,
                                                   dtype=np.int16)
Create the file if necessary.
In [43]:
# The file name embeds the row count n, so each value of n gets its own file.
filename = 'test_%d.h5' % n
# Only generate the dataset when no file with that name exists yet.
file_missing = not os.path.exists(filename)
if file_missing:
    create(filename)
This function copies the selected rows (`ind`) from the on-disk dataset into a preallocated array, one row per read.
In [44]:
def read(a, out, indices=None):
    """Copy selected rows of `a` into `out`, one row per read.

    Parameters
    ----------
    a : array-like
        Source indexed along its first axis (here, the HDF5 dataset).
    out : array-like
        Preallocated destination; row ``j`` receives ``a[indices[j]]``.
        Must have at least ``len(indices)`` rows.
    indices : iterable of int, optional
        Row numbers to copy, in order. When omitted, the notebook-level
        global ``ind`` is used, which keeps existing ``read(a, out)``
        calls working.

    Returns
    -------
    out
        The same object that was passed in, now filled.
    """
    if indices is None:
        # Backward-compatible fallback to the notebook-level selection.
        indices = ind
    for j, i in enumerate(indices):
        # Length-1 slices (rather than scalar indexing) keep one separate
        # read per row, which is the access pattern being measured.
        out[j:j+1, ...] = a[i:i+1, ...]
    return out
Choose rows randomly.
In [45]:
# Number of random draws.
size = 100
# Draw row numbers from [0, n), then sort and deduplicate them with np.unique;
# as a result len(ind) can be smaller than size.
ind = np.unique(np.random.randint(low=0, high=n, size=size))
Load those rows into memory from the HDF5 file and measure the read bandwidth.
In [46]:
# Wall-clock timer. time.clock() was removed in Python 3.8 and measures CPU
# time on Unix, which leaves out the time spent waiting on disk I/O.
# perf_counter is used when available, time.time otherwise (Python 2).
timer = getattr(time, "perf_counter", time.time)

with h5py.File(filename, "r") as f:
    a = f['/test']
    # One destination row per selected index.
    out = np.empty((len(ind), k, l), dtype=a.dtype)
    t0 = timer()
    read(a, out)
    t1 = timer()

d = t1 - t0
# Bytes actually read: out.nbytes covers len(ind) rows (which can be fewer
# than `size` after deduplication) at the dataset's real item size.
megabytes = out.nbytes / (1024 * 1024.)
# Guard against a zero interval on coarse timers.
bandwidth = megabytes / d if d > 0 else float("inf")
print("Elapsed: {0:.2f} s".format(d))
print("Bandwidth: {0:.1f} MB/s".format(bandwidth))