In [1]:
import os
import timeit
import tables as tb
import numpy as np
In [2]:
def create_contiguous(filename):
with tb.openFile(filename, "w") as f:
a = f.createArray('/', 'test', atom=tb.Float32Atom(),
shape=(n,k))
n_ = n//10
for i in range(10):
print i,
a[i*n_:(i+1)*n_,...] = np.random.rand(n_, k)
In [3]:
def create_chunked(filename):
with tb.openFile(filename, "w") as f:
a = f.createEArray('/', 'test', atom=tb.Float32Atom(),
shape=(0,k), chunkshape=(100, k))
n_ = n//10
for i in range(10):
print i,
a.append(np.random.rand(n_, k))
In [4]:
def benchmark(*funcs, **options):
    """Time a single call of each reader function on the ``/test`` node.

    Parameters
    ----------
    *funcs : callables
        Each one is called exactly once as ``func(a)``, where ``a`` is the
        ``/test`` node of the opened file.
    filename : str, optional (keyword only)
        HDF5 file to open read-only.  When omitted, the notebook-global
        ``filename`` is looked up, which is what the function did before this
        keyword existed.

    Raises
    ------
    TypeError
        If a keyword other than ``filename`` is passed.
    NameError
        If ``filename`` is neither passed nor defined as a notebook global.

    The elapsed wall time of each call is printed as ``<name>: <seconds> s``.
    """
    # **options emulates a keyword-only argument, which Python 2 lacks.
    path = options.pop('filename', None)
    if options:
        raise TypeError('benchmark() got unexpected keyword arguments: %s'
                        % ', '.join(sorted(options)))
    if path is None:
        try:
            path = filename
        except NameError:
            raise NameError("benchmark() needs filename=... because no "
                            "notebook-global 'filename' is defined")
    with tb.open_file(path, "r") as f:
        a = f.root.test
        for func in funcs:
            # Plain timeit instead of the %timeit line magic: depending on the
            # IPython version, the magic may evaluate its statement in the
            # notebook's global namespace and so not see the locals `func`
            # and `a`.  number=1 mirrors the original "-r1 -n1".
            seconds = timeit.timeit(lambda: func(a), number=1)
            label = getattr(func, '__name__', repr(func))
            print('%s: %.3f s' % (label, seconds))
In [5]:
# Dimensions of the benchmark dataset: the create_* functions build a /test
# array with n rows and k columns.
n = 5000000
k = 100
In [6]:
# Path of the fixed-size ("contiguous") benchmark file.  The file is generated
# only when nothing exists at that path, so re-running this cell skips the
# generation step.  Only existence is checked, not the file's contents.
filename_contiguous = 'features_contiguous.h5'
if not os.path.exists(filename_contiguous):
    create_contiguous(filename_contiguous)
In [7]:
# Path of the chunked (extendable array) benchmark file.  The file is generated
# only when nothing exists at that path, so re-running this cell skips the
# generation step.  Only existence is checked, not the file's contents.
filename_chunked = 'features_chunked.h5'
if not os.path.exists(filename_chunked):
    create_chunked(filename_chunked)
In [8]:
# Row indices to read in the benchmarks: 50000 random draws from [0, n),
# reduced by np.unique to a sorted array without duplicates (so the final
# length can be slightly below 50000).
ind = np.unique(np.random.randint(low=0, high=n, size=50000))
In [9]:
def read1(a):
    """Read the rows listed in the notebook-global ``ind`` in one indexing call.

    ``a`` is indexed as ``a[ind, :]`` and the result is returned unchanged.
    """
    selected = a[ind, :]
    return selected
In [10]:
def read2(a):
    """Read the rows listed in the notebook-global ``ind`` one slice at a time.

    A result buffer with ``len(ind)`` rows, the trailing dimensions of ``a``
    and the dtype of ``a`` is allocated up front.  Each requested row is then
    fetched as a one-row slice of ``a`` and copied into its position.

    Returns the filled NumPy array.
    """
    n_rows = len(ind)
    result = np.empty((n_rows,) + a.shape[1:], dtype=a.dtype)
    for pos in range(n_rows):
        row = ind[pos]
        # One-row slices on both sides keep the shapes aligned.
        result[pos:pos + 1, ...] = a[row:row + 1, ...]
    return result
In [11]:
with tb.openFile(filename_contiguous, "r") as f:
a = f.root.test
%timeit -r1 -n1 read2(a)
In [12]:
with tb.openFile(filename_chunked, "r") as f:
a = f.root.test
%timeit -r1 -n1 read2(a)