In [1]:
import tables as tb
import h5py
import numpy as np
In [5]:
import tables as tb
import numpy as np
n, k = 200000, 100
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
print len(ind), "rows to select"
for chunk in (1, 10, 100, 1000, 10000):
f = tb.openFile("test", "w")
a = f.createEArray('/', 'test',
obj=np.random.rand(n, k),
chunkshape=(chunk, k))
print "chunk =", chunk
%timeit -r1 -n1 a[ind].reshape((-1, k))
print
f.close()
In [10]:
# Dimensions of the benchmark array used by the cells below.
n = 100000         # number of rows
k = 100            # number of columns
shape = (n, k)     # full array shape as a tuple
In [11]:
# Build the benchmark file: an EArray with one row per chunk (chunkshape
# (1, k)), filled in ten batches of n // 10 random rows each (the initial
# `obj` plus nine appends).
# The context manager closes the file even if creation or an append raises;
# the original explicit f.close() was skipped in that case.
with tb.openFile("test", "w") as f:
    a = f.createEArray('/', 'test', obj=np.random.rand(n // 10, k),
                       chunkshape=(1, k))
    for _ in range(9):
        a.append(np.random.rand(n // 10, k))
Contiguous slice: read the first n // 10 rows with a single slice.
In [12]:
with tb.openFile("test", "r") as f:
a = f.root.test
%timeit -r1 -n1 a[:n // 10]
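Index array (with the PyTables reshape hack): read the same first n // 10 rows, selected through an explicit array of row indices.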
In [13]:
with tb.openFile("test", "r") as f:
a = f.root.test
ind = np.arange(n // 10)
%timeit -r1 -n1 a[ind].reshape((n // 10, k))
Strided slice: read every 10th row, first column only.
In [14]:
with tb.openFile("test", "r") as f:
a = f.root.test
%timeit -r1 -n1 a[::10,0]
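Index array (with the PyTables reshape hack): read every 10th row (all k columns), selected through an explicit array of row indices.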
In [15]:
with tb.openFile("test", "r") as f:
a = f.root.test
ind = np.arange(0, n, 10)
%timeit -r1 -n1 a[ind].reshape((n // 10, k))
Random rows, p = 0.01: each row is selected independently with probability 0.01.
In [17]:
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
with tb.openFile("test", "r") as f:
a = f.root.test
%timeit -r1 -n1 a[ind].reshape((-1, k))
In [19]:
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
with tb.openFile("test", "r") as f:
a = f.root.test
%timeit -r1 -n1 a[ind,0]
Random rows, p = 0.1: each row is selected independently with probability 0.1.
In [18]:
ind = np.random.rand(n) < .1
ind = np.nonzero(ind)[0]
with tb.openFile("test", "r") as f:
a = f.root.test
%timeit -r1 -n1 a[ind].reshape((-1, k))
In [ ]:
# Open the benchmark file read-only through h5py.
# NOTE(review): this handle is never closed in the visible cells, and the
# h5py timing cell further down rebinds `f` via its own `with` block.
f = h5py.File("test", mode="r")
Random rows, p = 0.1, read through h5py instead of PyTables.
In [20]:
ind = np.random.rand(n) < .1
ind = np.nonzero(ind)[0]
with h5py.File("test", "r") as f:
a = f.get("test")
%timeit -r1 -n1 a[ind, 0]
In [ ]: