In [ ]:
import tables as tb
import numpy as np
n, k = 1000000, 100
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
print len(ind), "rows to select out of", n
for chunk in (1, 10, 100, 1000, 10000):
with tb.openFile("test", "w") as f:
a = f.createEArray('/', 'test',
obj=np.random.rand(n//10, k),
chunkshape=(chunk, k))
for _ in range(9):
a.append(np.random.rand(n//10, k))
print "chunk =", chunk
%timeit -r1 -n1 a[ind].reshape((-1, k))
print
In [8]:
import tables as tb
import numpy as np
n, k = 1000000, 100
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
print len(ind), "rows to select out of", n
for chunk in (100, 1000, 10000):
with tb.openFile("test", "w") as f:
a = f.createEArray('/', 'test',
obj=np.random.rand(n//10, k),
chunkshape=(chunk, 10))
for _ in range(9):
a.append(np.random.rand(n//10, k))
print "chunk =", chunk
%timeit -r1 -n1 a[ind,0]
print
In [6]:
import tables as tb
import numpy as np
n, k = 1000000, 100
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
with tb.openFile("test", "w") as f:
a = f.createEArray('/', 'test',
obj=np.random.rand(n//10, k),
chunkshape=(100000, 1))
for _ in range(9):
a.append(np.random.rand(n//10, k))
%timeit -r1 -n1 [a[:,i][ind] for i in range(5)]
#%timeit -r1 -n1 a[ind,0]
print
In [4]:
import tables as tb
import numpy as np
n, k = 1000000, 1000
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
with tb.openFile("test2", "r") as f:
a = f.root.test
%timeit -r1 -n1 [a[:,i][ind] for i in range(2) for _ in range(2)]
print
In [5]:
import tables as tb
import numpy as np
n, k = 1000000, 1000
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
with tb.openFile("test2b", "r") as f:
a = f.root.test
%timeit -r1 -n1 [a[:,i,j][ind] for i in range(2) for j in range(2)]
%timeit -r1 -n1 [a[:,i,:][ind] for i in range(2)]
print
In [ ]: