In [ ]:
import tables as tb
import numpy as np
# Benchmark: time to pull roughly 1% of the rows (all k columns) out of an
# on-disk PyTables EArray, as a function of the chunk height.
# n is the total row count once all 10 blocks of n//10 rows are written;
# k is the number of columns.
n, k = 1000000, 100
# Boolean mask of length n, True with probability 0.01 per row ...
ind = np.random.rand(n) < .01
# ... converted to the ascending integer positions of the True entries.
ind = np.nonzero(ind)[0]
print len(ind), "rows to select out of", n
# One run per chunk height; in this cell a chunk always spans all k columns.
for chunk in (1, 10, 100, 1000, 10000):
    # Mode "w" recreates the file "test" from scratch on every iteration.
    with tb.openFile("test", "w") as f:
        # Create the array from a first block of n//10 random rows.
        a = f.createEArray('/', 'test', 
                           obj=np.random.rand(n//10, k),
                           chunkshape=(chunk, k))
        # Nine more blocks of the same shape bring the array to n rows
        # (assumes the first axis is the extendable one -- TODO confirm).
        for _ in range(9):
            a.append(np.random.rand(n//10, k))
        print "chunk =", chunk
        # Single timed run (-r1 -n1): select the rows listed in `ind` directly
        # from the on-disk array, then force the result to k columns.
        # NOTE(review): the file is still open for writing and nothing is
        # flushed before timing, so reads may be served partly from caches --
        # confirm this is what should be measured.
        %timeit -r1 -n1 a[ind].reshape((-1, k))
        print

In [8]:
import tables as tb
import numpy as np
# Benchmark: time to read column 0 for roughly 1% of the rows when each chunk
# covers `chunk` rows by 10 columns (instead of all k columns).
# n is the total row count once all 10 blocks of n//10 rows are written;
# k is the number of columns.
n, k = 1000000, 100
# Boolean mask of length n, True with probability 0.01 per row ...
ind = np.random.rand(n) < .01
# ... converted to the ascending integer positions of the True entries.
ind = np.nonzero(ind)[0]
print len(ind), "rows to select out of", n
# One run per chunk height; the chunk width is fixed at 10 columns.
for chunk in (100, 1000, 10000):
    # Mode "w" recreates the file "test" from scratch on every iteration.
    with tb.openFile("test", "w") as f:
        # Create the array from a first block of n//10 random rows.
        a = f.createEArray('/', 'test', 
                           obj=np.random.rand(n//10, k),
                           chunkshape=(chunk, 10))
        # Nine more blocks of the same shape bring the array to n rows
        # (assumes the first axis is the extendable one -- TODO confirm).
        for _ in range(9):
            a.append(np.random.rand(n//10, k))
        print "chunk =", chunk
        # Single timed run (-r1 -n1): select column 0 of the rows listed in
        # `ind` directly from the on-disk array.
        # NOTE(review): the file is still open for writing and nothing is
        # flushed before timing -- confirm this is what should be measured.
        %timeit -r1 -n1 a[ind,0]
        print


10013 rows to select out of 1000000
chunk = 100
1 loops, best of 1: 9.98 s per loop

chunk = 1000
1 loops, best of 1: 4.95 s per loop

chunk = 10000
1 loops, best of 1: 4.23 s per loop


In [6]:
import tables as tb
import numpy as np
# Benchmark: column-oriented chunks. Each chunk holds 100000 rows of a single
# column, and the selection is done by reading a whole column first and then
# indexing the result with `ind`.
# n is the total row count once all 10 blocks of n//10 rows are written;
# k is the number of columns.
n, k = 1000000, 100
# Boolean mask of length n, True with probability 0.01 per row ...
ind = np.random.rand(n) < .01
# ... converted to the ascending integer positions of the True entries.
ind = np.nonzero(ind)[0]
# Mode "w" recreates the file "test" from scratch.
with tb.openFile("test", "w") as f:
    # Create the array from a first block of n//10 random rows.
    a = f.createEArray('/', 'test', 
                       obj=np.random.rand(n//10, k),
                       chunkshape=(100000, 1))
    # Nine more blocks of the same shape bring the array to n rows
    # (assumes the first axis is the extendable one -- TODO confirm).
    for _ in range(9):
        a.append(np.random.rand(n//10, k))
    # Single timed run (-r1 -n1) covering five columns: read column i in full
    # with a[:,i], then pick the rows listed in `ind` from that result.
    %timeit -r1 -n1 [a[:,i][ind] for i in range(5)]
    # Disabled alternative: select column 0 of the `ind` rows directly on the
    # on-disk array.
    #%timeit -r1 -n1 a[ind,0]
    print


1 loops, best of 1: 26.9 ms per loop

NEW: write larger test files (k = 1000) to disk, then re-open them read-only for the timings below.

import tables as tb import numpy as np n, k = 1000000, 1000 ind = np.random.rand(n) < .01 ind = np.nonzero(ind)[0] with tb.openFile("test2", "w") as f: a = f.createEArray('/', 'test', obj=np.random.rand(n//10, k), chunkshape=(100000, 1)) for _ in range(9): print _ a.append(np.random.rand(n//10, k)) #%timeit -r1 -n1 [a[:,i][ind] for i in range(2)] #%timeit -r1 -n1 a[ind,0] print
import tables as tb import numpy as np n, k = 1000000, 1000 ind = np.random.rand(n) < .01 ind = np.nonzero(ind)[0] with tb.openFile("test2b", "w") as f: a = f.createEArray('/', 'test', obj=np.random.rand(n//10, k, 2), chunkshape=(100000, 1, 1)) for _ in range(9): print _ a.append(np.random.rand(n//10, k, 2)) #%timeit -r1 -n1 [a[:,i][ind] for i in range(2)] #%timeit -r1 -n1 a[ind,0] print

In [4]:
import tables as tb
import numpy as np
# Benchmark: read columns from the existing file "test2" and select ~1% of
# the rows from each.
# Presumably "test2" is the n x k file with chunkshape (100000, 1) written by
# the snippet in the NEW section -- verify before relying on the timing.
# k is not used in this cell.
n, k = 1000000, 1000
# Ascending integer positions of the rows chosen with probability 0.01 each.
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
# Read-only open; the array is the node named "test" under the root group.
with tb.openFile("test2", "r") as f:
    a = f.root.test
    # Single timed run (-r1 -n1) of four reads: columns 0 and 1, each read
    # twice (the inner `_` loop only repeats the read). Each read fetches the
    # whole column with a[:,i] and then picks the `ind` rows from the result.
    %timeit -r1 -n1 [a[:,i][ind] for i in range(2) for _ in range(2)]
    print


1 loops, best of 1: 18.7 ms per loop


In [5]:
import tables as tb
import numpy as np
# Benchmark: two ways of reading from the existing file "test2b", whose array
# is indexed with three indices here.
# Presumably "test2b" is the n x k x 2 file with chunkshape (100000, 1, 1)
# written by the snippet in the NEW section -- verify before relying on the
# timing. k is not used in this cell.
n, k = 1000000, 1000
# Ascending integer positions of the rows chosen with probability 0.01 each.
ind = np.random.rand(n) < .01
ind = np.nonzero(ind)[0]
# Read-only open; the array is the node named "test" under the root group.
with tb.openFile("test2b", "r") as f:
    a = f.root.test
    # Variant 1: four separate reads, one per (i, j) pair with i, j in {0, 1};
    # each fetches the full first-axis slice and then picks the `ind` rows.
    %timeit -r1 -n1 [a[:,i,j][ind] for i in range(2) for j in range(2)]
    # Variant 2: two reads, one per i, each fetching the whole last axis at
    # once before picking the `ind` rows.
    %timeit -r1 -n1 [a[:,i,:][ind] for i in range(2)]
    print


1 loops, best of 1: 21 ms per loop
1 loops, best of 1: 48.2 ms per loop


In [ ]: