In [1]:
import numpy as np
import numba as nb
import tables

In [2]:
sizeme = lambda n: n*9 / (1024.0**2)

In [7]:
n = 100000000/2
sizeme(n)


Out[7]:
429.1534423828125

In [8]:
data = np.random.randint(0,255,n).astype(np.uint8)
col_ind = np.random.randint(0,100,n).astype(np.int32)
row_ind = np.random.randint(0,100,n).astype(np.int32)

In [9]:
arrays_dir = './'
index_dir = '/tmp/'

In [10]:
f.close()
f2.close()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-0fe927beb707> in <module>()
----> 1 f.close()
      2 f2.close()

NameError: name 'f' is not defined

In [11]:
#create description of your table
class Table_Description(tables.IsDescription):
    data = tables.UInt8Col()   # 16-character String
    col_ind = tables.Int32Col()
    row_ind = tables.Int32Col()
    
blosc5 = tables.Filters(complib='blosc', complevel=5)
#create hdf5 file and table
f = tables.open_file(arrays_dir + 'coassoc.h5', mode="w", filters=blosc5)
a = f.create_table("/","coassoc",
                   description=Table_Description,
                   expectedrows=n)

cs = a.chunkshape[0] # chunk size

In [12]:
f2 = tables.open_file(arrays_dir + 'coassoc2.h5', mode="w")
a2 = f2.create_table("/","coassoc",
                   description=Table_Description,
                   expectedrows=n)

cs2 = a2.chunkshape[0] # chunk size

In [14]:
%time a.append((col_ind, data, row_ind))


CPU times: user 5.81 s, sys: 582 ms, total: 6.39 s
Wall time: 6.4 s

In [15]:
%time a2.append((col_ind, data, row_ind))


CPU times: user 2.04 s, sys: 1.29 s, total: 3.33 s
Wall time: 3.35 s

In [ ]:
%time a.cols.data.create_index(9, kind='full', tmp_dir=index_dir)
%time a2.cols.data.create_index(9, kind='full', tmp_dir=index_dir)

In [28]:
def wrap():
    for i in xrange(50):
        start = i*cs
        stop = start+cs
        b = a.read_sorted("data", checkCSI=True, start=start, stop=stop)
        
def wrap2():
    for i in xrange(50):
        start = i*cs2
        stop = start+cs2
        b2 = a2.read_sorted("data", checkCSI=True, start=start, stop=stop)

In [29]:
%time wrap()
%time wrap2()


CPU times: user 12.9 s, sys: 568 ms, total: 13.5 s
Wall time: 13.5 s
CPU times: user 3.57 s, sys: 1.48 s, total: 5.04 s
Wall time: 5.05 s

In [27]:
%time b = a.read_sorted("data", checkCSI=True, start=0, stop=50*cs)
%time b2 = a2.read_sorted("data", checkCSI=True, start=0, stop=50*cs)


CPU times: user 28.8 s, sys: 194 ms, total: 29 s
Wall time: 29 s
CPU times: user 28 s, sys: 23.1 ms, total: 28 s
Wall time: 28 s