notebook.community

Edit and run



In [1]:

    
import numpy as np
import numba as nb
import tables



In [2]:

    
sizeme = lambda n: n*9 / (1024.0**2)



In [7]:

    
n = 100000000/2
sizeme(n)









    Out[7]:





429.1534423828125



In [8]:

    
data = np.random.randint(0,255,n).astype(np.uint8)
col_ind = np.random.randint(0,100,n).astype(np.int32)
row_ind = np.random.randint(0,100,n).astype(np.int32)



In [9]:

    
arrays_dir = './'
index_dir = '/tmp/'



In [10]:

    
f.close()
f2.close()









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-0fe927beb707> in <module>()
----> 1 f.close()
      2 f2.close()

NameError: name 'f' is not defined



In [11]:

    
#create description of your table
class Table_Description(tables.IsDescription):
    data = tables.UInt8Col()   # 16-character String
    col_ind = tables.Int32Col()
    row_ind = tables.Int32Col()
    
blosc5 = tables.Filters(complib='blosc', complevel=5)
#create hdf5 file and table
f = tables.open_file(arrays_dir + 'coassoc.h5', mode="w", filters=blosc5)
a = f.create_table("/","coassoc",
                   description=Table_Description,
                   expectedrows=n)

cs = a.chunkshape[0] # chunk size



In [12]:

    
f2 = tables.open_file(arrays_dir + 'coassoc2.h5', mode="w")
a2 = f2.create_table("/","coassoc",
                   description=Table_Description,
                   expectedrows=n)

cs2 = a2.chunkshape[0] # chunk size



In [14]:

    
%time a.append((col_ind, data, row_ind))









    



CPU times: user 5.81 s, sys: 582 ms, total: 6.39 s
Wall time: 6.4 s



In [15]:

    
%time a2.append((col_ind, data, row_ind))









    



CPU times: user 2.04 s, sys: 1.29 s, total: 3.33 s
Wall time: 3.35 s



In [ ]:

    
%time a.cols.data.create_index(9, kind='full', tmp_dir=index_dir)
%time a2.cols.data.create_index(9, kind='full', tmp_dir=index_dir)



In [28]:

    
def wrap():
    for i in xrange(50):
        start = i*cs
        stop = start+cs
        b = a.read_sorted("data", checkCSI=True, start=start, stop=stop)
        
def wrap2():
    for i in xrange(50):
        start = i*cs2
        stop = start+cs2
        b2 = a2.read_sorted("data", checkCSI=True, start=start, stop=stop)



In [29]:

    
%time wrap()
%time wrap2()









    



CPU times: user 12.9 s, sys: 568 ms, total: 13.5 s
Wall time: 13.5 s
CPU times: user 3.57 s, sys: 1.48 s, total: 5.04 s
Wall time: 5.05 s



In [27]:

    
%time b = a.read_sorted("data", checkCSI=True, start=0, stop=50*cs)
%time b2 = a2.read_sorted("data", checkCSI=True, start=0, stop=50*cs)









    



CPU times: user 28.8 s, sys: 194 ms, total: 29 s
Wall time: 29 s
CPU times: user 28 s, sys: 23.1 ms, total: 28 s
Wall time: 28 s