In [1]:
import numpy as np
import numba as nb
import tables
In [2]:
def sizeme(n):
    """Return the size, in MiB, of n entries stored at 9 bytes each.

    The factor 9 matches one uint8 value plus two int32 indices per entry,
    which is how the arrays in this notebook are typed.
    """
    bytes_per_mib = 1024.0 ** 2
    return n * 9 / bytes_per_mib
In [7]:
# Number of (data, col_ind, row_ind) entries to generate.
# Floor division keeps n an integer under true division (Python 3, or
# `from __future__ import division`); n is later used as an array size and as
# the expected row count, both of which need an int.
n = 100000000 // 2
# Display the estimated size in MiB of n entries (sizeme assumes 9 bytes each).
sizeme(n)
Out[7]:
In [8]:
# Synthetic payload: n integers drawn from [0, 255), stored as unsigned bytes.
data = np.random.randint(low=0, high=255, size=n).astype(np.uint8)
# Synthetic coordinates: n column and n row indices, each drawn from [0, 100).
col_ind = np.random.randint(low=0, high=100, size=n).astype(np.int32)
row_ind = np.random.randint(low=0, high=100, size=n).astype(np.int32)
In [9]:
# Directory prefix for the HDF5 files created below (current working directory).
arrays_dir = "./"
# Directory passed as tmp_dir when the column indexes are built.
index_dir = "/tmp/"
In [10]:
# Close file handles left over from a previous run of the cells below
# (presumably so the files can be reopened in write mode).  On a fresh kernel
# neither `f` nor `f2` exists yet, so they are looked up by name instead of
# being referenced directly -- the direct reference raised NameError under
# Restart & Run All.
for handle_name in ('f', 'f2'):
    stale_handle = globals().get(handle_name)
    if stale_handle is not None:
        stale_handle.close()
In [11]:
# Row layout of the table: one unsigned byte value plus two 32-bit indices.
# `pos` pins the column order explicitly so that it always matches the
# (col_ind, data, row_ind) tuples passed to append() further down, instead of
# relying on the library's default ordering of unpositioned columns.
class Table_Description(tables.IsDescription):
    col_ind = tables.Int32Col(pos=0)  # 32-bit signed integer column
    data = tables.UInt8Col(pos=1)     # unsigned 8-bit integer column (not a string)
    row_ind = tables.Int32Col(pos=2)  # 32-bit signed integer column
# Compression settings for the first file: blosc at level 5.
blosc5 = tables.Filters(complevel=5, complib='blosc')

# Create the HDF5 file (write mode) with those filters and a table sized for n rows.
f = tables.open_file(arrays_dir + 'coassoc.h5', mode="w", filters=blosc5)
a = f.create_table(
    where="/",
    name="coassoc",
    description=Table_Description,
    expectedrows=n,
)

# First entry of the table's chunk shape, used later as the read step.
cs = a.chunkshape[0]
In [12]:
# Second file: same table layout, but opened without any filters argument.
f2 = tables.open_file(arrays_dir + 'coassoc2.h5', mode="w")
a2 = f2.create_table(
    where="/",
    name="coassoc",
    description=Table_Description,
    expectedrows=n,
)

# First entry of this table's chunk shape, used later as the read step.
cs2 = a2.chunkshape[0]
In [14]:
# Time the bulk append of the three arrays into table `a` (the file opened
# with the blosc5 filters).  The tuple supplies one array per column.
# NOTE(review): the tuple order presumably has to match the table's column
# order -- verify against a.colnames.
%time a.append((col_ind, data, row_ind))
In [15]:
# Time the same bulk append into table `a2` (the file opened without filters).
# NOTE(review): the tuple order presumably has to match the table's column
# order -- verify against a2.colnames.
%time a2.append((col_ind, data, row_ind))
In [ ]:
%time a.cols.data.create_index(9, kind='full', tmp_dir=index_dir)
%time a2.cols.data.create_index(9, kind='full', tmp_dir=index_dir)
In [28]:
def read_sorted_chunks(table, chunk_rows, n_chunks=50, sortby="data"):
    """Read `n_chunks` consecutive slices of `table` in sorted order.

    Each call to ``table.read_sorted`` covers ``chunk_rows`` rows, starting
    where the previous slice stopped.  The rows read are discarded: the
    function exists only so the reads can be timed.

    Parameters
    ----------
    table : object exposing ``read_sorted(sortby, checkCSI, start, stop)``
    chunk_rows : int
        Number of rows requested per call.
    n_chunks : int, optional
        How many consecutive slices to read (default 50).
    sortby : str, optional
        Column name to sort by (default "data").

    Returns
    -------
    None
    """
    # range() instead of xrange() so the loop runs on Python 2 and Python 3.
    for i in range(n_chunks):
        start = i * chunk_rows
        table.read_sorted(sortby, checkCSI=True,
                          start=start, stop=start + chunk_rows)


def wrap():
    """Read 50 slices of cs rows each from table `a`, sorted by "data"."""
    read_sorted_chunks(a, cs)


def wrap2():
    """Read 50 slices of cs2 rows each from table `a2`, sorted by "data"."""
    read_sorted_chunks(a2, cs2)
In [29]:
# Time the 50 slice-by-slice sorted reads: wrap() reads from table `a`,
# wrap2() reads from table `a2`.
%time wrap()
%time wrap2()
In [27]:
%time b = a.read_sorted("data", checkCSI=True, start=0, stop=50*cs)
%time b2 = a2.read_sorted("data", checkCSI=True, start=0, stop=50*cs)