There are lies, damned lies and benchmarks... The cells below write and read the same 500,000,000-element array through each of zarr's storage backends (plain dict, DictStore, LMDB, GNU dbm, ndbm, Berkeley DB btree and hash, Zip file and directory), first with well-compressible np.arange data, then with random data, and finally via dask.array.
In [1]:
import zarr
zarr.__version__
Out[1]:
In [2]:
import bsddb3
bsddb3.__version__
Out[2]:
In [3]:
import lmdb
lmdb.__version__
Out[3]:
In [4]:
import numpy as np
In [5]:
import dbm.gnu
import dbm.ndbm
In [6]:
import os
import shutil
bench_dir = '../data/bench'
def clean():
    if os.path.isdir(bench_dir):
        shutil.rmtree(bench_dir)
    os.makedirs(bench_dir)
def setup(a, name='foo/bar'):
    global fdict_z, hdict_z, lmdb_z, gdbm_z, ndbm_z, bdbm_btree_z, bdbm_hash_z, zip_z, dir_z
    clean()
    # one group per storage backend
    fdict_root = zarr.group(store=dict())
    hdict_root = zarr.group(store=zarr.DictStore())
    lmdb_root = zarr.group(store=zarr.LMDBStore(os.path.join(bench_dir, 'lmdb')))
    gdbm_root = zarr.group(store=zarr.DBMStore(os.path.join(bench_dir, 'gdbm'), open=dbm.gnu.open))
    ndbm_root = zarr.group(store=zarr.DBMStore(os.path.join(bench_dir, 'ndbm'), open=dbm.ndbm.open))
    bdbm_btree_root = zarr.group(store=zarr.DBMStore(os.path.join(bench_dir, 'bdbm_btree'), open=bsddb3.btopen))
    bdbm_hash_root = zarr.group(store=zarr.DBMStore(os.path.join(bench_dir, 'bdbm_hash'), open=bsddb3.hashopen))
    zip_root = zarr.group(store=zarr.ZipStore(os.path.join(bench_dir, 'zip'), mode='w'))
    dir_root = zarr.group(store=zarr.DirectoryStore(os.path.join(bench_dir, 'dir')))
    # one empty array per backend, matching the shape and dtype of a
    fdict_z = fdict_root.empty_like(name, a)
    hdict_z = hdict_root.empty_like(name, a)
    lmdb_z = lmdb_root.empty_like(name, a)
    gdbm_z = gdbm_root.empty_like(name, a)
    ndbm_z = ndbm_root.empty_like(name, a)
    bdbm_btree_z = bdbm_btree_root.empty_like(name, a)
    bdbm_hash_z = bdbm_hash_root.empty_like(name, a)
    zip_z = zip_root.empty_like(name, a)
    dir_z = dir_root.empty_like(name, a)
    # check compression ratio
    fdict_z[:] = a
    return fdict_z.info
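The setup cell returns fdict_z.info, which (among other things) reports how well the data compresses in the in-memory copy. As a minimal sketch, assuming the nbytes and nbytes_stored array properties of zarr 2.x, the same ratio can be pulled out directly:
In [ ]:
# sketch only: ratio of uncompressed bytes to bytes actually held by the store,
# using the zarr 2.x nbytes / nbytes_stored properties
def compression_ratio(z):
    return z.nbytes / z.nbytes_stored

# e.g. after setup(a): compression_ratio(fdict_z)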
In [7]:
def save(a, z):
    if isinstance(z.store, zarr.ZipStore):
        # needed for zip benchmarks to avoid duplicate entries
        z.store.clear()
    z[:] = a
    if hasattr(z.store, 'flush'):
        z.store.flush()

def load(z, a):
    # read the whole array back into the pre-allocated numpy array
    z.get_basic_selection(out=a)
In [8]:
a = np.arange(500000000)
setup(a)
Out[8]:
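The %timeit magics below drive the save/load helpers against each backend in turn. As a rough sketch of how the same measurement could be taken outside IPython, the standard-library timeit module works too (using the array a and the dir_z array created above; dir_z is just an example):
In [ ]:
# sketch only: time a single save with the stdlib timeit module instead of %timeit,
# keeping the best of three repeats of one run each
import timeit
best = min(timeit.repeat(lambda: save(a, dir_z), repeat=3, number=1))
print('directory store save: %.2fs' % best)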
In [9]:
%timeit save(a, fdict_z)
In [10]:
%timeit save(a, hdict_z)
In [11]:
%timeit save(a, lmdb_z)
In [12]:
%timeit save(a, gdbm_z)
In [13]:
%timeit save(a, ndbm_z)
In [14]:
%timeit save(a, bdbm_btree_z)
In [15]:
%timeit save(a, bdbm_hash_z)
In [16]:
%timeit save(a, zip_z)
In [17]:
%timeit save(a, dir_z)
In [18]:
%timeit load(fdict_z, a)
In [19]:
%timeit load(hdict_z, a)
In [20]:
%timeit load(lmdb_z, a)
In [21]:
%timeit load(gdbm_z, a)
In [22]:
%timeit load(ndbm_z, a)
In [23]:
%timeit load(bdbm_btree_z, a)
In [24]:
%timeit load(bdbm_hash_z, a)
In [25]:
%timeit load(zip_z, a)
In [26]:
%timeit load(dir_z, a)
In [28]:
np.random.seed(42)
a = np.random.randint(0, 2**30, size=500000000)
setup(a)
Out[28]:
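The random data here should compress much less well than the np.arange data above, which is the point of re-running the benchmarks on a second dataset. As a quick check, reusing the hypothetical compression_ratio helper sketched earlier:
In [ ]:
# sketch only: compression ratio for the random data held by the in-memory copy
compression_ratio(fdict_z)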
In [29]:
%timeit -r3 save(a, fdict_z)
In [30]:
%timeit -r3 save(a, hdict_z)
In [31]:
%timeit -r3 save(a, lmdb_z)
In [32]:
%timeit -r3 save(a, gdbm_z)
In [33]:
%timeit -r3 save(a, ndbm_z)
In [34]:
%timeit -r3 save(a, bdbm_btree_z)
In [35]:
%timeit -r3 save(a, bdbm_hash_z)
In [36]:
%timeit -r3 save(a, zip_z)
In [38]:
%timeit -r3 save(a, dir_z)
In [39]:
%timeit -r3 load(fdict_z, a)
In [40]:
%timeit -r3 load(hdict_z, a)
In [41]:
%timeit -r3 load(lmdb_z, a)
In [42]:
%timeit -r3 load(gdbm_z, a)
In [43]:
%timeit -r3 load(ndbm_z, a)
In [44]:
%timeit -r3 load(bdbm_btree_z, a)
In [45]:
%timeit -r3 load(bdbm_hash_z, a)
In [46]:
%timeit -r3 load(zip_z, a)
In [47]:
%timeit -r3 load(dir_z, a)
In [48]:
import dask.array as da
In [50]:
def dask_op(source, sink, chunks=None):
    if isinstance(sink.store, zarr.ZipStore):
        # needed for zip benchmarks to avoid duplicate entries
        sink.store.clear()
    # default to the sink's chunking, falling back to the source's
    if chunks is None:
        try:
            chunks = sink.chunks
        except AttributeError:
            chunks = source.chunks
    d = da.from_array(source, chunks=chunks, asarray=False, fancy=False, lock=False)
    result = (d // 2) * 2
    da.store(result, sink, lock=False)
    if hasattr(sink.store, 'flush'):
        sink.store.flush()
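dask_op streams each chunk through a trivial (d // 2) * 2 computation and stores the result back into the sink array, so it exercises the stores under multi-threaded access (lock=False). A minimal sketch on a tiny throw-away array, assuming the same zarr/dask versions as above, shows the pattern without the 500,000,000-element cost (the small_* names are hypothetical):
In [ ]:
# sketch only: same pattern on a tiny in-memory array
small_src = zarr.array(np.arange(100), chunks=10, store=dict())
small_sink = zarr.empty_like(small_src, store=dict())
dask_op(small_src, small_sink, chunks=10)
# every odd value is rounded down to the even number below it
print(small_sink[:10])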
In [76]:
%time dask_op(fdict_z, fdict_z)
In [77]:
%time dask_op(hdict_z, fdict_z)
In [78]:
%time dask_op(lmdb_z, fdict_z)
In [79]:
%time dask_op(gdbm_z, fdict_z)
In [80]:
%time dask_op(ndbm_z, fdict_z)
In [81]:
%time dask_op(bdbm_btree_z, fdict_z)
In [82]:
%time dask_op(bdbm_hash_z, fdict_z)
In [83]:
%time dask_op(zip_z, fdict_z)
In [84]:
%time dask_op(dir_z, fdict_z)
In [51]:
%time dask_op(fdict_z, hdict_z)
In [52]:
%time dask_op(fdict_z, lmdb_z)
In [59]:
%time dask_op(fdict_z, gdbm_z)
In [54]:
%time dask_op(fdict_z, ndbm_z)
In [61]:
%time dask_op(fdict_z, bdbm_btree_z)
In [56]:
%time dask_op(fdict_z, bdbm_hash_z)
In [57]:
%time dask_op(fdict_z, zip_z)
In [58]:
%time dask_op(fdict_z, dir_z)
In [62]:
lmdb_z.store.close()
gdbm_z.store.close()
ndbm_z.store.close()
bdbm_btree_z.store.close()
bdbm_hash_z.store.close()
zip_z.store.close()