In [1]:
# Put '..' (the parent of the current working directory) at the front of the
# module search path *before* importing zarr, so that a zarr package found
# there takes precedence over any other copy on sys.path.
# NOTE(review): presumably this targets a development checkout of zarr - confirm.
import sys
sys.path.insert(0, '..')
import zarr
# Display the version of the zarr package that was actually imported.
zarr.__version__
Out[1]:
In [2]:
# Open the zipped haplotypes store read-only and pick out the genotype
# array stored under '3L/calldata/genotype'.
zip_path = '/data/coluzzi/ag1000g/data/phase1/release/AR3.1/haplotypes/main/zarr2/zstd/ag1000g.phase1.ar3.1.haplotypes.zip'
store = zarr.ZipStore(zip_path, mode='r')
grp = zarr.Group(store)
z = grp['3L/calldata/genotype']
# Display the array that was looked up.
z
Out[2]:
In [5]:
# Profile reading the first 10 items along the first axis of the zip-backed
# array `z`. The statement is passed as a string, and the report is sorted by
# cumulative time.
import cProfile
cProfile.run('z[:10]', sort='cumtime')
In [6]:
# Import dask and its array module (aliased as `da`), then display the
# dask version in use.
import dask
import dask.array as da
dask.__version__
Out[6]:
In [7]:
# Wrap the zip-backed zarr array in a dask array, reusing the zarr array's
# own chunk shape as the dask chunk shape.
d = da.from_array(z, chunks=z.chunks)
# Display the resulting dask array.
d
Out[7]:
In [8]:
# Time a sum over axis 1 of the zip-backed dask array; .compute() triggers the
# actual evaluation, so building the expression and computing it are both timed.
%time d.sum(axis=1).compute()
Out[8]:
In [9]:
# Baseline for comparison: open the same data through a directory store
# instead of the zip file, and pick out the same genotype array.
dir_path = '/data/coluzzi/ag1000g/data/phase1/release/AR3.1/haplotypes/main/zarr2/zstd/ag1000g.phase1.ar3.1.haplotypes'
store_dir = zarr.DirectoryStore(dir_path)
grp_dir = zarr.Group(store_dir)
z_dir = grp_dir['3L/calldata/genotype']
# Display the array that was looked up.
z_dir
Out[9]:
In [10]:
# Wrap the directory-backed zarr array in a dask array, reusing the zarr
# array's own chunk shape as the dask chunk shape.
d_dir = da.from_array(z_dir, chunks=z_dir.chunks)
# Display the resulting dask array.
d_dir
Out[10]:
In [11]:
# Time the same axis-1 sum on the directory-backed dask array, for comparison
# with the timing obtained from the zip-backed array.
%time d_dir.sum(axis=1).compute()
Out[11]:
In [ ]: