In [1]:
import numpy as np
import dask.async
import dask.array as da
import sys
sys.path.insert(0, '../..')
%load_ext autoreload
%autoreload 1
%aimport allel
%aimport allel.model
%aimport allel.model.dask
allel.__version__
Out[1]:
In [2]:
g = allel.GenotypeArray([[[0, 0], [0, 1], [0, 1]],
[[0, 1], [1, 1], [1, 1]],
[[1, 1], [1, 1], [0, 0]],
[[1, 1], [1, 1], [0, 0]],
[[1, 1], [0, 1], [-1, -1]]], dtype='i1')
g
Out[2]:
In [3]:
# the mapping array is effectively a look-up table, telling how to transform
# integer values in each row
mapping = np.array([[0, 1], # no transformation
[1, 0], # 0->1, 1->0
[0, 0], # 0->0, 1->0
[1, 0], # 0->1, 1->0
[0, 1]], # no transformation
dtype='i1')
In [4]:
# the only shape constraint is that size of first dimension must match
assert g.shape[0] == mapping.shape[0]
In [5]:
# this is the pure numpy implementation
expect = g.map_alleles(mapping)
expect
Out[5]:
In [6]:
chunks_dim0 = 2
In [7]:
gd = da.from_array(g, chunks=(chunks_dim0, 2, None))
gd
Out[7]:
In [8]:
md = da.from_array(mapping, chunks=(chunks_dim0, None)) # N.B., first dimension chunk size matches gd1
md
Out[8]:
In [9]:
def f(block, bmapping):
return allel.GenotypeArray(block).map_alleles(bmapping[:, 0, :])
In [10]:
gmapped1 = da.map_blocks(f, gd, md[:, None, :], chunks=gd.chunks)
gmapped1
Out[10]:
In [11]:
actual = allel.GenotypeArray(gmapped1.compute())
actual
Out[11]:
In [12]:
assert np.array_equal(expect, actual)
In [13]:
g = allel.GenotypeArray([
[[0, 0], [0, 1], [0, 0], [0, 1], [-1, -1]],
[[0, 2], [1, 1], [0, 2], [1, 1], [-1, -1]],
[[0, 0], [0, 1], [0, 0], [0, 1], [-1, -1]],
[[0, 2], [1, 1], [0, 2], [1, 1], [-1, -1]],
[[1, 0], [2, 1], [1, 0], [2, 1], [-1, -1]],
[[2, 2], [-1, -1], [2, 2], [-1, -1], [-1, -1]],
[[-1, -1], [-1, -1], [-1, -1], [-1, -1], [-1, -1]]
], dtype='i1')
g
Out[13]:
In [14]:
mapping = np.array([[0, 1, 2],
[2, 0, 1],
[1, 2, 0],
[2, 1, 0],
[1, 2, 0],
[2, 1, 0],
[2, 0, 1]], dtype=np.int8)
In [15]:
expect = g.map_alleles(mapping)
expect
Out[15]:
In [16]:
gd = da.from_array(g, chunks=(chunks_dim0, 2, None))
gd
Out[16]:
In [17]:
md = da.from_array(mapping, chunks=(chunks_dim0, None))
md
Out[17]:
In [18]:
def f(block, bmapping):
return allel.GenotypeArray(block).map_alleles(bmapping[:, 0, :])
In [19]:
res = da.map_blocks(f, gd, md[:, None, :], chunks=gd.chunks)
res
Out[19]:
In [20]:
actual = allel.GenotypeArray(res.compute())
actual
Out[20]:
In [21]:
assert np.array_equal(expect, actual)
In [22]:
def ff(block, bmapping):
return allel.GenotypeArray(block[:, :, :, 0]).map_alleles(bmapping[:, 0, 0, :])
In [23]:
res2 = da.map_blocks(ff, gd[:, :, :, None], md[:, None, None, :], chunks=gd.chunks, drop_dims=3)
res2
Out[23]:
In [24]:
actual2 = res2.compute()
In [25]:
actual2.shape
Out[25]:
In [26]:
assert np.array_equal(expect, actual2)
In [27]:
g = allel.GenotypeArray([
[[0, 0], [0, 1], [-1, -1]],
[[0, 2], [1, 1], [-1, -1]],
[[1, 0], [2, 1], [-1, -1]],
[[2, 2], [-1, -1], [-1, -1]],
[[-1, -1], [-1, -1], [-1, -1]]
], dtype='i1')
g
Out[27]:
In [28]:
mapping = np.array([[0, 1, 2],
[2, 0, 1],
[1, 2, 0],
[2, 1, 0],
[2, 0, 1]], dtype=np.int8)
In [29]:
expect = [[[0, 0], [0, 1], [-1, -1]],
[[2, 1], [0, 0], [-1, -1]],
[[2, 1], [0, 2], [-1, -1]],
[[0, 0], [-1, -1], [-1, -1]],
[[-1, -1], [-1, -1], [-1, -1]]]
In [31]:
assert np.array_equal(expect, g.map_alleles(mapping))
In [32]:
gd = da.from_array(g, chunks=(2, 2, None))
gd
Out[32]:
In [33]:
md = da.from_array(mapping, chunks=(2, None))
md
Out[33]:
In [34]:
res = da.map_blocks(f, gd, md[:, None, :], chunks=gd.chunks)
res
Out[34]:
In [35]:
actual = allel.GenotypeArray(res.compute())
actual
Out[35]:
In [36]:
assert np.array_equal(expect, actual)
In [ ]: