In [1]:
import numpy as np
import dask.async
import dask.array as da
import sys
sys.path.insert(0, '../..')
%load_ext autoreload
%autoreload 1
%aimport allel
%aimport allel.model
%aimport allel.model.dask
allel.__version__


Out[1]:
'0.20.0.feature_dask'

Example 1


In [2]:
g = allel.GenotypeArray([[[0, 0], [0, 1], [0, 1]],
                         [[0, 1], [1, 1], [1, 1]],
                         [[1, 1], [1, 1], [0, 0]],
                         [[1, 1], [1, 1], [0, 0]],
                         [[1, 1], [0, 1], [-1, -1]]], dtype='i1')
g


Out[2]:
GenotypeArray((5, 3, 2), dtype=int8)
0 1 2
0 0/0 0/1 0/1
1 0/1 1/1 1/1
2 1/1 1/1 0/0
3 1/1 1/1 0/0
4 1/1 0/1 ./.

In [3]:
# the mapping array is effectively a look-up table, telling how to transform
# integer values in each row
mapping = np.array([[0, 1],  # no transformation
                    [1, 0],  # 0->1, 1->0
                    [0, 0],  # 0->0, 1->0
                    [1, 0],  # 0->1, 1->0
                    [0, 1]], # no transformation
                   dtype='i1')

In [4]:
# the only shape constraint is that size of first dimension must match
assert g.shape[0] == mapping.shape[0]

In [5]:
# this is the pure numpy implementation
expect = g.map_alleles(mapping)
expect


Out[5]:
GenotypeArray((5, 3, 2), dtype=int8)
0 1 2
0 0/0 0/1 0/1
1 1/0 0/0 0/0
2 0/0 0/0 0/0
3 0/0 0/0 1/1
4 1/1 0/1 ./.

In [6]:
chunks_dim0 = 2

In [7]:
gd = da.from_array(g, chunks=(chunks_dim0, 2, None))
gd


Out[7]:
dask.array<from-ar..., shape=(5, 3, 2), dtype=int8, chunksize=(2, 2, 2)>

In [8]:
md = da.from_array(mapping, chunks=(chunks_dim0, None))  # N.B., first dimension chunk size matches gd1
md


Out[8]:
dask.array<from-ar..., shape=(5, 2), dtype=int8, chunksize=(2, 2)>

In [9]:
def f(block, bmapping):
    return allel.GenotypeArray(block).map_alleles(bmapping[:, 0, :])

In [10]:
gmapped1 = da.map_blocks(f, gd, md[:, None, :], chunks=gd.chunks)
gmapped1


Out[10]:
dask.array<map-blo..., shape=(5, 3, 2), dtype=None, chunksize=(2, 2, 2)>

In [11]:
actual = allel.GenotypeArray(gmapped1.compute())
actual


Out[11]:
GenotypeArray((5, 3, 2), dtype=int8)
0 1 2
0 0/0 0/1 0/1
1 1/0 0/0 0/0
2 0/0 0/0 0/0
3 0/0 0/0 1/1
4 1/1 0/1 ./.

In [12]:
assert np.array_equal(expect, actual)

Example 2


In [13]:
g = allel.GenotypeArray([
    [[0, 0], [0, 1], [0, 0], [0, 1], [-1, -1]],
    [[0, 2], [1, 1], [0, 2], [1, 1], [-1, -1]],
    [[0, 0], [0, 1], [0, 0], [0, 1], [-1, -1]],
    [[0, 2], [1, 1], [0, 2], [1, 1], [-1, -1]],
    [[1, 0], [2, 1], [1, 0], [2, 1], [-1, -1]],
    [[2, 2], [-1, -1], [2, 2], [-1, -1], [-1, -1]],
    [[-1, -1], [-1, -1], [-1, -1], [-1, -1], [-1, -1]]
], dtype='i1')
g


Out[13]:
GenotypeArray((7, 5, 2), dtype=int8)
0 1 2 3 4
0 0/0 0/1 0/0 0/1 ./.
1 0/2 1/1 0/2 1/1 ./.
2 0/0 0/1 0/0 0/1 ./.
3 0/2 1/1 0/2 1/1 ./.
4 1/0 2/1 1/0 2/1 ./.

...


In [14]:
mapping = np.array([[0, 1, 2],
                    [2, 0, 1],
                    [1, 2, 0],
                    [2, 1, 0],
                    [1, 2, 0],
                    [2, 1, 0],
                    [2, 0, 1]], dtype=np.int8)

In [15]:
expect = g.map_alleles(mapping)
expect


Out[15]:
GenotypeArray((7, 5, 2), dtype=int8)
0 1 2 3 4
0 0/0 0/1 0/0 0/1 ./.
1 2/1 0/0 2/1 0/0 ./.
2 1/1 1/2 1/1 1/2 ./.
3 2/0 1/1 2/0 1/1 ./.
4 2/1 0/2 2/1 0/2 ./.

...


In [16]:
gd = da.from_array(g, chunks=(chunks_dim0, 2, None))
gd


Out[16]:
dask.array<from-ar..., shape=(7, 5, 2), dtype=int8, chunksize=(2, 2, 2)>

In [17]:
md = da.from_array(mapping, chunks=(chunks_dim0, None))
md


Out[17]:
dask.array<from-ar..., shape=(7, 3), dtype=int8, chunksize=(2, 3)>

In [18]:
def f(block, bmapping):
    return allel.GenotypeArray(block).map_alleles(bmapping[:, 0, :])

In [19]:
res = da.map_blocks(f, gd, md[:, None, :], chunks=gd.chunks)
res


Out[19]:
dask.array<map-blo..., shape=(7, 5, 2), dtype=None, chunksize=(2, 2, 2)>

In [20]:
actual = allel.GenotypeArray(res.compute())
actual


Out[20]:
GenotypeArray((7, 5, 2), dtype=int8)
0 1 2 3 4
0 0/0 0/1 0/0 0/1 ./.
1 2/1 0/0 2/1 0/0 ./.
2 1/1 1/2 1/1 1/2 ./.
3 2/0 1/1 2/0 1/1 ./.
4 2/1 0/2 2/1 0/2 ./.

...


In [21]:
assert np.array_equal(expect, actual)

Alternative solution


In [22]:
def ff(block, bmapping):
    return allel.GenotypeArray(block[:, :, :, 0]).map_alleles(bmapping[:, 0, 0, :])

In [23]:
res2 = da.map_blocks(ff, gd[:, :, :, None], md[:, None, None, :], chunks=gd.chunks, drop_dims=3)
res2


Out[23]:
dask.array<map-blo..., shape=(7, 5, 2), dtype=None, chunksize=(2, 2, 2)>

In [24]:
actual2 = res2.compute()

In [25]:
actual2.shape


Out[25]:
(7, 5, 2)

In [26]:
assert np.array_equal(expect, actual2)

Example 3


In [27]:
g = allel.GenotypeArray([
    [[0, 0], [0, 1], [-1, -1]],
    [[0, 2], [1, 1], [-1, -1]],
    [[1, 0], [2, 1], [-1, -1]],
    [[2, 2], [-1, -1], [-1, -1]],
    [[-1, -1], [-1, -1], [-1, -1]]
], dtype='i1')
g


Out[27]:
GenotypeArray((5, 3, 2), dtype=int8)
0 1 2
0 0/0 0/1 ./.
1 0/2 1/1 ./.
2 1/0 2/1 ./.
3 2/2 ./. ./.
4 ./. ./. ./.

In [28]:
mapping = np.array([[0, 1, 2],
                    [2, 0, 1],
                    [1, 2, 0],
                    [2, 1, 0],
                    [2, 0, 1]], dtype=np.int8)

In [29]:
expect = [[[0, 0], [0, 1], [-1, -1]],
          [[2, 1], [0, 0], [-1, -1]],
          [[2, 1], [0, 2], [-1, -1]],
          [[0, 0], [-1, -1], [-1, -1]],
          [[-1, -1], [-1, -1], [-1, -1]]]

In [31]:
assert np.array_equal(expect, g.map_alleles(mapping))

In [32]:
gd = da.from_array(g, chunks=(2, 2, None))
gd


Out[32]:
dask.array<from-ar..., shape=(5, 3, 2), dtype=int8, chunksize=(2, 2, 2)>

In [33]:
md = da.from_array(mapping, chunks=(2, None))
md


Out[33]:
dask.array<from-ar..., shape=(5, 3), dtype=int8, chunksize=(2, 3)>

In [34]:
res = da.map_blocks(f, gd, md[:, None, :], chunks=gd.chunks)
res


Out[34]:
dask.array<map-blo..., shape=(5, 3, 2), dtype=None, chunksize=(2, 2, 2)>

In [35]:
actual = allel.GenotypeArray(res.compute())
actual


Out[35]:
GenotypeArray((5, 3, 2), dtype=int8)
0 1 2
0 0/0 0/1 ./.
1 2/1 0/0 ./.
2 2/1 0/2 ./.
3 0/0 ./. ./.
4 ./. ./. ./.

In [36]:
assert np.array_equal(expect, actual)

In [ ]: