In [1]:
# Put the parent directory first on the import path (presumably so the local
# development checkout of `allel` is picked up -- confirm against repo layout).
import sys
sys.path.insert(0, '../')
import numpy as np
# autoreload mode 1: only the modules registered with %aimport below are
# reloaded before each cell runs.
%reload_ext autoreload
%autoreload 1
%aimport allel
%aimport allel.model.ndarray
%aimport allel.model.chunked
%aimport allel.model.dask
# Show which version of the package the outputs below were produced with.
allel.__version__


Out[1]:
'1.1.0.dev0'

GenotypeVector


In [2]:
# 40 diploid calls alternating between [0, 1] and missing ([-1, -1]).
g = allel.GenotypeVector([[0, 1], [-1, -1]]*20)
# Per-call phase flags: even-indexed calls phased, odd-indexed unphased.
# In the output below phased calls are drawn with '|' and unphased with '/'.
g.is_phased = np.array([True, False]*20, dtype=bool)
# Bare expression -> notebook display, truncated to the first and last 5 calls.
g


Out[2]:
<GenotypeVector shape=(40, 2) dtype=int64>
01234...3536373839
0|1./.0|1./.0|1..../.0|1./.0|1./.

In [3]:
# Widen the display: the output below shows 10 calls from each end.
# (Arguments are presumably threshold=30, edgeitems=10 -- confirm against the API.)
g.display(30, 10)


<GenotypeVector shape=(40, 2) dtype=int64>
0123456789...30313233343536373839
0|1./.0|1./.0|1./.0|1./.0|1./....0|1./.0|1./.0|1./.0|1./.0|1./.

In [4]:
g.displayall()


<GenotypeVector shape=(40, 2) dtype=int64>
0123456789101112131415161718192021222324252627282930313233343536373839
0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.0|1./.

In [5]:
print(repr(g))


<GenotypeVector shape=(40, 2) dtype=int64>
0|1 ./. 0|1 ./. 0|1 ... ./. 0|1 ./. 0|1 ./.

In [6]:
print(g)


0|1 ./. 0|1 ./. 0|1 ... ./. 0|1 ./. 0|1 ./.

In [7]:
# With the argument 40 the plain-text form lists all 40 calls without an ellipsis.
print(g.to_str(40))


0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./. 0|1 ./.

In [8]:
g.concatenate(g)


Out[8]:
<GenotypeVector shape=(80, 2) dtype=int64>
01234...7576777879
0|1./.0|1./.0|1..../.0|1./.0|1./.

In [9]:
# Concatenating along axis 1 widens each call to 4 values; as the output below
# shows, the result is a plain numpy array rather than a GenotypeVector.
g.concatenate(g, axis=1)


Out[9]:
array([[ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1],
       [ 0,  1,  0,  1],
       [-1, -1, -1, -1]])

GenotypeArray


In [10]:
# Rebind `g` to a (20, 40, 2) GenotypeArray: every row alternates between
# [0, 1] and missing ([-1, -1]) calls.
g = allel.GenotypeArray([[[0, 1], [-1, -1]]*20]*20)
# Phase flags repeat with period 4 along each row: only every fourth call is phased.
g.is_phased = np.array([[True, False, False, False]*10]*20, dtype=bool)
# Mask repeats with period 5 along each row; masked calls are rendered as
# missing in the outputs below (e.g. column 0 shows '.|.').
g.mask = np.array([[1, 0, 0, 0, 0]*8]*20, dtype=bool)

In [11]:
print(g)


.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
...
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.


In [12]:
print(repr(g))


<GenotypeArray shape=(20, 40, 2) dtype=int64>
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
...
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.
.|. ./. 0/1 ./. 0|1 ... ./. 0|1 ./. 0/1 ./.


In [13]:
g


Out[13]:
<GenotypeArray shape=(20, 40, 2) dtype=int64>
01234...3536373839
0.|../.0/1./.0|1..../.0|1./.0/1./.
1.|../.0/1./.0|1..../.0|1./.0/1./.
2.|../.0/1./.0|1..../.0|1./.0/1./.
......
17.|../.0/1./.0|1..../.0|1./.0/1./.
18.|../.0/1./.0|1..../.0|1./.0/1./.
19.|../.0/1./.0|1..../.0|1./.0/1./.

In [14]:
# Explicit truncation settings: the output below keeps 5 rows and 10 columns
# from each edge.
g.display(row_threshold=10, row_edgeitems=5, col_threshold=20, col_edgeitems=10)


<GenotypeArray shape=(20, 40, 2) dtype=int64>
0123456789...30313233343536373839
0.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
1.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
2.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
3.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
4.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
......
15.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
16.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
17.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
18.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.
19.|../.0/1./.0|1./.0/1./.0|1./...../../.0|1./.0/1./.0|1./.0/1./.

In [15]:
g.displayall()


<GenotypeArray shape=(20, 40, 2) dtype=int64>
0123456789101112131415161718192021222324252627282930313233343536373839
0.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
1.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
2.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
3.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
4.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
5.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
6.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
7.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
8.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
9.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
10.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
11.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
12.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
13.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
14.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
15.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
16.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
17.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
18.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.
19.|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./..|../.0/1./.0|1./.0/1./.0|1./../../.0|1./.0/1./.0|1./.0/1./.

In [16]:
# Indexing a single row returns a GenotypeVector holding that row's 40 calls.
g[0]


Out[16]:
<GenotypeVector shape=(40, 2) dtype=int64>
01234...3536373839
.|../.0/1./.0|1..../.0|1./.0/1./.

In [17]:
# Indexing a single column returns a GenotypeVector holding that column's 20 calls.
g[:, 4]


Out[17]:
<GenotypeVector shape=(20, 2) dtype=int64>
01234...1516171819
0|10|10|10|10|1...0|10|10|10|10|1

In [18]:
g[:, 4].displayall()


<GenotypeVector shape=(20, 2) dtype=int64>
012345678910111213141516171819
0|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|10|1

In [19]:
# Remove the mask; column 0 is displayed as 0|1 again instead of .|.
g.mask = None
g


Out[19]:
<GenotypeArray shape=(20, 40, 2) dtype=int64>
01234...3536373839
00|1./.0/1./.0|1..../.0|1./.0/1./.
10|1./.0/1./.0|1..../.0|1./.0/1./.
20|1./.0/1./.0|1..../.0|1./.0/1./.
......
170|1./.0/1./.0|1..../.0|1./.0/1./.
180|1./.0/1./.0|1..../.0|1./.0/1./.
190|1./.0/1./.0|1..../.0|1./.0/1./.

In [20]:
g.displayall()


<GenotypeArray shape=(20, 40, 2) dtype=int64>
0123456789101112131415161718192021222324252627282930313233343536373839
00|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
10|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
20|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
30|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
40|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
50|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
60|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
70|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
80|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
90|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
100|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
110|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
120|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
130|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
140|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
150|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
160|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
170|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
180|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.
190|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.0|1./.0/1./.

In [21]:
g.concatenate(g)


Out[21]:
<GenotypeArray shape=(40, 40, 2) dtype=int64>
01234...3536373839
00|1./.0/1./.0|1..../.0|1./.0/1./.
10|1./.0/1./.0|1..../.0|1./.0/1./.
20|1./.0/1./.0|1..../.0|1./.0/1./.
......
370|1./.0/1./.0|1..../.0|1./.0/1./.
380|1./.0/1./.0|1..../.0|1./.0/1./.
390|1./.0/1./.0|1..../.0|1./.0/1./.

In [22]:
g.concatenate(g, axis=1)


Out[22]:
<GenotypeArray shape=(20, 80, 2) dtype=int64>
01234...7576777879
00|1./.0/1./.0|1..../.0|1./.0/1./.
10|1./.0/1./.0|1..../.0|1./.0/1./.
20|1./.0/1./.0|1..../.0|1./.0/1./.
......
170|1./.0/1./.0|1..../.0|1./.0/1./.
180|1./.0/1./.0|1..../.0|1./.0/1./.
190|1./.0/1./.0|1..../.0|1./.0/1./.

In [23]:
# Returns a plain int8 numpy array with one value per call: 1 for the 0/1 calls
# and 0 for the missing calls in the output below (presumably the number of
# non-reference alleles per call -- confirm against the API docs).
g.to_n_alt()


Out[23]:
array([[1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]], dtype=int8)

HaplotypeArray


In [24]:
# Flatten the last dimension: the (20, 40, 2) genotype array becomes a (20, 80)
# HaplotypeArray (see the repr in the next cell).
h = g.to_haplotypes()

In [25]:
h


Out[25]:
<HaplotypeArray shape=(20, 80) dtype=int64>
01234...7576777879
001..0....01..
101..0....01..
201..0....01..
......
1701..0....01..
1801..0....01..
1901..0....01..

In [26]:
print(repr(h))


<HaplotypeArray shape=(20, 80) dtype=int64>
0 1 . . 0 ... . 0 1 . .
0 1 . . 0 ... . 0 1 . .
0 1 . . 0 ... . 0 1 . .
...
0 1 . . 0 ... . 0 1 . .
0 1 . . 0 ... . 0 1 . .
0 1 . . 0 ... . 0 1 . .


In [27]:
print(h)


0 1 . . 0 ... . 0 1 . .
0 1 . . 0 ... . 0 1 . .
0 1 . . 0 ... . 0 1 . .
...
0 1 . . 0 ... . 0 1 . .
0 1 . . 0 ... . 0 1 . .
0 1 . . 0 ... . 0 1 . .


In [28]:
# With these arguments all 20 rows are listed while the columns stay truncated
# to 5 per edge, so the two positional arguments appear to control rows only
# (confirm against the API).
h.display(20, 40)


<HaplotypeArray shape=(20, 80) dtype=int64>
01234...7576777879
001..0....01..
101..0....01..
201..0....01..
301..0....01..
401..0....01..
501..0....01..
601..0....01..
701..0....01..
801..0....01..
901..0....01..
1001..0....01..
1101..0....01..
1201..0....01..
1301..0....01..
1401..0....01..
1501..0....01..
1601..0....01..
1701..0....01..
1801..0....01..
1901..0....01..

In [29]:
h.displayall()


<HaplotypeArray shape=(20, 80) dtype=int64>
012345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
001..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
101..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
201..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
301..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
401..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
501..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
601..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
701..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
801..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
901..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1001..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1101..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1201..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1301..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1401..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1501..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1601..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1701..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1801..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..
1901..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..01..

In [30]:
h.concatenate(h)


Out[30]:
<HaplotypeArray shape=(40, 80) dtype=int64>
01234...7576777879
001..0....01..
101..0....01..
201..0....01..
......
3701..0....01..
3801..0....01..
3901..0....01..

In [31]:
h.concatenate(h, axis=1)


Out[31]:
<HaplotypeArray shape=(20, 160) dtype=int64>
01234...155156157158159
001..0....01..
101..0....01..
201..0....01..
......
1701..0....01..
1801..0....01..
1901..0....01..

AlleleCountsArray


In [32]:
# Per-row allele counts: one output row per row of `g`, one column per allele
# (here alleles 0 and 1, each observed 20 times per row).
ac = g.count_alleles()
ac


Out[32]:
<AlleleCountsArray shape=(20, 2) dtype=int32>
01
02020
12020
22020
......
172020
182020
192020

In [33]:
ac.displayall()


<AlleleCountsArray shape=(20, 2) dtype=int32>
01
02020
12020
22020
32020
42020
52020
62020
72020
82020
92020
102020
112020
122020
132020
142020
152020
162020
172020
182020
192020

In [34]:
print(ac)


20 20
20 20
20 20
...
20 20
20 20
20 20


In [35]:
print(repr(ac))


<AlleleCountsArray shape=(20, 2) dtype=int32>
20 20
20 20
20 20
...
20 20
20 20
20 20


In [36]:
# Scalar arithmetic keeps the AlleleCountsArray wrapper type (see output below).
ac + 5


Out[36]:
<AlleleCountsArray shape=(20, 2) dtype=int32>
01
02525
12525
22525
......
172525
182525
192525

In [37]:
ac + ac


Out[37]:
<AlleleCountsArray shape=(20, 2) dtype=int32>
01
04040
14040
24040
......
174040
184040
194040

In [38]:
ac.concatenate(ac)


Out[38]:
<AlleleCountsArray shape=(40, 2) dtype=int32>
01
02020
12020
22020
......
372020
382020
392020

In [39]:
# Concatenating along axis 1 yields a plain (20, 4) numpy array in the output
# below rather than an AlleleCountsArray.
ac.concatenate(ac, axis=1)


Out[39]:
array([[20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20],
       [20, 20, 20, 20]], dtype=int32)

GenotypeAlleleCountsArray


In [40]:
# Convert each call to per-allele counts, displayed as colon-separated values:
# 1:1 for a 0/1 call and 0:0 for a missing call.
gac = g.to_allele_counts()
gac


Out[40]:
<GenotypeAlleleCountsArray shape=(20, 40, 2) dtype=uint8>
01234...3536373839
01:10:01:10:01:1...0:01:10:01:10:0
11:10:01:10:01:1...0:01:10:01:10:0
21:10:01:10:01:1...0:01:10:01:10:0
......
171:10:01:10:01:1...0:01:10:01:10:0
181:10:01:10:01:1...0:01:10:01:10:0
191:10:01:10:01:1...0:01:10:01:10:0

In [41]:
print(gac)


1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
...
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0


In [42]:
print(repr(gac))


<GenotypeAlleleCountsArray shape=(20, 40, 2) dtype=uint8>
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
...
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0
1:1 0:0 1:1 0:0 1:1 ... 0:0 1:1 0:0 1:1 0:0


In [43]:
# In-place mutation: every call in column 0 becomes 2/2, introducing a third
# allele. All later cells that use `g` see the modified array; `gac` was
# computed before this change and still has two allele columns.
g[:, 0] = 2

In [44]:
# Recomputed after the mutation: the result now has three allele columns and
# column 0 shows 0:0:2.
g.to_allele_counts()


Out[44]:
<GenotypeAlleleCountsArray shape=(20, 40, 3) dtype=uint8>
01234...3536373839
00:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
10:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
20:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
......
170:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
180:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
190:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0

In [45]:
gac[:5]


Out[45]:
<GenotypeAlleleCountsArray shape=(5, 40, 2) dtype=uint8>
01234...3536373839
01:10:01:10:01:1...0:01:10:01:10:0
11:10:01:10:01:1...0:01:10:01:10:0
21:10:01:10:01:1...0:01:10:01:10:0
31:10:01:10:01:1...0:01:10:01:10:0
41:10:01:10:01:1...0:01:10:01:10:0

In [46]:
# Indexing a single row returns a GenotypeAlleleCountsVector.
gac[0]


Out[46]:
<GenotypeAlleleCountsVector shape=(40, 2) dtype=uint8>
01234...3536373839
1:10:01:10:01:1...0:01:10:01:10:0

In [47]:
gac[:, 0]


Out[47]:
<GenotypeAlleleCountsVector shape=(20, 2) dtype=uint8>
01234...1516171819
1:11:11:11:11:1...1:11:11:11:11:1

In [48]:
# Boolean numpy array, one value per call: True for the 1:1 calls and False for
# the 0:0 (missing) calls in the output below.
gac[:3].is_called()


Out[48]:
array([[ True, False,  True, False,  True, False,  True, False,  True,
        False,  True, False,  True, False,  True, False,  True, False,
         True, False,  True, False,  True, False,  True, False,  True,
        False,  True, False,  True, False,  True, False,  True, False,
         True, False,  True, False],
       [ True, False,  True, False,  True, False,  True, False,  True,
        False,  True, False,  True, False,  True, False,  True, False,
         True, False,  True, False,  True, False,  True, False,  True,
        False,  True, False,  True, False,  True, False,  True, False,
         True, False,  True, False],
       [ True, False,  True, False,  True, False,  True, False,  True,
        False,  True, False,  True, False,  True, False,  True, False,
         True, False,  True, False,  True, False,  True, False,  True,
        False,  True, False,  True, False,  True, False,  True, False,
         True, False,  True, False]], dtype=bool)

In [49]:
gac[0].is_called()


Out[49]:
array([ True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False,  True, False,  True, False,  True,
       False,  True, False,  True, False,  True, False,  True, False,
        True, False,  True, False], dtype=bool)

VariantTable


In [50]:
# Build a synthetic variant table for the display examples below.
# Seed NumPy's global RNG first so that Restart & Run All reproduces the same
# table; np.unique drops duplicate positions, so even the row count depends on
# the random draw.
np.random.seed(42)
pos = np.unique(np.random.randint(0, 1000000, size=50000))
# REF and ALT are drawn independently, so some rows end up with REF == ALT;
# that is acceptable here because the table only demonstrates display.
ref = np.random.choice([b'A', b'C', b'T', b'G'], size=pos.shape[0])
alt = np.random.choice([b'A', b'C', b'T', b'G'], size=pos.shape[0])
# Assemble a record array with named fields and wrap it as a VariantTable.
ra = np.rec.fromarrays([pos, ref, alt], names=['POS', 'REF', 'ALT'])
vtbl = allel.VariantTable(ra)
vtbl


Out[50]:
<VariantTable shape=(48772,) dtype=(numpy.record, [('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')])>
POSREFALT
016b'G'b'G'
122b'C'b'G'
254b'G'b'G'
......
48769999938b'C'b'C'
48770999977b'C'b'C'
48771999993b'G'b'G'

In [51]:
vtbl[:5]


Out[51]:
<VariantTable shape=(5,) dtype=(numpy.record, [('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')])>
POSREFALT
016b'G'b'G'
122b'C'b'G'
254b'G'b'G'
361b'G'b'G'
494b'T'b'G'

In [52]:
vtbl[:20].displayall()


<VariantTable shape=(20,) dtype=(numpy.record, [('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')])>
POSREFALT
016b'G'b'G'
122b'C'b'G'
254b'G'b'G'
361b'G'b'G'
494b'T'b'G'
5109b'G'b'C'
6165b'A'b'A'
7170b'G'b'C'
8174b'T'b'A'
9179b'T'b'G'
10198b'T'b'A'
11204b'A'b'A'
12205b'A'b'A'
13214b'C'b'C'
14217b'C'b'T'
15221b'T'b'A'
16245b'C'b'G'
17250b'C'b'T'
18261b'T'b'C'
19357b'G'b'C'

In [53]:
# Select rows by integer index; the result is again a VariantTable.
vtbl.take(list(range(5)))


Out[53]:
<VariantTable shape=(5,) dtype=(numpy.record, [('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')])>
POSREFALT
016b'G'b'G'
122b'C'b'G'
254b'G'b'G'
361b'G'b'G'
494b'T'b'G'

In [54]:
# Keep the rows where a random boolean mask is True. The mask is drawn from
# NumPy's global random state, so which rows are kept (and the resulting row
# count) depends on that state when the cell runs.
vtbl.compress(np.random.randint(0, 2, size=vtbl.shape[0]).astype(bool))


Out[54]:
<VariantTable shape=(24495,) dtype=(numpy.record, [('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')])>
POSREFALT
022b'C'b'G'
1109b'G'b'C'
2179b'T'b'G'
......
24492999898b'G'b'T'
24493999938b'C'b'C'
24494999977b'C'b'C'

In [55]:
vtbl.concatenate(vtbl)


Out[55]:
<VariantTable shape=(97544,) dtype=(numpy.record, [('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')])>
POSREFALT
016b'G'b'G'
122b'C'b'G'
254b'G'b'G'
......
97541999938b'C'b'C'
97542999977b'C'b'C'
97543999993b'G'b'G'

In [56]:
# Selecting a list of field names returns a VariantTable with only those columns.
# NOTE: with the NumPy version used for this run, this emits the FutureWarning
# about multi-field selection shown in the output below.
vtbl[['POS', 'ALT']]


/home/aliman/pyenv/dev-scikit-allel-py36-20170614/lib/python3.6/site-packages/numpy/core/records.py:507: FutureWarning: Numpy has detected that you may be viewing or writing to an array returned by selecting multiple fields in a structured array. 

This code may break in numpy 1.13 because this will return a view instead of a copy -- see release notes for details.
  return obj.view(dtype=(self.dtype.type, obj.dtype))
Out[56]:
<VariantTable shape=(48772,) dtype=(numpy.record, [('POS', '<i8'), ('ALT', 'S1')])>
POSALT
016b'G'
122b'G'
254b'G'
......
48769999938b'C'
48770999977b'C'
48771999993b'G'

In [57]:
print(vtbl)


[(    16, b'G', b'G') (    22, b'C', b'G') (    54, b'G', b'G') ...,
 (999938, b'C', b'C') (999977, b'C', b'C') (999993, b'G', b'G')]

In [58]:
print(repr(vtbl))


<VariantTable shape=(48772,) dtype=(numpy.record, [('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')])>
[(    16, b'G', b'G') (    22, b'C', b'G') (    54, b'G', b'G') ...,
 (999938, b'C', b'C') (999977, b'C', b'C') (999993, b'G', b'G')]

Chunked arrays and tables


In [59]:
# Wrap `g` in a chunked container and copy it into compressed chunked storage
# (the repr below reports zarr storage with blosc compression).
# NOTE(review): the output renders calls as unphased (2/2, 0/1) although `g` has
# phase flags set, so the phase information does not appear to carry over.
gc = allel.GenotypeChunkedArray(g).copy()
gc


Out[59]:
<GenotypeChunkedArray shape=(20, 40, 2) dtype=int64 chunks=(20, 40, 2) nbytes=12.5K cbytes=450 cratio=28.4 compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1} values=zarr.core.Array>
01234...3536373839
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.
......
172/2./.0/1./.0/1..../.0/1./.0/1./.
182/2./.0/1./.0/1..../.0/1./.0/1./.
192/2./.0/1./.0/1..../.0/1./.0/1./.

In [60]:
print(gc)


2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
...
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.


In [61]:
print(repr(gc))


<GenotypeChunkedArray shape=(20, 40, 2) dtype=int64 chunks=(20, 40, 2)
   nbytes=12.5K cbytes=450 cratio=28.4
   compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1}
   values=zarr.core.Array>

In [62]:
# Slicing the chunked array returns an in-memory GenotypeArray (see output below).
gc[:5]


Out[62]:
<GenotypeArray shape=(5, 40, 2) dtype=int64>
01234...3536373839
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.
32/2./.0/1./.0/1..../.0/1./.0/1./.
42/2./.0/1./.0/1..../.0/1./.0/1./.

In [63]:
gc[:, :5]


Out[63]:
<GenotypeArray shape=(20, 5, 2) dtype=int64>
01234
02/2./.0/1./.0/1
12/2./.0/1./.0/1
22/2./.0/1./.0/1
......
172/2./.0/1./.0/1
182/2./.0/1./.0/1
192/2./.0/1./.0/1

In [64]:
gc[5]


Out[64]:
<GenotypeVector shape=(40, 2) dtype=int64>
01234...3536373839
2/2./.0/1./.0/1..../.0/1./.0/1./.

In [65]:
gc[:, 5]


Out[65]:
<GenotypeVector shape=(20, 2) dtype=int64>
01234...1516171819
./../../../../...../../../../../.

In [66]:
# Unlike slicing, take() returns another GenotypeChunkedArray (see output below).
x = gc.take([0, 2, 4], axis=0)
x


Out[66]:
<GenotypeChunkedArray shape=(3, 40, 2) dtype=int64 chunks=(3, 40, 2) nbytes=1.9K cbytes=406 cratio=4.7 compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1} values=zarr.core.Array>
01234...3536373839
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.

In [67]:
gc.take([0, 1, 2], axis=1)


Out[67]:
<GenotypeChunkedArray shape=(20, 3, 2) dtype=int64 chunks=(20, 3, 2) nbytes=960 cbytes=386 cratio=2.5 compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1} values=zarr.core.Array>
012
02/2./.0/1
12/2./.0/1
22/2./.0/1
......
172/2./.0/1
182/2./.0/1
192/2./.0/1

In [68]:
# On the chunked array, to_n_alt() returns a generic ChunkedArrayWrapper of
# dtype int8; its repr shows storage metadata only, without a data preview.
gcna = gc.to_n_alt()
gcna


Out[68]:
<ChunkedArrayWrapper shape=(20, 40) dtype=int8 chunks=(20, 40)
   nbytes=800 cbytes=360 cratio=2.2
   compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1}
   values=zarr.core.Array>

In [69]:
# Comparison on the chunked wrapper yields another ChunkedArrayWrapper, here
# with dtype bool.
gcna > 0


Out[69]:
<ChunkedArrayWrapper shape=(20, 40) dtype=bool chunks=(20, 40)
   nbytes=800 cbytes=352 cratio=2.3
   compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1}
   values=zarr.core.Array>

In [70]:
# Chunked counterpart of to_haplotypes(): the result is a (20, 80)
# HaplotypeChunkedArray.
hc = gc.to_haplotypes()
hc


Out[70]:
<HaplotypeChunkedArray shape=(20, 80) dtype=int64 chunks=(20, 80) nbytes=12.5K cbytes=428 cratio=29.9 compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1} values=zarr.core.Array>
01234...7576777879
022..0....01..
122..0....01..
222..0....01..
......
1722..0....01..
1822..0....01..
1922..0....01..

In [71]:
hc.take([0, 1, 2], axis=1)


Out[71]:
<HaplotypeChunkedArray shape=(20, 3) dtype=int64 chunks=(20, 3) nbytes=480 cbytes=361 cratio=1.3 compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1} values=zarr.core.Array>
012
022.
122.
222.
......
1722.
1822.
1922.

In [72]:
# Rebinds `ac`: allele counts recomputed from the mutated `g`, now with a third
# allele column (19, 19, 2 per row in the output below).
ac = g.count_alleles()
ac


Out[72]:
<AlleleCountsArray shape=(20, 3) dtype=int32>
012
01919 2
11919 2
21919 2
......
171919 2
181919 2
191919 2

In [73]:
# Same counts computed from the chunked array; the result is an
# AlleleCountsChunkedArray with the same values as `ac` above.
acc = gc.count_alleles()
acc


Out[73]:
<AlleleCountsChunkedArray shape=(20, 3) dtype=int32 chunks=(20, 3) nbytes=240 cbytes=356 cratio=0.7 compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1} values=zarr.core.Array>
012
01919 2
11919 2
21919 2
......
171919 2
181919 2
191919 2

In [74]:
gc.is_called()


Out[74]:
<ChunkedArrayWrapper shape=(20, 40) dtype=bool chunks=(20, 40)
   nbytes=800 cbytes=352 cratio=2.3
   compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1}
   values=zarr.core.Array>

In [75]:
gc.to_n_alt()


Out[75]:
<ChunkedArrayWrapper shape=(20, 40) dtype=int8 chunks=(20, 40)
   nbytes=800 cbytes=360 cratio=2.2
   compression=blosc compression_opts={'cname': 'lz4', 'clevel': 5, 'shuffle': 1}
   values=zarr.core.Array>

In [76]:
# Wrap the variant table in a chunked table and copy it into chunked storage
# (the repr below reports a ZarrTable as the backing values).
vctbl = allel.VariantChunkedTable(vtbl).copy()
vctbl


Out[76]:
<VariantChunkedTable shape=(48772,) dtype=[('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')] nbytes=476.3K cbytes=127.4K cratio=3.7 values=allel.chunked.storage_zarr.ZarrTable>
POSREFALT
016b'G'b'G'
122b'C'b'G'
254b'G'b'G'
......
48769999938b'C'b'C'
48770999977b'C'b'C'
48771999993b'G'b'G'

In [77]:
# A full slice of the chunked table returns an in-memory VariantTable.
vctbl[:]


Out[77]:
<VariantTable shape=(48772,) dtype=(numpy.record, [('POS', '<i8'), ('REF', 'S1'), ('ALT', 'S1')])>
POSREFALT
016b'G'b'G'
122b'C'b'G'
254b'G'b'G'
......
48769999938b'C'b'C'
48770999977b'C'b'C'
48771999993b'G'b'G'

Dask arrays


In [78]:
g


Out[78]:
<GenotypeArray shape=(20, 40, 2) dtype=int64>
01234...3536373839
02|2./.0/1./.0|1..../.0|1./.0/1./.
12|2./.0/1./.0|1..../.0|1./.0/1./.
22|2./.0/1./.0|1..../.0|1./.0/1./.
......
172|2./.0/1./.0|1..../.0|1./.0/1./.
182|2./.0/1./.0|1..../.0|1./.0/1./.
192|2./.0/1./.0|1..../.0|1./.0/1./.

In [79]:
# Wrap the in-memory genotype array as a dask-backed genotype array; the
# `gd.values` cell further down shows the underlying dask array has one chunk.
gd = allel.GenotypeDaskArray(g)

In [80]:
gd.mask is None


Out[80]:
True

In [81]:
# True in the output below: the phase flags set on `g` are not present on the
# dask wrapper, which is why `gd` renders every call with '/'.
gd.is_phased is None


Out[81]:
True

In [82]:
print(repr(gd))


<GenotypeDaskArray shape=(20, 40, 2) dtype=int64>

In [83]:
print(gd)


2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
...
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.
2/2 ./. 0/1 ./. 0/1 ... ./. 0/1 ./. 0/1 ./.


In [84]:
gd


Out[84]:
<GenotypeDaskArray shape=(20, 40, 2) dtype=int64>
01234...3536373839
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.
......
172/2./.0/1./.0/1..../.0/1./.0/1./.
182/2./.0/1./.0/1..../.0/1./.0/1./.
192/2./.0/1./.0/1..../.0/1./.0/1./.

In [85]:
gd.values


Out[85]:
dask.array<array, shape=(20, 40, 2), dtype=int64, chunksize=(20, 40, 2)>

In [86]:
gd.values.name


Out[86]:
'array-02899a22ff9fd8802550da9027abb49d'

In [87]:
gd[:]


Out[87]:
<GenotypeDaskArray shape=(20, 40, 2) dtype=int64>
01234...3536373839
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.
......
172/2./.0/1./.0/1..../.0/1./.0/1./.
182/2./.0/1./.0/1..../.0/1./.0/1./.
192/2./.0/1./.0/1..../.0/1./.0/1./.

In [88]:
gd[:5]


Out[88]:
<GenotypeDaskArray shape=(5, 40, 2) dtype=int64>
01234...3536373839
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.
32/2./.0/1./.0/1..../.0/1./.0/1./.
42/2./.0/1./.0/1..../.0/1./.0/1./.

In [89]:
# compute() evaluates the dask array and returns an in-memory GenotypeArray.
gd.compute()


Out[89]:
<GenotypeArray shape=(20, 40, 2) dtype=int64>
01234...3536373839
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.
......
172/2./.0/1./.0/1..../.0/1./.0/1./.
182/2./.0/1./.0/1..../.0/1./.0/1./.
192/2./.0/1./.0/1..../.0/1./.0/1./.

In [90]:
gd.concatenate(gd)


Out[90]:
<GenotypeDaskArray shape=(40, 40, 2) dtype=int64>
01234...3536373839
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.
......
372/2./.0/1./.0/1..../.0/1./.0/1./.
382/2./.0/1./.0/1..../.0/1./.0/1./.
392/2./.0/1./.0/1..../.0/1./.0/1./.

In [91]:
gd.concatenate(gd, axis=1)


Out[91]:
<GenotypeDaskArray shape=(20, 80, 2) dtype=int64>
01234...7576777879
02/2./.0/1./.0/1..../.0/1./.0/1./.
12/2./.0/1./.0/1..../.0/1./.0/1./.
22/2./.0/1./.0/1..../.0/1./.0/1./.
......
172/2./.0/1./.0/1..../.0/1./.0/1./.
182/2./.0/1./.0/1..../.0/1./.0/1./.
192/2./.0/1./.0/1..../.0/1./.0/1./.

In [92]:
# Dask counterpart of to_haplotypes(): the result is a (20, 80) HaplotypeDaskArray.
hd = gd.to_haplotypes()
hd


Out[92]:
<HaplotypeDaskArray shape=(20, 80) dtype=int64>
01234...7576777879
022..0....01..
122..0....01..
222..0....01..
......
1722..0....01..
1822..0....01..
1922..0....01..

In [93]:
hd.compute()


Out[93]:
<HaplotypeArray shape=(20, 80) dtype=int64>
01234...7576777879
022..0....01..
122..0....01..
222..0....01..
......
1722..0....01..
1822..0....01..
1922..0....01..

In [94]:
# Allele counts via dask: same values as the in-memory result, but the output
# below reports dtype int64 where the in-memory AlleleCountsArray was int32.
acd = gd.count_alleles()
acd


Out[94]:
<AlleleCountsDaskArray shape=(20, 3) dtype=int64>
012
01919 2
11919 2
21919 2
......
171919 2
181919 2
191919 2

In [95]:
acd.compute()


Out[95]:
<AlleleCountsArray shape=(20, 3) dtype=int64>
012
01919 2
11919 2
21919 2
......
171919 2
181919 2
191919 2

In [96]:
# Indexing a single row of the dask array returns a GenotypeDaskVector.
gd[4]


Out[96]:
<GenotypeDaskVector shape=(40, 2) dtype=int64>
01234...3536373839
2/2./.0/1./.0/1..../.0/1./.0/1./.

In [97]:
gd[4].compute()


Out[97]:
<GenotypeVector shape=(40, 2) dtype=int64>
01234...3536373839
2/2./.0/1./.0/1..../.0/1./.0/1./.

In [98]:
# Arithmetic on dask allele counts stays an AlleleCountsDaskArray; the next
# cell calls compute() to get the in-memory AlleleCountsArray.
acd + acd


Out[98]:
<AlleleCountsDaskArray shape=(20, 3) dtype=int64>
012
03838 4
13838 4
23838 4
......
173838 4
183838 4
193838 4

In [99]:
(acd + acd).compute()


Out[99]:
<AlleleCountsArray shape=(20, 3) dtype=int64>
012
03838 4
13838 4
23838 4
......
173838 4
183838 4
193838 4

In [100]:
# Compute per-call allele counts in memory from the mutated `g` (three allele
# columns) and wrap the result as a dask-backed array.
gacd = allel.GenotypeAlleleCountsDaskArray(g.to_allele_counts())
gacd


Out[100]:
<GenotypeAlleleCountsDaskArray shape=(20, 40, 3) dtype=uint8>
01234...3536373839
00:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
10:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
20:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
......
170:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
180:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
190:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0

In [101]:
gacd.compute()


Out[101]:
<GenotypeAlleleCountsArray shape=(20, 40, 3) dtype=uint8>
01234...3536373839
00:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
10:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
20:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
......
170:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
180:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0
190:0:20:0:01:1:00:0:01:1:0...0:0:01:1:00:0:01:1:00:0:0

In [102]:
# Allele counts derived from the per-call counts: same values as `acd`, but the
# output below reports dtype uint64 rather than int64.
gacd.count_alleles()


Out[102]:
<AlleleCountsDaskArray shape=(20, 3) dtype=uint64>
012
01919 2
11919 2
21919 2
......
171919 2
181919 2
191919 2

In [103]:
gacd.count_alleles().compute()


Out[103]:
<AlleleCountsArray shape=(20, 3) dtype=uint64>
012
01919 2
11919 2
21919 2
......
171919 2
181919 2
191919 2

In [ ]:


In [ ]: