In [2]:
import numpy as np
np.random.seed(42)
import sys
import cProfile
import humanize
# import imp
# import logging
# imp.reload(logging)
# logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, '../..')
%reload_ext memory_profiler
%reload_ext autoreload
%autoreload 1
%aimport allel.model
%aimport allel.stats.fst
%aimport allel.stats
%aimport allel.plot
%aimport allel.opt.stats
In [4]:
g = [[[0, 0], [0, 0], [1, 1], [1, 1]],
[[0, 1], [0, 1], [0, 1], [0, 1]],
[[0, 0], [0, 0], [0, 0], [0, 0]],
[[0, 1], [1, 2], [1, 1], [2, 2]],
[[0, 0], [1, 1], [0, 1], [-1, -1]]]
subpops = [[0, 1], [2, 3]]
g = allel.GenotypeArray(g)
ac1 = g.count_alleles(subpop=subpops[0])
ac2 = g.count_alleles(subpop=subpops[1])
In [5]:
a, b, c = allel.stats.weir_cockerham_fst(g, subpops)
a[:, 1], b[:, 1], c[:, 1]
Out[5]:
In [6]:
a[:, 1] / (a[:, 1] + b[:, 1] + c[:, 1])
Out[6]:
In [7]:
a[:, 2] / (a[:, 2] + b[:, 2] + c[:, 2])
Out[7]:
In [8]:
np.sum(a, axis=1) / (np.sum(a, axis=1) + np.sum(b, axis=1) + np.sum(c, axis=1))
Out[8]:
In [9]:
num, den = allel.stats.hudson_fst(ac1, ac2)
num, den
Out[9]:
In [10]:
num / den
Out[10]:
In [11]:
g2 = [[[0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [1, 1], [1, 1]],
[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]],
[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
[[0, 1], [1, 2], [0, 1], [1, 2], [1, 1], [2, 2], [1, 1], [2, 2]],
[[0, 0], [1, 1], [0, 0], [1, 1], [0, 1], [-1, -1], [0, 1], [-1, -1]]]
subpops2 = [[0, 1, 2, 3], [4, 5, 6, 7]]
g2 = allel.GenotypeArray(g2)
ac21 = g2.count_alleles(subpop=subpops2[0])
ac22 = g2.count_alleles(subpop=subpops2[1])
In [13]:
a, b, c = allel.stats.weir_cockerham_fst(g2, subpops2)
a[:, 1], b[:, 1], c[:, 1]
Out[13]:
In [14]:
a[:, 1] / (a[:, 1] + b[:, 1] + c[:, 1])
Out[14]:
In [15]:
num, den = allel.stats.hudson_fst(ac21, ac22)
num, den
Out[15]:
In [16]:
num / den
Out[16]:
In [17]:
g3 = [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1, 1], [1, 1], [1, 1], [1, 1]],
[[0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1], [0, 1]],
[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
[[0, 1], [1, 2], [0, 1], [1, 2], [0, 1], [1, 2], [0, 1], [1, 2], [1, 1], [2, 2], [1, 1], [2, 2]],
[[0, 0], [1, 1], [0, 0], [1, 1], [0, 0], [1, 1], [0, 0], [1, 1], [0, 1], [-1, -1], [0, 1], [-1, -1]]]
subpops3 = [list(range(8)), list(range(8, 12))]
g3 = allel.GenotypeArray(g3)
ac31 = g3.count_alleles(subpop=subpops3[0])
ac32 = g3.count_alleles(subpop=subpops3[1])
In [18]:
a, b, c = allel.stats.weir_cockerham_fst(g3, subpops3)
a[:, 1], b[:, 1], c[:, 1]
Out[18]:
In [19]:
a[:, 1] / (a[:, 1] + b[:, 1] + c[:, 1])
Out[19]:
In [20]:
num, den = allel.stats.hudson_fst(ac31, ac32)
num, den
Out[20]:
In [21]:
num / den
Out[21]:
In [22]:
# setup an array of genotype calls
shape = n_variants, n_samples, ploidy = 50000, 1000, 2
data = np.zeros(shape, dtype='i1')
# simulate some mutations
n_alleles = n_variants * n_samples * ploidy
idx = np.random.randint(0, (n_alleles//2)-1, size=n_alleles//20)
data[:, :, 1].reshape((-1))[idx] = 1
data[:, :, 0].reshape((-1))[idx[:n_alleles//200]] = 1
In [23]:
g = allel.GenotypeArray(data, copy=False)
g
Out[23]:
In [24]:
subpops = [np.arange(0, 500), np.arange(500, 1000)]
In [25]:
a, b, c = allel.stats.weir_cockerham_fst(g, subpops)
In [26]:
fst = a[:, 0] / (a[:, 0] + b[:, 0] + c[:, 0])
fst
Out[26]:
In [27]:
fst = a[:, 1] / (a[:, 1] + b[:, 1] + c[:, 1])
fst
Out[27]:
In [28]:
ac1 = g.count_alleles(subpop=subpops[0])
ac2 = g.count_alleles(subpop=subpops[1])
num, den = allel.stats.hudson_fst(ac1, ac2)
num / den
Out[28]:
In [29]:
%timeit allel.stats.weir_cockerham_fst(g, subpops)
%memit allel.stats.weir_cockerham_fst(g, subpops)
In [30]:
%timeit allel.stats.hudson_fst(ac1, ac2)
%memit allel.stats.hudson_fst(ac1, ac2)
In [31]:
gc = allel.GenotypeCArray(data)
gc
Out[31]:
In [32]:
%timeit allel.stats.weir_cockerham_fst(gc, subpops)
In [33]:
%memit allel.stats.weir_cockerham_fst(gc, subpops)
In [34]:
ac, bc, cc = allel.stats.weir_cockerham_fst(gc, subpops)
In [35]:
np.array_equal(a, ac)
Out[35]:
In [36]:
np.array_equal(b, bc)
Out[36]:
In [37]:
np.array_equal(c, cc)
Out[37]:
In [ ]: