In [1]:
import numpy as np
import h5py
import petl as etl
import sys
sys.path.insert(0, '../..')
import allel; print('allel', allel.__version__)
In [2]:
callset = h5py.File('/data/coluzzi/ag1000g/data/phase1/release/AR3/variation/main/hdf5/ag1000g.phase1.ar3.pass.h5',
mode='r')
callset
Out[2]:
In [28]:
chrom = '3L'
In [29]:
tbl_samples = (etl
.fromtsv('/data/coluzzi/ag1000g/data/phase1/release/AR3/samples/samples.meta.txt')
.convert('index', int)
)
tbl_samples
Out[29]:
In [41]:
pop1 = 'BFS'
pop2 = 'BFM'
In [42]:
pop1_idx = tbl_samples.eq('population', pop1).values('index').list()
pop2_idx = tbl_samples.eq('population', pop2).values('index').list()
In [43]:
genotypes = allel.GenotypeChunkedArray(callset[chrom]['calldata/genotype'])
genotypes
Out[43]:
In [44]:
acs = genotypes.count_alleles_subpops({pop1: pop1_idx, pop2: pop2_idx}, max_allele=3)
acs
Out[44]:
In [45]:
ac1, ac2 = acs[pop1], acs[pop2]
In [46]:
ac = ac1 + ac2
loc_seg = ac.is_segregating()
ac1_seg = ac1.compress(loc_seg)
ac2_seg = ac2.compress(loc_seg)
In [47]:
size = 5000
In [48]:
td1 = allel.stats.moving_tajima_d(ac1_seg, size=size)
td2 = allel.stats.moving_tajima_d(ac2_seg, size=size)
dtd = allel.stats.moving_delta_tajima_d(ac1_seg, ac2_seg, size=size)
In [49]:
pos = callset[chrom]['variants/POS'][:]
pos_seg = pos.compress(loc_seg)
In [50]:
%matplotlib inline
import matplotlib.pyplot as plt
In [53]:
x = allel.stats.moving_mean(pos_seg, size=size)
fig = plt.figure(figsize=(14, 8))
ax = fig.add_subplot(2, 1, 1)
ax.plot(x, td1, label="Tajima's D, %s" % pop1)
ax.plot(x, td2, label="Tajima's D, %s" % pop2)
ax.autoscale(axis='x', tight=True)
ax.legend()
ax.set_ylabel("Tajima's $D$")
ax = fig.add_subplot(2, 1, 2)
ax.plot(x, dtd, label="Delta Tajima's D", color='k')
ax.autoscale(axis='x', tight=True);
ax.set_ylabel("$\Delta$ Tajima's $D$")
ax.grid(axis='y')
fig.tight_layout()
In [59]:
plt.hist(dtd, bins=np.linspace(-6, 6, 30))
plt.xlim(-6, 6);
In [60]:
x = np.random.normal(loc=np.mean(dtd), scale=np.std(dtd), size=dtd.size)
plt.hist(x, bins=np.linspace(-6, 6, 30))
plt.xlim(-6, 6);
In [61]:
td1 = allel.stats.moving_tajima_d(ac1, size=size)
td2 = allel.stats.moving_tajima_d(ac2, size=size)
dtd = allel.stats.moving_delta_tajima_d(ac1, ac2, size=size)
x = allel.stats.moving_mean(pos, size=size)
fig = plt.figure(figsize=(14, 8))
ax = fig.add_subplot(2, 1, 1)
ax.plot(x, td1, label="Tajima's D, %s" % pop1)
ax.plot(x, td2, label="Tajima's D, %s" % pop2)
ax.autoscale(axis='x', tight=True)
ax.legend()
ax.set_ylabel("Tajima's $D$")
ax = fig.add_subplot(2, 1, 2)
ax.plot(x, dtd, label="Delta Tajima's D", color='k')
ax.autoscale(axis='x', tight=True);
ax.set_ylabel("$\Delta$ Tajima's $D$")
ax.grid(axis='y')
fig.tight_layout()