anhima.sfs - Site frequency spectra


In [1]:
from __future__ import division, print_function
import numpy as np
np.random.seed(1)
import scipy.stats
import random
random.seed(1)
import matplotlib.pyplot as plt
%matplotlib inline
import sys
# import anhima
# # dev imports
sys.path.insert(0, '..')
%reload_ext autoreload
%autoreload 1
%aimport anhima.sim
%aimport anhima.gt
%aimport anhima.af
%aimport anhima.sf

In [2]:
n_samples = 100
ploidy = 2
m = n_samples * ploidy

# simulated allele counts for population 1 - constant size
pop1_derived_af = scipy.stats.reciprocal.rvs(a=.001, b=1, size=1000000)
pop1_derived_ac = np.round(pop1_derived_af * m).astype('i4')
pop1_biallelic_ac = np.column_stack([m - pop1_derived_ac, pop1_derived_ac])

# simulated allele counts for population 2 - deficit of rare variants (population bottleneck)
pop2_derived_af = scipy.stats.beta.rvs(a=.2, b=1.1, size=1000000)
pop2_derived_ac = np.round(pop2_derived_af * m).astype('i4')
pop2_biallelic_ac = np.column_stack([m - pop2_derived_ac, pop2_derived_ac])

# simulated allele counts for population 3 - excess of rare variants
pop3_derived_af = np.append(pop1_derived_af,
                            scipy.stats.beta.rvs(a=.1, b=6, size=1000000))
pop3_derived_ac = np.round(pop3_derived_af * m).astype('i4')
pop3_biallelic_ac = np.column_stack([m - pop3_derived_ac, pop3_derived_ac])

In [3]:
pop1_sfs = anhima.sf.site_frequency_spectrum(pop1_derived_ac)
pop2_sfs = anhima.sf.site_frequency_spectrum(pop2_derived_ac)
pop3_sfs = anhima.sf.site_frequency_spectrum(pop3_derived_ac)

fig, ax = plt.subplots()
anhima.sf.plot_site_frequency_spectrum(pop1_sfs, ax=ax, label='pop1', plot_kwargs=dict(lw=2))
anhima.sf.plot_site_frequency_spectrum(pop2_sfs, ax=ax, label='pop2', plot_kwargs=dict(lw=2))
anhima.sf.plot_site_frequency_spectrum(pop3_sfs, ax=ax, label='pop3', plot_kwargs=dict(lw=2))
ax.set_xlabel('derived allele count')
ax.legend();



In [4]:
pop1_sfs_scaled = anhima.sf.site_frequency_spectrum_scaled(pop1_derived_ac)
pop2_sfs_scaled = anhima.sf.site_frequency_spectrum_scaled(pop2_derived_ac)
pop3_sfs_scaled = anhima.sf.site_frequency_spectrum_scaled(pop3_derived_ac)

fig, ax = plt.subplots()
anhima.sf.plot_site_frequency_spectrum(pop1_sfs_scaled, ax=ax, label='pop1', plot_kwargs=dict(lw=2))
anhima.sf.plot_site_frequency_spectrum(pop2_sfs_scaled, ax=ax, label='pop2', plot_kwargs=dict(lw=2))
anhima.sf.plot_site_frequency_spectrum(pop3_sfs_scaled, ax=ax, label='pop3', plot_kwargs=dict(lw=2))
ax.set_ylim(bottom=0)
ax.set_ylabel('scaled site frequency')
ax.set_xlabel('derived allele count')
ax.legend(bbox_to_anchor=(1, 1), loc='upper left');



In [5]:
pop1_sfs_folded = anhima.sf.site_frequency_spectrum_folded(pop1_biallelic_ac)
pop2_sfs_folded = anhima.sf.site_frequency_spectrum_folded(pop2_biallelic_ac)
pop3_sfs_folded = anhima.sf.site_frequency_spectrum_folded(pop3_biallelic_ac)

fig, ax = plt.subplots()
anhima.sf.plot_site_frequency_spectrum(pop1_sfs_folded, ax=ax, label='pop1', plot_kwargs=dict(lw=2))
anhima.sf.plot_site_frequency_spectrum(pop2_sfs_folded, ax=ax, label='pop2', plot_kwargs=dict(lw=2))
anhima.sf.plot_site_frequency_spectrum(pop3_sfs_folded, ax=ax, label='pop3', plot_kwargs=dict(lw=2))
ax.set_xlabel('minor allele count')
ax.legend();



In [6]:
pop1_sfs_folded_scaled = anhima.sf.site_frequency_spectrum_folded_scaled(pop1_biallelic_ac)
pop2_sfs_folded_scaled = anhima.sf.site_frequency_spectrum_folded_scaled(pop2_biallelic_ac)
pop3_sfs_folded_scaled = anhima.sf.site_frequency_spectrum_folded_scaled(pop3_biallelic_ac)

fig, ax = plt.subplots()
anhima.sf.plot_site_frequency_spectrum(pop1_sfs_folded_scaled, ax=ax, label='pop1', plot_kwargs=dict(lw=2), m=m)
anhima.sf.plot_site_frequency_spectrum(pop2_sfs_folded_scaled, ax=ax, label='pop2', plot_kwargs=dict(lw=2), m=m)
anhima.sf.plot_site_frequency_spectrum(pop3_sfs_folded_scaled, ax=ax, label='pop3', plot_kwargs=dict(lw=2), m=m)
ax.set_ylim(bottom=0)
ax.set_ylabel('scaled site frequency')
ax.set_xlabel('minor allele frequency')
ax.legend(bbox_to_anchor=(1, 1), loc='upper left');



In [7]: