anhima.f2 - Doubleton sharing


In [1]:
from __future__ import division, print_function
import numpy as np
np.random.seed(42)
import scipy.stats
import random
random.seed(1)
import matplotlib.pyplot as plt
%matplotlib inline
import sys
# import anhima
# # dev imports
sys.path.insert(0, '..')
%reload_ext autoreload
%autoreload 1
%aimport anhima.sim
%aimport anhima.gt
%aimport anhima.af
%aimport anhima.f2

In [2]:
# simulate genotypes 
n_variants = 10000
n_samples = 100
ploidy = 2
af_dist = scipy.stats.beta(a=.1, b=6)
p_missing = 0
genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, af_dist, p_missing, ploidy)

# simulate three sub-populations with relatedness
pop1_genotypes = anhima.sim.simulate_relatedness(genotypes[:, :n_samples//3], relatedness=.5, n_iter=100)
pop23_genotypes = anhima.sim.simulate_relatedness(genotypes[:, n_samples//3:], relatedness=.5, n_iter=100)
pop2_genotypes = anhima.sim.simulate_relatedness(pop23_genotypes[:, n_samples//3:], relatedness=.5, n_iter=20)
pop3_genotypes = anhima.sim.simulate_relatedness(pop23_genotypes[:, :n_samples//3], relatedness=.5, n_iter=20)

In [3]:
pop1_ac = anhima.af.allele_count(pop1_genotypes, allele=1)
pop2_ac = anhima.af.allele_count(pop2_genotypes, allele=1)
pop3_ac = anhima.af.allele_count(pop3_genotypes, allele=1)
subpops_ac = np.column_stack([pop1_ac, pop2_ac, pop3_ac])

In [4]:
# raw counts of shared doubletons
counts = anhima.f2.count_shared_doubletons(subpops_ac)
counts


Out[4]:
array([[53, 14, 13],
       [14, 59, 79],
       [13, 79, 44]])

In [5]:
subpop_labels = ['pop1', 'pop2', 'pop3']

In [6]:
anhima.f2.plot_shared_doubletons(counts, 
                                 subpop_labels=subpop_labels);



In [7]:
anhima.f2.plot_shared_doubletons(counts, 
                                 subpop_labels=subpop_labels,
                                 flip=True);



In [8]:
anhima.f2.plot_shared_doubletons(counts, 
                                 subpop_labels=subpop_labels,
                                 relative=True);



In [9]:
n_samples = [pop1_genotypes.shape[1], pop2_genotypes.shape[1], pop3_genotypes.shape[1]]
n_samples


Out[9]:
[33, 34, 33]

In [10]:
# normalise doubleton counts by the number of distinct pairs of haplotypes in each comparison
counts_normed = anhima.f2.normalise_doubleton_counts(counts, n_samples=n_samples, ploidy=2)
counts_normed


Out[10]:
array([[ 0.02470862,  0.00311943,  0.00298439],
       [ 0.00311943,  0.02589991,  0.0176025 ],
       [ 0.00298439,  0.0176025 ,  0.02051282]])

In [11]:
anhima.f2.plot_shared_doubletons(counts_normed, 
                                 subpop_labels=subpop_labels);



In [12]:
anhima.f2.plot_total_doubletons(counts, subpop_labels=subpop_labels);



In [13]:
anhima.f2.plot_total_doubletons(counts, subpop_labels=subpop_labels, orientation='horizontal');



In [14]:
anhima.f2.plot_f2_fig(counts, subpop_labels=subpop_labels, figsize_factor=1.5);



In [15]:
anhima.f2.plot_f2_fig(counts, subpop_labels=subpop_labels, figsize_factor=1.5, relative=True);



In [16]:
anhima.f2.plot_f2_fig(counts, 
                      subpop_labels=subpop_labels, 
                      figsize_factor=1.5, 
                      relative=True, 
                      normed=True, 
                      n_samples=n_samples, 
                      ploidy=2);