notebook.community

Edit and run



In [1]:

    
# Notebook setup: imports, fixed random seeds, inline plotting, and
# auto-reloading of the anhima sub-modules used in the cells below.
from __future__ import division, print_function
import numpy as np
# seed NumPy's global random number generator
np.random.seed(42)
import scipy.stats
import random
# seed the standard-library random module as well
random.seed(1)
import matplotlib.pyplot as plt
%matplotlib inline
import sys
# The plain `import anhima` below is disabled; the %aimport lines further down
# are what the later cells rely on to make the `anhima` package available.
# import anhima
# # dev imports
# put the parent directory first on the module search path
sys.path.insert(0, '..')
%reload_ext autoreload
# autoreload mode 1: only modules registered with %aimport are reloaded
%autoreload 1
%aimport anhima.sim
%aimport anhima.gt
%aimport anhima.af
%aimport anhima.f2



In [2]:

    
# --- simulation parameters ---
n_variants = 10000
n_samples = 100
ploidy = 2
p_missing = 0
af_dist = scipy.stats.beta(a=.1, b=6)

# simulate the base genotype array
genotypes = anhima.sim.simulate_biallelic_genotypes(
    n_variants, n_samples, af_dist, p_missing, ploidy
)

# cut point on the second (sample) axis: one third of the samples
n_third = n_samples // 3

# Three sub-populations with relatedness:
#   - pop1 comes from the first third of the samples;
#   - the remaining samples are first processed together (pop23),
#     then split again into pop2 (tail) and pop3 (head).
pop1_genotypes = anhima.sim.simulate_relatedness(
    genotypes[:, :n_third], relatedness=.5, n_iter=100
)
pop23_genotypes = anhima.sim.simulate_relatedness(
    genotypes[:, n_third:], relatedness=.5, n_iter=100
)
pop2_genotypes = anhima.sim.simulate_relatedness(
    pop23_genotypes[:, n_third:], relatedness=.5, n_iter=20
)
pop3_genotypes = anhima.sim.simulate_relatedness(
    pop23_genotypes[:, :n_third], relatedness=.5, n_iter=20
)



In [3]:

    
# allele counts for allele 1, computed for each sub-population in turn
pop1_ac, pop2_ac, pop3_ac = (
    anhima.af.allele_count(pop_genotypes, allele=1)
    for pop_genotypes in (pop1_genotypes, pop2_genotypes, pop3_genotypes)
)

# combine the three count arrays side by side, in pop1, pop2, pop3 order
subpops_ac = np.column_stack((pop1_ac, pop2_ac, pop3_ac))



In [4]:

    
# raw counts of shared doubletons
counts = anhima.f2.count_shared_doubletons(subpops_ac)
counts









    Out[4]:





array([[53, 14, 13],
       [14, 59, 79],
       [13, 79, 44]])



In [5]:

    
# display labels, listed in the same order in which the sub-populations
# were stacked into subpops_ac
subpop_labels = [
    'pop1',
    'pop2',
    'pop3',
]



In [6]:

    
# plot the raw shared-doubleton counts with default options
# (trailing semicolon suppresses the return value's repr)
anhima.f2.plot_shared_doubletons(counts, subpop_labels=subpop_labels);



In [7]:

    
# same plot of the raw counts, with the `flip` option enabled
anhima.f2.plot_shared_doubletons(
    counts, subpop_labels=subpop_labels, flip=True
);



In [8]:

    
# same plot of the raw counts, with the `relative` option enabled
anhima.f2.plot_shared_doubletons(
    counts, subpop_labels=subpop_labels, relative=True
);



In [9]:

    
# size of each sub-population, read from the second axis of its genotype array
# NOTE: this rebinds `n_samples` (an int in the simulation cell) to a list of
# per-population sizes; the later cells use the list form.
n_samples = list(
    pop_genotypes.shape[1]
    for pop_genotypes in (pop1_genotypes, pop2_genotypes, pop3_genotypes)
)
n_samples









    Out[9]:





[33, 34, 33]



In [10]:

    
# divide the doubleton counts by the number of distinct haplotype pairs
# available in each comparison (per-population sample sizes, diploid)
counts_normed = anhima.f2.normalise_doubleton_counts(
    counts,
    n_samples=n_samples,
    ploidy=2
)
counts_normed









    Out[10]:





array([[ 0.02470862,  0.00311943,  0.00298439],
       [ 0.00311943,  0.02589991,  0.0176025 ],
       [ 0.00298439,  0.0176025 ,  0.02051282]])



In [11]:

    
# plot the normalised counts rather than the raw ones
anhima.f2.plot_shared_doubletons(counts_normed, subpop_labels=subpop_labels);



In [12]:

    
# total-doubletons plot from the raw counts, default orientation
anhima.f2.plot_total_doubletons(
    counts,
    subpop_labels=subpop_labels
);



In [13]:

    
# total-doubletons plot again, this time with orientation='horizontal'
anhima.f2.plot_total_doubletons(
    counts,
    subpop_labels=subpop_labels,
    orientation='horizontal'
);



In [14]:

    
# f2 figure from the raw counts, drawn at 1.5x the figure-size factor
anhima.f2.plot_f2_fig(
    counts,
    subpop_labels=subpop_labels,
    figsize_factor=1.5
);



In [15]:

    
# f2 figure again, with the `relative` option enabled
anhima.f2.plot_f2_fig(
    counts,
    subpop_labels=subpop_labels,
    figsize_factor=1.5,
    relative=True
);



In [16]:

    
# f2 figure with both `relative` and `normed` enabled; the per-population
# sample sizes and the ploidy are passed alongside the normed option
anhima.f2.plot_f2_fig(
    counts,
    subpop_labels=subpop_labels, figsize_factor=1.5,
    relative=True, normed=True,
    n_samples=n_samples, ploidy=2
);

anhima.f2 - Doubleton sharing

`anhima.f2` - Doubleton sharing