In [3]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import random
In [4]:
def create_parental_population(n, allele):
return np.array([allele] * 2*n).reshape(n, 2)
In [5]:
create_parental_population(10, 0)
Out[5]:
In [30]:
n = 1000
In [31]:
p1_dominant = create_parental_population(n, 1)
In [32]:
p1_recessive = create_parental_population(n, 0)
In [13]:
def get_alleles(population):
return np.apply_along_axis(np.random.choice, 1, population)
def cross_populations(population1, population2):
return np.array([get_alleles(population1), get_alleles(population2)]).T
In [33]:
f1 = cross_populations(p1_dominant, p1_recessive)
In [34]:
f2 = cross_populations(f1, f1)
In [35]:
f2[:10]
Out[35]:
In [38]:
def get_allele_frequencies(population):
"""
Return the frequency of each allele in the given population.
"""
unique, inverse = np.unique(population, return_inverse=True)
count = np.zeros(len(unique), np.int)
np.add.at(count, inverse, 1)
return count / np.float(count.sum())
def get_expected_genotype_frequencies(population):
"""
Return an array of genotype frequencies expected for the given population based on Hardy-Weinberg equilibrium.
"""
allele_frequencies = get_allele_frequencies(population)
return np.array([
allele_frequencies[0] ** 2,
2 * np.product(allele_frequencies),
allele_frequencies[1] ** 2
])
def get_observed_genotype_frequencies(population):
"""
Return the observed genotype frequencies of the given population.
"""
genotypes = np.apply_along_axis(sum, 1, population)
return np.bincount(genotypes) / np.sum(genotypes, dtype=np.float64)
In [22]:
f2_allele_frequencies = get_allele_frequencies(f2)
In [36]:
np.sum(get_expected_genotype_frequencies(f2))
Out[36]:
In [39]:
np.sum(get_observed_genotype_frequencies(f2))
Out[39]:
In [189]:
f3 = cross_populations(f2, f2)
In [190]:
get_genotype_frequencies(f3)
Out[190]: