Genotypes and phenotypes

Genotypes are biallelic in the form of (0, 1) where 0 is a recessive allele and 1 is a dominant allele. Each genotype is associated with a character of the organism (e.g., seed color) and recessive and dominant phenotypes (e.g., "green" and "yellow").



In [3]:

    
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
import random



In [4]:

    
def create_parental_population(n, allele):
    return np.array([allele] * 2*n).reshape(n, 2)



In [5]:

    
create_parental_population(10, 0)









    Out[5]:





array([[0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0]])



In [30]:

    
n = 1000



In [31]:

    
p1_dominant = create_parental_population(n, 1)



In [32]:

    
p1_recessive = create_parental_population(n, 0)



In [13]:

    
def get_alleles(population):
    return np.apply_along_axis(np.random.choice, 1, population)

def cross_populations(population1, population2):
    return np.array([get_alleles(population1), get_alleles(population2)]).T



In [33]:

    
f1 = cross_populations(p1_dominant, p1_recessive)



In [34]:

    
f2 = cross_populations(f1, f1)



In [35]:

    
f2[:10]









    Out[35]:





array([[1, 0],
       [0, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [1, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [0, 1]])



In [38]:

    
def get_allele_frequencies(population):
    """
    Return the frequency of each allele in the given population.
    """
    unique, inverse = np.unique(population, return_inverse=True)
    count = np.zeros(len(unique), np.int)
    np.add.at(count, inverse, 1)
    return count / np.float(count.sum())

def get_expected_genotype_frequencies(population):
    """
    Return an array of genotype frequencies expected for the given population based on Hardy-Weinberg equilibrium.
    """
    allele_frequencies = get_allele_frequencies(population)
    return np.array([
        allele_frequencies[0] ** 2,
        2 * np.product(allele_frequencies),
        allele_frequencies[1] ** 2
    ])

def get_observed_genotype_frequencies(population):
    """
    Return the observed genotype frequencies of the given population.
    """
    genotypes = np.apply_along_axis(sum, 1, population)
    return np.bincount(genotypes) / np.sum(genotypes, dtype=np.float64)



In [22]:

    
f2_allele_frequencies = get_allele_frequencies(f2)



In [36]:

    
np.sum(get_expected_genotype_frequencies(f2))









    Out[36]:





0.99999999999999978



In [39]:

    
np.sum(get_observed_genotype_frequencies(f2))









    Out[39]:





1.0172939979654121



In [189]:

    
f3 = cross_populations(f2, f2)



In [190]:

    
get_genotype_frequencies(f3)









    Out[190]:





array([ 0.37649402,  0.23904382,  0.38047809])