Coin Flipping



In [1]:

    
from __future__ import division
import math
from probability import *

def normal_approximation_to_binomial(n, p):
    mu = p*n
    sigma = math.sqrt(p * (1-p) * n)
    return mu, sigma



In [2]:

    
normal_approximation_to_binomial(5000, 0.5)









    Out[2]:





(2500.0, 35.35533905932738)

Search PDF



In [3]:

    
normal_prob_below = normal_cdf

def normal_prob_above(lo, mu = 0, sigma = 1):
    return 1 - normal_cdf(lo, mu, sigma)

def normal_prob_between(lo, hi, mu = 0, sigma = 1):
    return normal_cdf(hi, mu, sigma) - normal_cdf(lo, mu, sigma)

def normal_prob_outside(lo, hi, mu = 0, sigma = 1):
    1 - normal_prob_between(lo, hi, mu, sigma)



In [4]:

    
def normal_upper_bound(p, mu = 0, sigma = 1):
    return inverse_normal_cdf(p, mu, sigma)

def normal_lower_bound(p, mu = 0, sigma = 1):
    return inverse_normal_cdf(1 - p, mu, sigma)

def normal_two_sided_bounds(p, mu = 0, sigma = 1):
    tail_prob = (1 - p) / 2
    
    upper_bound = normal_lower_bound(tail_prob, mu, sigma)
    lower_bound = normal_upper_bound(tail_prob, mu, sigma)
    
    return lower_bound, upper_bound



In [5]:

    
mu_0, sigma_0 = normal_approximation_to_binomial(1000, 0.5)



In [6]:

    
# H_0 range
normal_two_sided_bounds(0.95, mu_0, sigma_0)









    Out[6]:





(469.011020350622, 530.988979649378)



In [7]:

    
#power

lo, hi = normal_two_sided_bounds(0.95, mu_0, sigma_0)

mu_1, sigma_1 = normal_approximation_to_binomial(1000, 0.55)

type_2_prob = normal_prob_between(lo, hi, mu_1, sigma_1)
power = 1 - type_2_prob



In [8]:

    
power









    Out[8]:





0.8865572138760063



In [9]:

    
def two_sided_p_value(x, mu = 0, sigma = 1):
    if x >= mu:
        return 2 * normal_prob_above(x, mu, sigma)
    else:
        return 2 * normal_prob_below(x, mu, sigma)



In [10]:

    
two_sided_p_value(529.5, mu_0, sigma_0)









    Out[10]:





0.06207721579598857



In [11]:

    
extreme_value_count = 0
for _ in xrange(100000):
    num_heads = sum(1 if random.random() < 0.5 else 0
                   for _ in xrange(1000))
    if num_heads >= 530 or num_heads <= 470:
        extreme_value_count += 1
        
print extreme_value_count / 100000



In [12]:

    
two_sided_p_value(531.5, mu_0, sigma_0)









    Out[12]:





0.046345287837786575



In [13]:

    
def run_experiment():
    return [random.random() < 0.5 for _ in xrange(1000)]

def reject_fairness(experiment):
    num_heads = len([flip for flip in experiment if flip])
    return num_heads < 469 or num_heads > 531



In [14]:

    
random.seed(0)
experiments = [run_experiment() for _ in xrange(1000)]
num_rejections = len([experiment
                     for experiment in experiments
                     if reject_fairness(experiment)])

print num_rejections

A/B Testing



In [15]:

    
def estimated_parameters(N, n):
    p = n / N
    sigma = math.sqrt(p * (1-p) / N)
    return p, sigma
def a_b_test_statistic(N_A, n_A, N_B, n_B):
    p_A, sigma_A = estimated_parameters(N_A, n_A)
    p_B, sigma_B = estimated_parameters(N_B, n_B)
    
    return (p_B - p_A) / math.sqrt(sigma_A**2 + sigma_B**2)



In [19]:

    
z = a_b_test_statistic(1000,200,1000,150)



In [20]:

    
two_sided_p_value(z)









    Out[20]:





0.003189699706216853

Bayesian



In [22]:

    
def B(alpha, beta):
    return math.gamma(alpha) * math.gamma(beta) / math.gamma(alpha + beta)

def beta_pdf(x, alpha, beta):
    if x < 0 or x > 1:
        return 0
    return x**(alpha - 1) * (1 - x)**(beta -1) / B(alpha, beta)



In [ ]: