In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
from math import erf, sqrt


def normal_approx_to_binomial(n, p):
    '''finds mu and sigma corresponding to a Binomial(n, p)'''
    mu = p * n
    sigma = sqrt(p * (1 - p) * n)
    return mu, sigma


def normal_cdf(x, mu=0, sigma=1):
    '''We can use a normal CDF to determine if a random variable
    falls within some range.
    '''
    return (1 + erf((x - mu) / sqrt(2) / sigma)) / 2


normal_prob_below = normal_cdf


def normal_prob_above(lo, mu=0, sigma=1):
    return 1 - normal_cdf(lo, mu, sigma)


def normal_prob_between(lo, hi, mu=0, sigma=1):
    return normal_cdf(hi, mu, sigma) - normal_cdf(lo, mu, sigma)


def normal_prob_outside(lo, hi, mu=0, sigma=1):
    return 1 - normal_prob_between(lo, hi, mu, sigma)


def two_sided_p_value(x, mu=0, sigma=1):
    if x >= mu:
        return 2 * normal_prob_above(x, mu, sigma)
    else:
        return 2 * normal_prob_below(x, mu, sigma)

In [3]:
# To find the interval containing p percent of a random distribution, use the inverse normal CDF
def inverse_normal_cdf(p, mu=0, sigma=1, tolerance=0.00001):
    if mu != 0 or sigma != 1:
        return mu + sigma * inverse_normal_cdf(p, tolerance=tolerance)
    
    low_z, low_p = -10.0, 0
    hi_z, hi_p = 10.0, 1
    while hi_z - low_z > tolerance:
        mid_z = (low_z + hi_z) / 2
        mid_p = normal_cdf(mid_z)
        if mid_p < p:
            low_z, low_p = mid_z, mid_p
        elif mid_p > p:
            hi_z, hi_p = mid_z, mid_p
        else:
            break
    
    return mid_z


def normal_upper_bound(prob, mu=0, sigma=1):
    '''returns the z for which P(Z <= z) = prob'''
    return inverse_normal_cdf(prob, mu, sigma)


def normal_lower_bound(prob, mu=0, sigma=1):
    '''returns the z for which P(Z >= z) = prob'''
    return inverse_normal_cdf(1 - prob, mu, sigma)


def normal_two_sided_bounds(prob, mu=0, sigma=1):
    '''returns the symmetric (about the mean) bounds
    that contain the specified prob'''
    tail_probability = (1 - prob) / 2
    upper_bound = normal_lower_bound(tail_probability, mu, sigma)
    lower_bound = normal_upper_bound(tail_probability, mu, sigma)
    return lower_bound, upper_bound


print("\nFlip a fair coin 1000 times...")
mu0, sigma0 = normal_approx_to_binomial(1000, 0.5)
print('mu=' + str(mu0) + ' and sigma=' + str(sigma0))


print('\nTest if your hypothesis falls outside of bounds...')
print(normal_two_sided_bounds(0.95, mu0, sigma0))


Flip a fair coin 1000 times...
mu=500.0 and sigma=15.811388300841896

Test if your hypothesis falls outside of bounds...
(469.01026640487555, 530.9897335951244)

In [ ]: