In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
from math import erf, sqrt
def normal_approx_to_binomial(n, p):
'''finds mu and sigma corresponding to a Binomial(n, p)'''
mu = p * n
sigma = sqrt(p * (1 - p) * n)
return mu, sigma
def normal_cdf(x, mu=0, sigma=1):
'''We can use a normal CDF to determine if a random variable
falls within some range.
'''
return (1 + erf((x - mu) / sqrt(2) / sigma)) / 2
normal_prob_below = normal_cdf
def normal_prob_above(lo, mu=0, sigma=1):
return 1 - normal_cdf(lo, mu, sigma)
def normal_prob_between(lo, hi, mu=0, sigma=1):
return normal_cdf(hi, mu, sigma) - normal_cdf(lo, mu, sigma)
def normal_prob_outside(lo, hi, mu=0, sigma=1):
return 1 - normal_prob_between(lo, hi, mu, sigma)
def two_sided_p_value(x, mu=0, sigma=1):
if x >= mu:
return 2 * normal_prob_above(x, mu, sigma)
else:
return 2 * normal_prob_below(x, mu, sigma)
In [3]:
# To find the interval containing p percent of a random distribution, use the inverse normal CDF
def inverse_normal_cdf(p, mu=0, sigma=1, tolerance=0.00001):
if mu != 0 or sigma != 1:
return mu + sigma * inverse_normal_cdf(p, tolerance=tolerance)
low_z, low_p = -10.0, 0
hi_z, hi_p = 10.0, 1
while hi_z - low_z > tolerance:
mid_z = (low_z + hi_z) / 2
mid_p = normal_cdf(mid_z)
if mid_p < p:
low_z, low_p = mid_z, mid_p
elif mid_p > p:
hi_z, hi_p = mid_z, mid_p
else:
break
return mid_z
def normal_upper_bound(prob, mu=0, sigma=1):
'''returns the z for which P(Z <= z) = prob'''
return inverse_normal_cdf(prob, mu, sigma)
def normal_lower_bound(prob, mu=0, sigma=1):
'''returns the z for which P(Z >= z) = prob'''
return inverse_normal_cdf(1 - prob, mu, sigma)
def normal_two_sided_bounds(prob, mu=0, sigma=1):
'''returns the symmetric (about the mean) bounds
that contain the specified prob'''
tail_probability = (1 - prob) / 2
upper_bound = normal_lower_bound(tail_probability, mu, sigma)
lower_bound = normal_upper_bound(tail_probability, mu, sigma)
return lower_bound, upper_bound
print("\nFlip a fair coin 1000 times...")
mu0, sigma0 = normal_approx_to_binomial(1000, 0.5)
print('mu=' + str(mu0) + ' and sigma=' + str(sigma0))
print('\nTest if your hypothesis falls outside of bounds...')
print(normal_two_sided_bounds(0.95, mu0, sigma0))
In [ ]: