Chapter 7 Hypothesis and Inference

Statistical Hypothesis Testing

Example: Flipping a Coin



In [1]:

    
import math
import ipynb2mod
from Chapter6 import normal_cdf
from Chapter6 import inverse_normal_cdf









    



importing Jupyter Notebook from Chapter6.ipynb
P(both | older) = 0.5008888011060636
P(both | either) = 0.3368756641870351



In [2]:

    
def normal_approximation_to_binomial(n, p):
    """Return (mu, sigma) of the normal approximation to a Binomial(n, p).

    n -- number of trials
    p -- probability of success on a single trial

    The mean is n * p and the standard deviation is sqrt(n * p * (1 - p)).
    """
    variance = n * p * (1 - p)
    return n * p, math.sqrt(variance)



In [3]:

    
# the normal cdf _is_ the probability the variable is below a threshold,
# so no new function is needed: this name is bound to the very same
# callable as normal_cdf (imported from Chapter6)
normal_probability_below = normal_cdf



In [4]:

    
# "above lo" is the complement of "below lo"
def normal_probability_above(lo, mu=0, sigma=1):
    """Return the probability that a Normal(mu, sigma) variable is above lo."""
    probability_below = normal_cdf(lo, mu, sigma)
    return 1 - probability_below



In [5]:

    
# mass in the interval = (mass below hi) - (mass below lo)
def normal_probability_between(lo, hi, mu=0, sigma=1):
    """Return the probability that a Normal(mu, sigma) variable lies between lo and hi."""
    below_hi = normal_cdf(hi, mu, sigma)
    below_lo = normal_cdf(lo, mu, sigma)
    return below_hi - below_lo



In [6]:

    
# "outside [lo, hi]" is the complement of "between lo and hi"
def normal_probability_outside(lo, hi, mu=0, sigma=1):
    """Return the probability that a Normal(mu, sigma) variable is not between lo and hi."""
    probability_inside = normal_probability_between(lo, hi, mu, sigma)
    return 1 - probability_inside



In [7]:

    
def normal_upper_bound(probability, mu=0, sigma=1):
    """Return the z for which P(Z <= z) = probability.

    This is a direct lookup in the inverse normal cdf for the given
    mu and sigma.
    """
    upper_z = inverse_normal_cdf(probability, mu, sigma)
    return upper_z



In [8]:

    
def normal_lower_bound(probability, mu=0, sigma=1):
    """Return the z for which P(Z >= z) = probability.

    Having `probability` above z means having 1 - probability below it,
    so the inverse normal cdf is evaluated at the complement.
    """
    probability_below = 1 - probability
    return inverse_normal_cdf(probability_below, mu, sigma)



In [9]:

    
def normal_two_sided_bounds(probability, mu=0, sigma=1):
    """Return (lower, upper) bounds, symmetric about the mean, containing `probability`.

    The probability not contained in the interval is split evenly between
    the two tails.
    """
    # each tail gets half of whatever is left outside the interval
    per_tail = (1 - probability) / 2

    # the upper cutoff leaves per_tail above it ...
    hi_cutoff = normal_lower_bound(per_tail, mu, sigma)
    # ... and the lower cutoff leaves per_tail below it
    lo_cutoff = normal_upper_bound(per_tail, mu, sigma)

    return lo_cutoff, hi_cutoff



In [10]:

    
# normal approximation for 1000 trials with success probability 0.5;
# mu_0 and sigma_0 are reused by the cells below
mu_0, sigma_0 = normal_approximation_to_binomial(1000, 0.5)
print('mu_0: ', mu_0)
print('sigma_0: ',sigma_0)









    



mu_0:  500.0
sigma_0:  15.811388300841896



In [11]:

    
# symmetric bounds around mu_0 that contain 95% of the probability
normal_two_sided_bounds(0.95, mu_0, sigma_0)









    Out[11]:





(469.01026640487555, 530.9897335951244)



In [12]:

    
# 95% bounds based on assumption p is 0.5
lo, hi = normal_two_sided_bounds(0.95, mu_0, sigma_0)

# actual mu and sigma based on p = 0.55
mu_1, sigma_1 = normal_approximation_to_binomial(1000, 0.55)

# a type 2 error means we fail to reject the null hypothesis
# which will happen when X is still in our original interval
type_2_probability = normal_probability_between(lo, hi, mu_1, sigma_1)
# power is the probability of not making a type 2 error
power = 1 - type_2_probability



In [13]:

    
# display the power: 1 minus the probability of a type 2 error
power









    Out[13]:





0.886548001295367



In [ ]:

Confidence Intervals

P-hacking

Example: Running an A/B Test

Bayesian Inference



In [ ]: