In [1]:
from probability import normal_cdf, inverse_normal_cdf # import the probability code written in the previous chapter
import math, random

In [2]:
def normal_approximation_to_binomial(n, p):
    """finds mu and sigma corresponding to a Binomial(n, p)"""
    mu = p * n
    sigma = math.sqrt(p * (1 - p) * n)
    return mu, sigma

In [3]:
mu_0, sigma_0 = normal_approximation_to_binomial(1000, 0.5)

mu_0, sigma_0


Out[3]:
(500.0, 15.811388300841896)

In [4]:
normal_probability_below = normal_cdf

# it's above the threshold if it's not below the threshold
def normal_probability_above(lo, mu=0, sigma=1):
    return 1 - normal_cdf(lo, mu, sigma)
    
# it's between if it's less than hi, but not less than lo
def normal_probability_between(lo, hi, mu=0, sigma=1):
    return normal_cdf(hi, mu, sigma) - normal_cdf(lo, mu, sigma)

# it's outside if it's not between
def normal_probability_outside(lo, hi, mu=0, sigma=1):
    return 1 - normal_probability_between(lo, hi, mu, sigma)

def normal_upper_bound(probability, mu=0, sigma=1):
    """returns the z for which P(Z <= z) = probability"""
    return inverse_normal_cdf(probability, mu, sigma)
    
def normal_lower_bound(probability, mu=0, sigma=1):
    """returns the z for which P(Z >= z) = probability"""
    return inverse_normal_cdf(1 - probability, mu, sigma)

def normal_two_sided_bounds(probability, mu=0, sigma=1):
    """returns the symmetric (about the mean) bounds 
    that contain the specified probability"""
    tail_probability = (1 - probability) / 2

    # upper bound should have tail_probability above it
    upper_bound = normal_lower_bound(tail_probability, mu, sigma)

    # lower bound should have tail_probability below it
    lower_bound = normal_upper_bound(tail_probability, mu, sigma)

    return lower_bound, upper_bound

In [5]:
normal_two_sided_bounds(0.95, mu_0, sigma_0)


Out[5]:
(469.01026640487555, 530.9897335951244)
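
# Quick sanity check (a sketch, not an original cell): about 95% of the
# probability mass should lie between the two-sided 95% bounds.
lo, hi = normal_two_sided_bounds(0.95, mu_0, sigma_0)
normal_probability_between(lo, hi, mu_0, sigma_0)  # should be very close to 0.95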

In [6]:
def two_sided_p_value(x, mu=0, sigma=1):
    if x >= mu:
        # if x is greater than the mean, the tail is above x
        return 2 * normal_probability_above(x, mu, sigma)
    else:
        # if x is less than the mean, the tail is below x
        return 2 * normal_probability_below(x, mu, sigma)

In [7]:
two_sided_p_value(529.5, mu_0, sigma_0) # 529.5 rather than 530 because of the continuity correction


Out[7]:
0.06207721579598857

In [8]:
def count_extreme_values():
    extreme_value_count = 0
    for _ in range(100000):
        num_heads = sum(1 if random.random() < 0.5 else 0    # count # of heads
                        for _ in range(1000))                # in 1000 flips
        if num_heads >= 530 or num_heads <= 470:             # and count how often
            extreme_value_count += 1                         # the # is 'extreme'

    return extreme_value_count / 100000

In [9]:
count_extreme_values()


Out[9]:
0.06121
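
# the simulated frequency (~0.061) agrees closely with the analytic
# two-sided p-value (~0.062) from In [7]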

In [10]:
p_hat = 525 / 1000
mu = p_hat
sigma = math.sqrt(p_hat * (1 - p_hat) / 1000)

lower, upper = normal_two_sided_bounds(0.95, mu, sigma)

lower < 0.5 < upper # 0.5 lies inside the 95% interval: consistent with a fair coin


Out[10]:
True

In [11]:
p_hat = 540 / 1000
mu = p_hat
sigma = math.sqrt(p_hat * (1 - p_hat) / 1000)

lower, upper = normal_two_sided_bounds(0.95, mu, sigma)

lower < 0.5 < upper # 0.5 lies outside the 95% interval: the coin looks biased


Out[11]:
False

In [12]:
upper_p_value = normal_probability_above
lower_p_value = normal_probability_below
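
# A quick one-sided illustration (a sketch, not an original cell): with 525
# heads the one-sided p-value stays above 5%, while with 527 heads it dips
# below 5%, so only the latter leads us to reject the null hypothesis.
upper_p_value(524.5, mu_0, sigma_0)  # ~0.061, don't reject
upper_p_value(526.5, mu_0, sigma_0)  # ~0.047, reject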

In [13]:
def run_experiment():
    """flip a fair coin 1000 times, True = heads, False = tails"""
    return [random.random() < 0.5 for _ in range(1000)]

def reject_fairness(experiment):
    """using the 5% significance levels"""
    num_heads = len([flip for flip in experiment if flip])
    return num_heads < 469 or num_heads > 531

In [14]:
random.seed(0)
experiments = [run_experiment() for _ in range(1000)]

[sum(experiment) for experiment in experiments][:10]


Out[14]:
[508, 491, 484, 483, 502, 499, 483, 511, 481, 538]

In [15]:
num_rejections = len([experiment for experiment in experiments if reject_fairness(experiment)])
num_rejections


Out[15]:
46
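
# 46 rejections out of 1000 fair-coin experiments: close to the ~5%
# type-1 error rate we'd expect at the 5% significance level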

In [16]:
def estimated_parameters(N, n):
    """given N trials with n successes, returns the estimated
    success probability and its standard error"""
    p = n / N
    sigma = math.sqrt(p * (1 - p) / N)
    return p, sigma

def a_b_test_statistic(N_A, n_A, N_B, n_B):
    """z-statistic for the difference between the two estimated probabilities"""
    p_A, sigma_A = estimated_parameters(N_A, n_A)
    p_B, sigma_B = estimated_parameters(N_B, n_B)
    return (p_B - p_A) / math.sqrt(sigma_A ** 2 + sigma_B ** 2)
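
# Under the null hypothesis p_A = p_B, this statistic is approximately a
# standard normal, which is why it can be passed to two_sided_p_value with
# its defaults mu=0, sigma=1 below.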

In [18]:
z = a_b_test_statistic(1000, 200, 1000, 180)
z


Out[18]:
-1.1403464899034472

In [19]:
two_sided_p_value(z) # p > 0.05: we cannot reject the null hypothesis that the two ads are equally effective


Out[19]:
0.254141976542236

In [20]:
z = a_b_test_statistic(1000, 200, 1000, 150)
z


Out[20]:
-2.948839123097944

In [21]:
two_sided_p_value(z) # p < 0.05: we reject the null hypothesis; the two ads very likely differ in effectiveness


Out[21]:
0.003189699706216853

In [22]:
##
#
# Bayesian Inference
#
##

def B(alpha, beta):
    """a normalizing constant so that the total probability is 1"""
    return math.gamma(alpha) * math.gamma(beta) / math.gamma(alpha + beta)

def beta_pdf(x, alpha, beta):
    if x < 0 or x > 1:          # no weight outside of [0, 1]    
        return 0        
    return x ** (alpha - 1) * (1 - x) ** (beta - 1) / B(alpha, beta)
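
# A minimal sketch of Bayesian updating with these definitions (hypothetical
# helpers, not an original cell): observing h heads and t tails turns a
# Beta(alpha, beta) prior into a Beta(alpha + h, beta + t) posterior.
def beta_posterior(alpha, beta, h, t):
    return alpha + h, beta + t

def beta_mean(alpha, beta):
    """mean of a Beta(alpha, beta) distribution"""
    return alpha / (alpha + beta)

# start from a uniform Beta(1, 1) prior and observe 3 heads in 10 flips
alpha, beta = beta_posterior(1, 1, 3, 7)
beta_mean(alpha, beta)  # 4/12 ~ 0.33, between the raw 0.3 and the prior mean 0.5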