Chapter 6 Probability

Dependence and Independence

Conditional Probability


In [1]:
import random

In [2]:
def random_kid():
    """Return 'girl' or 'boy', each with probability 1/2."""
    genders = ['girl', 'boy']
    return random.choice(genders)

In [3]:
# running counts over the simulated two-child families below
both_girls = 0    # both children are girls
older_girl = 0    # the older child is a girl
either_girl = 0   # at least one child is a girl

In [4]:
random.seed(0)

In [5]:
# simulate 10000 two-child families, updating the three counters above
for _ in range(10000):
    younger_kid = random_kid()
    older_kid = random_kid()
    if older_kid == 'girl':
        older_girl += 1
    if younger_kid == 'girl' and older_kid == 'girl':
        both_girls += 1
    if younger_kid == 'girl' or older_kid == 'girl':
        either_girl += 1

In [6]:
print('P(both | older) =', both_girls / older_girl)


P(both | older) = 0.5008888011060636

In [7]:
print('P(both | either) =', both_girls / either_girl)


P(both | either) = 0.3368756641870351

Bayes's Theorem

Random Variables

Continuous Distribution


In [8]:
def uniform_pdf(x):
    """Density of the uniform distribution on [0, 1): 1 inside, 0 outside."""
    return 1 if 0 <= x < 1 else 0

In [9]:
def uniform_cdf(x):
    """Return the probability that a uniform [0, 1) random variable is <= x."""
    if x >= 1:
        return 1
    if x >= 0:
        return x
    return 0

The Normal Distribution


In [10]:
import math

In [11]:
SQRT_TWO_PI = math.sqrt(2 * math.pi)  # hoisted so it isn't recomputed on every call

def normal_pdf(x, mu=0, sigma=1):
    """Probability density of Normal(mu, sigma**2) evaluated at x.

    Defaults (mu=0, sigma=1) give the standard normal density.
    """
    return math.exp(-(x - mu) ** 2 / (2 * sigma ** 2)) / (SQRT_TWO_PI * sigma)

In [12]:
%matplotlib inline
import matplotlib.pyplot as plt

In [13]:
xs = [x / 10.0 for x in range(-50, 50)]

In [14]:
# plot several normal pdfs to show how mu shifts and sigma spreads the curve
for kwargs, fmt, label in [
    ({}, '-', 'mu=0, sigma=1'),
    ({'sigma': 2}, '--', 'mu=0, sigma=2'),
    ({'sigma': 0.5}, ':', 'mu=0, sigma=0.5'),
    ({'mu': -1}, '-.', 'mu=-1, sigma=1'),
]:
    plt.plot(xs, [normal_pdf(x, **kwargs) for x in xs], fmt, label=label)
plt.legend()
plt.title('Various Normal pdfs')
plt.show()



In [15]:
def normal_cdf(x, mu=0, sigma=1):
    """Cumulative distribution function of Normal(mu, sigma**2) at x."""
    z = (x - mu) / math.sqrt(2) / sigma
    return 0.5 * (1 + math.erf(z))

In [16]:
# plot the matching normal cdfs on the same x grid
for kwargs, fmt, label in [
    ({}, '-', 'mu=0, sigma=1'),
    ({'sigma': 2}, '--', 'mu=0, sigma=2'),
    ({'sigma': 0.5}, ':', 'mu=0, sigma=0.5'),
    ({'mu': -1}, '-.', 'mu=-1, sigma=1'),
]:
    plt.plot(xs, [normal_cdf(x, **kwargs) for x in xs], fmt, label=label)
plt.legend(loc=4)  # lower-right corner, clear of the curves
plt.title('Various Normal cdfs')
plt.show()



In [17]:
def inverse_normal_cdf(p, mu=0, sigma=1, tolerance=0.00001):
    """Find an approximate z with normal_cdf(z, mu, sigma) == p via binary search.

    p must lie in (0, 1); the search bisects the interval [-10, 10] (which
    covers essentially all of the standard normal's mass) until its width
    is below `tolerance`.
    """
    # if not standard, compute the standard-normal inverse and rescale
    if mu != 0 or sigma != 1:
        return mu + sigma * inverse_normal_cdf(p, tolerance=tolerance)

    low_z = -10.0  # normal_cdf(-10) is (very close to) 0
    hi_z = 10.0    # normal_cdf(10)  is (very close to) 1
    while hi_z - low_z > tolerance:
        mid_z = (low_z + hi_z) / 2
        mid_p = normal_cdf(mid_z)
        if mid_p < p:
            low_z = mid_z   # midpoint is still too low; search above it
        elif mid_p > p:
            hi_z = mid_z    # midpoint is too high; search below it
        else:
            break           # exact hit

    return mid_z

The Central Limit Theorem


In [18]:
import random
from collections import Counter

In [19]:
def bernoulli_trial(p):
    """Return 1 with probability p and 0 with probability 1 - p."""
    success = random.random() < p
    return int(success)

In [20]:
def binomial(n, p):
    """Return the number of successes in n independent bernoulli_trial(p) draws."""
    successes = 0
    for _ in range(n):
        successes += bernoulli_trial(p)
    return successes

In [21]:
def make_hist(p, n, num_points):
    """Draw num_points samples of Binomial(n, p), plot their empirical
    distribution as a bar chart, and overlay the normal approximation."""
    samples = [binomial(n, p) for _ in range(num_points)]

    # bar chart of the observed frequencies, centered on each integer outcome
    counts = Counter(samples)
    plt.bar([value - 0.4 for value in counts.keys()],
            [count / num_points for count in counts.values()],
            0.8,
            color='0.75')

    mu = p * n
    sigma = math.sqrt(n * p * (1 - p))

    # line chart of the normal approximation:
    # P(X == i) ~= normal_cdf(i + 0.5) - normal_cdf(i - 0.5)  (continuity correction)
    outcomes = range(min(samples), max(samples) + 1)
    approx = [normal_cdf(i + 0.5, mu, sigma) - normal_cdf(i - 0.5, mu, sigma)
              for i in outcomes]
    plt.plot(outcomes, approx)
    plt.title('Binomial Distribution vs. Normal Approximation')
    plt.show()

In [22]:
make_hist(0.75, 100, 10000)


For Further Exploration