In [190]:
    
from IPython.display import Image
from IPython.display import display
intersection = Image(url='http://www.astroml.org/_images/fig_prob_sum_1.png',width=500)
joint_probability = Image(url='http://www.astroml.org/_images/fig_conditional_probability_1.png',width=800)
random_conversion = Image(url='http://www.astroml.org/_images/fig_transform_distribution_1.png',width=800)
kurtosis_skew = Image(url='http://www.astroml.org/_images/fig_kurtosis_skew_1.png',width=500)
uniform_dist = Image(url='http://www.astroml.org/_images/fig_uniform_distribution_1.png',width=500)
gaussian_dist = Image(url='http://www.astroml.org/_images/fig_gaussian_distribution_1.png',width=500)
binomial_dist = Image(url='http://www.astroml.org/_images/fig_binomial_distribution_1.png',width=500)
poisson_dist = Image(url='http://www.astroml.org/_images/fig_poisson_distribution_1.png',width=500)
cauchy_dist = Image(url='http://www.astroml.org/_images/fig_cauchy_distribution_1.png',width=500)
cauchy_dist2 = Image(url='http://www.astroml.org/_images/fig_cauchy_median_mean_1.png',width=500)
laplace_dist = Image(url='http://www.astroml.org/_images/fig_laplace_distribution_1.png',width=500)
chi_squared_dist = Image(url='http://www.astroml.org/_images/fig_chi2_distribution_1.png',width=500)
student_t_dist = Image(url='http://www.astroml.org/_images/fig_student_t_distribution_1.png',width=500)
fisher_f_dist = Image(url='http://www.astroml.org/_images/fig_fisher_f_distribution_1.png',width=500)
beta_dist = Image(url='http://www.astroml.org/_images/fig_beta_distribution_1.png',width=500)
gamma_dist = Image(url='http://www.astroml.org/_images/fig_gamma_distribution_1.png',width=500)
weibull_dist = Image(url='http://www.astroml.org/_images/fig_weibull_distribution_1.png',width=500)
display(intersection)
    
    
Definitions
$A$, $B$ are events
$p(A)$ is the probability of $A$ occurring
Union
$p(A \cup B) = p(A) + p(B) - p(A \cap B)$
Intersection
$p(A \cap B) = p(A|B)\,p(B) = p(B|A)\,p(A)$
where $p(A|B)$ means the probability of event $A$ occurring "given" that event $B$ has occurred.
Law of Total Probability (derives from intersection definition)
$p(A) = \sum_{i=1}^{N} p(A|B_i)\,p(B_i)$
given that events $B_i$, $i = 1,\ldots,N$ are disjoint and their union is the set of all possible outcomes.
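As a quick numerical sanity check of the law of total probability (the event probabilities below are made up for illustration):
In [ ]:
    
# Check p(A) = sum_i p(A|B_i) p(B_i) with hypothetical numbers
p_B = [0.3, 0.7]          # disjoint events B_1, B_2 covering all outcomes
p_A_given_B = [0.9, 0.2]  # assumed conditional probabilities of A
p_A = sum(pa * pb for pa, pb in zip(p_A_given_B, p_B))
print("p(A) = %4.2f" % p_A)  # 0.9*0.3 + 0.2*0.7 = 0.41
    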
Definitions
$x$, $y$ are random variables
Independent variables
$p(x, y) = p(x)\,p(y)$
Dependent variables
$p(x, y) = p(x|y)\,p(y) = p(y|x)\,p(x)$
Marginal Probability Function
$p(x) = \int p(x, y)\,dy$
Law of Total Probability (continuous)
$p(x) = \int p(x|y)\,p(y)\,dy$
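As a sketch of how marginalization works numerically, we can discretize a joint density on a grid and integrate out one variable; the bivariate Gaussian below is just an assumed example:
In [ ]:
    
import numpy as np
from scipy import stats
# An example joint density p(x, y): product of two Gaussians (assumed for illustration)
x = np.linspace(-5, 5, 201)
y = np.linspace(-10, 10, 401)
X, Y = np.meshgrid(x, y, indexing='ij')
joint = stats.norm(0, 1).pdf(X) * stats.norm(1, 2).pdf(Y)
# Marginal p(x) = integral of p(x, y) dy, approximated with the trapezoid rule
p_x = np.trapz(joint, y, axis=1)
# Should match the known x marginal to grid precision
print("max |error| = %.2e" % np.max(np.abs(p_x - stats.norm(0, 1).pdf(x))))
    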
As an example of joint probability and conditional probability for two variables $x$ and $y$, see the image below.
In [191]:
    
display(joint_probability)
    
    
Bayes Rule (Theorem)
$p(A|B) = \frac{p(B|A)\,p(A)}{p(B)} = \frac{p(B|A)\,p(A)}{\sum_i p(B|A_i)\,p(A_i)}$
We can derive this using the definitions above (intersection and law of total probability).
But what does it mean?
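One way to see what it means is a minimal worked example: a rare condition and an imperfect test (all rates below are hypothetical):
In [ ]:
    
# A = "has condition", B = "test is positive" (all numbers assumed)
p_A = 0.01             # prior p(A)
p_B_given_A = 0.95     # true-positive rate p(B|A)
p_B_given_notA = 0.05  # false-positive rate p(B|~A)
# Denominator from the law of total probability
p_B = p_B_given_A * p_A + p_B_given_notA * (1 - p_A)
p_A_given_B = p_B_given_A * p_A / p_B
print("p(A|B) = %4.3f" % p_A_given_B)  # ~0.161: most positives are false positives
    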
Example: Monty Hall Problem
There is a full derivation in the text, which I won't repeat here, but here is a numerical demonstration:
In [192]:
    
import numpy as np
# Number of boxes and trials
n_boxes = 3
n_trials = 1000
n_boxes_to_remove = n_boxes - 2  # removing n-2 boxes leaves 1 box to switch to
# Initializing counters
keep_box_win = 0
switch_box_win = 0
no_win = 0
debug = False
for i in range(n_trials):
    
    available_boxes = list(range(n_boxes))  # list() so boxes can be removed
    prize_box = int(np.random.choice(n_boxes))  # int() extracts a scalar
    if debug: print("Prize", prize_box)
    # We choose one box at random
    our_choice = int(np.random.choice(n_boxes))
    if debug: print("Our choice", our_choice)
    available_boxes.remove(our_choice)
    if debug: print("Available boxes", available_boxes)
    
    # We first temporarily remove the prize, to make sure *Monty* doesn't remove it.
    if prize_box != our_choice:
        available_boxes.remove(prize_box)
    # Now Monty Hall removes all-but-one remaining box (none have the prize).
    # This must be done serially in order to assure no duplicate removals.
    for j in range(n_boxes_to_remove):
        available_boxes.remove(int(np.random.choice(available_boxes)))
    if debug: print("Monty removes %d box(es) leaving %d box(es)." % (n_boxes_to_remove, n_boxes - n_boxes_to_remove - 1))
        
    # Put the prize box back among the remaining boxes if possible
    if prize_box != our_choice:
        available_boxes.append(prize_box)
    if debug: print("Available boxes", available_boxes)
    
    # Do we win by keeping our box?
    if prize_box == our_choice:
        keep_box_win += 1.
    elif int(np.random.choice(available_boxes)) == prize_box:
        switch_box_win += 1.
    else:
        no_win += 1.
        
n_wins = keep_box_win + switch_box_win
print("Wins by keeping our first choice box = %d / %d = %4.3f    Fraction of wins = %4.3f"
      % (keep_box_win, n_trials, keep_box_win / n_trials, keep_box_win / n_wins))
print("Wins by switching to available box   = %d / %d = %4.3f    Fraction of wins = %4.3f"
      % (switch_box_win, n_trials, switch_box_win / n_trials, switch_box_win / n_wins))
print("No winner                            = %d / %d = %4.3f" % (no_win, n_trials, no_win / n_trials))
    
    
The Effect of Transforming Random Variables
If $y = \Phi(x)$, the transformed distribution is $p(y) = p(x)\left|\frac{dx}{dy}\right|$. Using the example of $y = \exp(x)$, i.e. $x = \ln(y)$, where $x$ is drawn from a uniform distribution on $(0, 1)$ (so $p(x) = 1$), plugging in gives:
$p(y) = \frac{1}{y}$
So the distribution follows a $1/y$ profile, with $y = \Phi(x) = \exp(x)$ mapping $x \in (0, 1)$ to $y \in (1, e)$, as seen below:
In [193]:
    
display(random_conversion)
    
    
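A quick numerical check of the $1/y$ profile (this cell is my own sketch, not from the text):
In [ ]:
    
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Transform uniform x on (0,1) to y = exp(x) and compare against 1/y
x = np.random.random(100000)
y_samples = np.exp(x)
y_grid = np.linspace(1.0, np.e, 200)
plt.hist(y_samples, bins=50, density=True, histtype='stepfilled', alpha=0.5)
plt.plot(y_grid, 1.0 / y_grid, '-r', linewidth=2, label='$p(y) = 1/y$')
plt.xlabel('$y$')
plt.legend()
    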
For a distribution $h(x)$ of values from the full population set, we speak of population statistics.
For a partial sampling of $h(x)$, which we refer to as $f(x)$, we speak of sample statistics.
Arithmetic Mean (also known as expectation value)
$\mu = E(x) = \int x \, h(x) \, dx$
np.mean()
Variance
$\sigma^2 = V = \int (x - \mu)^2 \, h(x) \, dx$
np.var()
Standard Deviation
$\sigma = \sqrt{V}$
np.std()
Skewness
$\Sigma = \int \left(\frac{x - \mu}{\sigma}\right)^3 h(x) \, dx$
scipy.stats.skew()
Kurtosis
$K = \int \left(\frac{x - \mu}{\sigma}\right)^4 h(x) \, dx - 3$
scipy.stats.kurtosis()
Absolute Deviation about $d$
$\delta = \int |x - d| \, h(x) \, dx$
Mode
the value $x_m$ at which $h(x)$ peaks
scipy.stats.mode()
Quantiles (also known as percentiles for $p$)
the value $q_p$ such that $\frac{p}{100} = \int_{-\infty}^{q_p} h(x) \, dx$
np.percentile()
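A minimal sketch applying these estimators to a single sample (the Gaussian sample here is just an example; any sample works):
In [ ]:
    
import numpy as np
from scipy import stats
sample = np.random.normal(0, 1, 10000)  # example sample f(x)
print("mean     = %6.3f" % np.mean(sample))
print("variance = %6.3f" % np.var(sample))
print("std dev  = %6.3f" % np.std(sample))
print("skew     = %6.3f" % stats.skew(sample))
print("kurtosis = %6.3f" % stats.kurtosis(sample))
print("median (50th percentile) = %6.3f" % np.percentile(sample, 50))
    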
In [194]:
    
display(kurtosis_skew)
    
    
Median/Quartile estimates vs Mean/Sigma estimates for a sample
Median and quartile values are more robust to outliers than the mean and standard deviation, but they are more expensive to calculate for large samples.
We can estimate sigma based on the interquartile range:
$\sigma_G = 0.7413 \, (q_{75} - q_{25})$
This can be found in astroML's stats package: astroML.stats.sigmaG()
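A quick check of this estimator on a Gaussian sample, computing $\sigma_G$ directly from np.percentile in case astroML isn't installed:
In [ ]:
    
import numpy as np
sample = np.random.normal(0, 2, 100000)  # true sigma = 2
q25, q75 = np.percentile(sample, [25, 75])
sigma_G = 0.7413 * (q75 - q25)
print("np.std estimate  = %5.3f" % np.std(sample))
print("sigma_G estimate = %5.3f" % sigma_G)
    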
In [280]:
    
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline
def plot_dist(pop, dist=None):
    """Histogram a sample and annotate it with common summary statistics."""
    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111)
    # density=True was called normed=True in older matplotlib
    n, bins, patches = ax.hist(pop, 15, histtype='stepfilled', density=True)
    
    ylim = [0, 10]
    mean = np.mean(pop)
    median = np.median(pop)
    std1 = (mean - np.std(pop)) * np.ones(2)
    std2 = (mean + np.std(pop)) * np.ones(2)
    q25 = np.percentile(pop, 25) * np.ones(2)
    q75 = np.percentile(pop, 75) * np.ones(2)
    var = np.var(pop)
    skew = stats.skew(pop)
    kurtosis = stats.kurtosis(pop)
    
    if dist is not None:
        # Overplot the analytic PDF if a frozen scipy distribution is given
        x = np.linspace(pop.min(), pop.max(), 1000)
        pdf = dist.pdf(x)
        ax.plot(x, pdf, '-r', linewidth=3, label='PDF')
        
    ax.plot(mean * np.ones(2), ylim, label='mean')
    ax.plot(median * np.ones(2), ylim, label='median')
    ax.plot(std1, ylim, label='mean - std')
    ax.plot(std2, ylim, label='mean + std')
    ax.plot(q25, ylim, label='1st quartile')
    ax.plot(q75, ylim, label='3rd quartile')
    ax.text(0.05, 0.95, 'Mean = %4.1f' % mean, transform=ax.transAxes, fontsize=15)
    ax.text(0.05, 0.90, 'Median = %4.1f' % median, transform=ax.transAxes, fontsize=15)
    ax.text(0.05, 0.85, 'Variance = %4.1f' % var, transform=ax.transAxes, fontsize=15)
    ax.text(0.05, 0.80, 'Skew = %4.1f' % skew, transform=ax.transAxes, fontsize=15)
    ax.text(0.05, 0.75, 'Kurtosis = %4.1f' % kurtosis, transform=ax.transAxes, fontsize=15)
    ax.set_ylim(0, 1.4 * max(n))
    ax.legend()
    
Uniform Distribution (Top Hat)
scipy.stats.uniform(loc, scale), which spans [loc, loc + scale]
np.random.random()
In [282]:
    
display(uniform_dist)
    
    
In [283]:
    
# 10000 nums from a uniform dist
dist = stats.uniform(0,1)
pop = dist.rvs(10000)
# alternatively
# pop = np.random.random(10000)
plot_dist(pop, dist)
    
    
Gaussian Distribution (Normal Distribution)
scipy.stats.norm(mean, std)
In [284]:
    
display(gaussian_dist)
    
    
In [285]:
    
# 10000 nums from a gaussian dist
dist = stats.norm(0,1)
pop = dist.rvs(10000)
# alternatively
# pop = np.random.randn(10000)
plot_dist(pop, dist)
    
    
Binomial Distribution
scipy.stats.binom(n, p), with n trials and success probability p
In [286]:
    
display(binomial_dist)
    
    
In [287]:
    
# 10000 nums from a binomial dist
dist = stats.binom(20,0.5) # 20 trials, probability 0.5
pop = dist.rvs(10000)
plot_dist(pop)
    
    
Poisson Distribution
scipy.stats.poisson(mu)
In [288]:
    
display(poisson_dist)
    
    
In [289]:
    
# 10000 nums from a poisson dist
dist = stats.poisson(5)  # mean = 5
pop = dist.rvs(10000)
plot_dist(pop)
    
    
Cauchy (Lorentzian) Distribution
scipy.stats.cauchy(mu, gamma)
In [290]:
    
display(cauchy_dist)
    
    
In [293]:
    
# 100 nums from a cauchy dist
dist = stats.cauchy(0, 1)
pop = dist.rvs(100)  # increase N and see divergence
plot_dist(pop)
    
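The sample mean of a Cauchy distribution never converges, while the sample median does; cauchy_dist2 (loaded above but not yet displayed) shows this, and the short sketch below checks it numerically:
In [ ]:
    
display(cauchy_dist2)
# The median settles near the true location (0); the mean keeps jumping
# because the Cauchy distribution has no finite moments.
for N in [100, 1000, 10000, 100000]:
    pop = stats.cauchy(0, 1).rvs(N)
    print("N = %6d   mean = %8.3f   median = %6.3f" % (N, np.mean(pop), np.median(pop)))
    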
    
Laplace (Exponential) Distribution
scipy.stats.laplace(mu, delta)
In [294]:
    
display(laplace_dist)
    
    
In [295]:
    
# 10000 nums from a laplace dist
dist = stats.laplace(0, 10)
pop = dist.rvs(10000)
plot_dist(pop)
    
    
$\chi^2$ Distribution
scipy.stats.chi2(k)
In [296]:
    
display(chi_squared_dist)
    
    
In [297]:
    
# 10000 nums from a chi^2 dist
dist = stats.chi2(10)  # 10 degrees of freedom
pop = dist.rvs(10000)
plot_dist(pop)
    
    