In [3]:
import random
results = []
for trial in xrange(10000):
heads = 0
for i in xrange(100):
flip = random.randint(0,1)
if (flip == 0):
heads += 1
results.append(heads)
print results[1:10]
In [4]:
import matplotlib.pyplot as plt
plt.figure()
plt.hist(results)
plt.show()
In [5]:
## Plot the histogram using integer values by creating more bins
plt.figure()
plt.hist(results, bins=range(100))
plt.title("Using integer values")
plt.show()
In [6]:
## Plot the density function, notice bars sum to exactly 1
## Also make the plot bigger
plt.figure(figsize=(15,6))
plt.hist(results, bins=range(100), normed=True)
plt.title("coin flip densities")
plt.show()
In [7]:
flips_mean = float(sum(results)) / len(results)
print flips_mean
In [8]:
## the numpy package has lots of useful routines: http://www.numpy.org/
import numpy as np
mean = np.mean(results)
print mean
In [9]:
## we could code standard deviation by hand, but numpy makes it easier
stdev=np.std(results)
print stdev
In [10]:
## Overlay a normal distribution on top of the coin flip data
plt.figure(figsize=(15,6))
count, bins, patches = plt.hist(results, bins=range(100), normed=True, label="coin flip histogram")
plt.plot(bins, 1/(stdev * np.sqrt(2 * np.pi)) *
np.exp( - (bins - mean)**2 / (2 * stdev**2) ),
linewidth=3, color='red', label="normal distribution")
plt.title("Coin flip densities with normal distribution overlay")
plt.legend()
plt.show()
Reminder:
$$ {n \choose k} = \frac{n!}{k!(n-k)!} $$
In [11]:
prob_heads = .5
num_flips = 100
num_heads = 25
prob_flips = np.math.factorial(num_flips) / \
(np.math.factorial(num_heads) * np.math.factorial(num_flips-num_heads)) * \
(prob_heads**num_heads) * ((1-prob_heads)**(num_flips-num_heads))
print "The probability of seeing %d heads in %d flips is %.015f" % (num_heads, num_flips, prob_flips)
In [14]:
## Another super useful package is scipy
import scipy.stats
sp_prob = scipy.stats.binom.pmf(num_heads, num_flips, prob_heads)
print "scipy computed it as %0.15f" % sp_prob
In [16]:
## normal approximatation
print scipy.stats.norm(50, 5).pdf(25)
In [28]:
## Overlay a normal distribution on top of the coin flip data
plt.figure(figsize=(15,6))
count, bins, patches = plt.hist(results, bins=range(100), normed=True, label="coin flip histogram")
plt.plot(bins, scipy.stats.binom.pmf(bins, num_flips, prob_heads),linewidth=3, color='red', label="binomial distribution")
plt.plot(bins, scipy.stats.norm(50,5).pdf(bins),linewidth=3, color='green', linestyle='--', label="normal distribution")
plt.title("Coin flip densities with normal distribution overlay")
plt.legend()
plt.show()
In [13]:
expected_mean = num_flips * prob_heads
expected_stdev = np.math.sqrt(num_flips * prob_heads * (1 - prob_heads))
print "In %d flips, with a probability %.02f" % (num_flips, prob_heads)
print "The expected frequency is %.02f +/- %.02f" % (expected_mean, expected_stdev)
print "The observed frequency was %0.2f +/- %0.2f" % (mean, stdev)