In [1]:
%matplotlib inline
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
# Fix the state of numpy's global random generator so that every random draw
# made through np.random.* in this notebook is repeatable on a fresh run.
SEED = 20160302
np.random.seed(SEED)
Here is a population of size 100,000.
In [2]:
# Parameters of the normal distribution from which the population is generated.
mu = 64
sigma = 8

# The "population": 100,000 draws from Normal(mu, sigma).
popn = np.random.normal(mu, sigma, 100000)

# Mean and standard deviation of the generated population itself. These differ
# slightly from mu and sigma because the population is a finite random draw.
truemu = popn.mean()
truesigma = popn.std()
In [3]:
# Report the mean and standard deviation computed over the whole population.
# The template is built from adjacent string literals; the embedded newline
# and the trailing space are part of the printed text.
s = (
    "For the population of interest, the true mean is {}\n"
    "and the true standard deviation is {} "
)
print(s.format(truemu, truesigma))
This is what the population distribution looks like when represented as a frequency histogram.
In [21]:
# Frequency histogram of the full population, drawn on the current axes.
ax = plt.gca()
ax.hist(popn, bins=50, histtype='stepfilled', color='gray', alpha=0.75)
ax.set_xlabel("X")
ax.set_ylabel("Frequency")
pass  # no-op last statement: the cell shows only the figure, not a return value
Here is a sample of size 60 drawn without replacement from this population:
In [5]:
# Draw 60 distinct population members (sampling without replacement).
sample = np.random.choice(popn, 60, replace=False)
In [6]:
# Point estimates computed from the single sample drawn above.
# ddof=1 divides by (n - 1) rather than n when computing the standard deviation.
sample_mean = sample.mean()
sample_sd = sample.std(ddof=1)

s = (
    "For the population of interest, the point estimates of the\n"
    "mean and standard deviation are {} and {}, respectively"
)
print(s.format(sample_mean, sample_sd))
Here is what the frequency histogram of the sample looks like.
In [31]:
# Frequency histogram of the 60-element sample, drawn on the current axes.
ax = plt.gca()
ax.hist(sample, label='sample', histtype='stepfilled',
        color='steelblue', alpha=0.75)
ax.set_xlabel("X")
ax.set_ylabel("Frequency")
pass  # no-op last statement: the cell shows only the figure, not a return value
Here are density histograms of the population (gray) and the sample (blue) drawn together.
In [22]:
# Overlay the population and the sample as density histograms. Normalising
# each histogram to unit area makes the two comparable even though one holds
# 100,000 values and the other only 60.
# `density=True` is used instead of the `normed=True` keyword, which was
# deprecated in matplotlib 2.1 and removed in 3.1.
plt.hist(popn, density=True, bins=50, label='population',
         color='gray', alpha=0.75, histtype='stepfilled')
plt.hist(sample, density=True, label='sample',
         color='steelblue', alpha=0.75, histtype='stepfilled')
plt.xlabel("X")
plt.ylabel("Density")
plt.legend(loc="best")
pass  # no-op last statement: the cell shows only the figure, not a return value
To estimate the sampling distribution of the mean, we draw 1000 samples of size 60 and calculate the mean for each such sample.
In [9]:
# Approximate the sampling distribution of the mean: repeatedly draw a sample
# of size 60 from the population (without replacement within each sample) and
# record that sample's mean. After the loop, smeans holds 1000 sample means.
smeans = []
for i in range(1000):
    # The loop body must be indented; without indentation the cell does not parse.
    rsample = np.random.choice(popn, size=60, replace=False)
    smeans.append(np.mean(rsample))
Here is the frequency histogram of the sampling distribution of the mean.
In [36]:
# Frequency (count) histogram of the simulated sample means.
# Raw counts are plotted, with no density normalisation, so the bars agree
# with the "Frequency" y-axis label. The `normed` keyword is deliberately not
# passed: it was removed from matplotlib in 3.1 and would raise an error.
plt.hist(smeans, bins=30, label='simulated\ndistn of\n means',
         color='firebrick', alpha=0.75, histtype='stepfilled')
plt.xlabel("mean(X)")
plt.ylabel("Frequency")
pass  # no-op last statement: the cell shows only the figure, not a return value
Here are the density histograms of the population (gray), our first sample (blue), and the sampling distribution of the mean (red), all drawn in the same plot.
IMPORTANT NOTE: to facilitate visual comparison of the distributions I've truncated the y-axis. Comment out the ylim
line below to see the complete density histogram of the sampling distribution of sample means.
In [37]:
# Overlay three density histograms on one set of axes: the population, the
# first sample, and the simulated sample means. Each is normalised to unit
# area so that their shapes and spreads can be compared directly.
# `density=True` is used instead of the `normed=True` keyword, which was
# deprecated in matplotlib 2.1 and removed in 3.1.
plt.hist(popn, density=True, bins=50, label='population',
         color='gray', alpha=0.75, histtype='stepfilled')
plt.hist(sample, density=True, label='sample',
         color='steelblue', alpha=0.75, histtype='stepfilled')
plt.hist(smeans, density=True, bins=50, label='simulated\ndistn of\n means',
         color='firebrick', alpha=0.75, histtype='stepfilled')
plt.xlabel("X")
plt.ylabel("Density")
plt.legend(loc="best")
# The y-axis is cut off at 0.06 so the much taller histogram of sample means
# does not flatten the other two.
plt.ylim(0,0.06) # comment out this line to remove truncation
pass  # no-op last statement: the cell shows only the figure, not a return value
In [ ]: