In [39]:
data = [
    -4.4, -10.1, 1.6, 4.0, 3.4, -2.0, -5.8, -1.9, -3.1, 1.8,
    -2.9, 4.9, 2.7, -9.5, 2.8, 4.3, -1.1, 3.9, -9.1, 5.6,
    1.1, -1.0, 0.7, 9.7, -1.6, -2.3, 1.4, 2.5, -9.0, 1.1,
]
### BEGIN SOLUTION
import scipy.stats as stats

# Two-sided 95% interval for the mean, using the normal (z) multiplier.
n_obs = len(data)
x = np.mean(data)            # sample mean
s = np.std(data, ddof=1)     # sample standard deviation (ddof=1 -> divide by n - 1)
Z = stats.norm.ppf(0.975)    # leaves 2.5% in each tail
half_width = Z * s / np.sqrt(n_obs)
print('{:.2f} +/- {:.2f}'.format(x, half_width))
### END SOLUTION
In [40]:
# Reuses x, s and data from the previous cell.
# ppf(0.90) leaves 10% in the upper tail, i.e. the multiplier of a two-sided 80% interval.
Z = stats.norm.ppf(0.90)
half_width = Z * s / np.sqrt(len(data))
print('{:.2f} +/- {:.2f}'.format(x, half_width))
In [41]:
data = [11.5,10.1,20.5,12.4,7.9,9.1,18.1,13.4]
### BEGIN SOLUTION
# Two-sided 95% interval for the mean of a small sample, using Student's t.
x = np.mean(data)            # sample mean
s = np.std(data, ddof=1)     # sample standard deviation (ddof=1 -> divide by n - 1)
# Estimating the mean uses up one degree of freedom, so the t multiplier
# for n observations takes df = n - 1 (not n).
T = stats.t.ppf(0.975, df=len(data) - 1)
print('{:.2f} +/- {:.2f}'.format(x, s * T / np.sqrt(len(data))))
### END SOLUTION
In [42]:
# Reuses x and data from the previous cell; 5 replaces the sample standard
# deviation here (presumably a given population sigma -- confirm against the question).
Z = stats.norm.ppf(0.975)
half_width = Z * 5 / np.sqrt(len(data))
print('{:.2f} +/- {:.2f}'.format(x, half_width))
In [43]:
import numpy as np
import matplotlib.pyplot as plt
# evaluation grid: 500 evenly spaced values on [-5, 5]
x = np.linspace(-5, 5, 500)
# density of the normal distribution with scipy's default loc/scale on that grid
density = stats.norm.pdf(x)
plt.plot(x, density)
plt.ylabel('P(x)')
plt.xlabel('x')
plt.show()
In [44]:
#get standard error from this info
se = 0.25 / np.sqrt(7)
#make 500 points from -5 standard errors to +5 standard errors
x = np.linspace(-5 * se, 5 * se, 500)
#zero-centred normal density whose spread is the standard error
plt.plot(x, stats.norm.pdf(x, scale=se))
#raw strings (r'...') keep the LaTeX backslashes such as \mu and \bar from
#being parsed as Python escape sequences (a non-raw '\m' triggers an
#invalid-escape warning on recent Python versions)
plt.xlabel(r'$x - \mu$')
plt.ylabel(r'$P(\bar{x} - \mu)$')
plt.show()
In [45]:
#make points consistent for all graphs
#use widest possible one (N = 3 gives the largest standard error)
x = np.linspace(-5 * 1.2 / np.sqrt(3) + 3.5, 5 * 1.2 / np.sqrt(3) + 3.5, 1000)
#iterate over sample count and plot
for N in [3, 5, 10, 25]:
    se = 1.2 / np.sqrt(N)
    #a mean estimated from N samples has N - 1 degrees of freedom
    plt.plot(x, stats.t.pdf(x, loc=3.5, scale=se, df=N - 1), label='N = {}'.format(N))
plt.legend(loc='best')
#raw strings keep the LaTeX backslash in \mu from being parsed as a Python escape
plt.xlabel(r'$\mu$')
plt.ylabel(r'$P(\mu)$')
plt.show()
In [60]:
#get standard error from this info
n_samples = 8
se = 3.2 / np.sqrt(n_samples)
#a mean estimated from n samples has n - 1 degrees of freedom
dof = n_samples - 1
#make 500 points from -5 standard errors to +5 standard errors around 5
x = np.linspace(-5 * se + 5, 5 * se + 5, 500)
#NOTE(review): assumes 3.2 is a sample standard deviation, so the curve and the
#shaded bounds both use the t distribution; previously the curve was a
#normal pdf while the bounds came from a t quantile -- confirm against the question
plt.plot(x, stats.t.pdf(x, scale=se, loc=5, df=dof))
#t multiplier leaving 2.5% in each tail
y = stats.t.ppf(0.975, df=dof)
#shade the central 95% region under the same curve
x2 = np.linspace(5 - y * se, 5 + y * se, 500)
plt.fill_between(x2, stats.t.pdf(x2, scale=se, loc=5, df=dof), color='lightgray')
#raw strings keep the LaTeX backslash in \mu from being parsed as a Python escape
plt.xlabel(r'$\mu$')
plt.ylabel(r'$P(\mu)$')
plt.show()
The proportion of students graduating at colleges follows a Beta distribution. Each year, a college rankings agency compiles these rates for 100 colleges and also reports the average. If the sample mean and sample standard deviation are 0.74 and 0.21, respectively, what is the population mean for all colleges, with 95% confidence?
In [46]:
# z-based 95% interval around the reported mean of 0.74
# (standard deviation 0.21, 100 colleges)
Z = stats.norm.ppf(0.975)
half_width = Z * 0.21 / np.sqrt(100)
print('{} +/- {:.2f}'.format(0.74, half_width))
The number of fatalities due to work-related injuries follows a Poisson distribution at each job site. OSHA computes the average for each state, taken from 45 random job sites. All numbers are in fatalities per thousand workers. Alaska has a sample mean of 7.4 and a sample standard deviation of 6.0, and Texas has a sample mean of 5.3 and a sample standard deviation of 2.3. Plot the probability for the population mean number of fatalities per thousand workers for these two states.
In [47]:
#standard error of each state's mean (45 job sites per state)
se1 = 6.0 / np.sqrt(45)
se2 = 2.3 / np.sqrt(45)
#plotting range found by trial and error
x = np.linspace(2, 12, 1000)
#one normal curve per state, centred on its sample mean
for state, centre, spread in (('Alaska', 7.4, se1), ('Texas', 5.3, se2)):
    plt.plot(x, stats.norm.pdf(x, loc=centre, scale=spread), label=state)
plt.legend(loc='best')
plt.xlabel('Fatalities per Thousand Workers')
plt.ylabel('Probability')
plt.show()