In [109]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [110]:
# Seed NumPy's global RNG so the generated data set, and every later step
# that draws from np.random, is reproducible on Restart & Run All.
np.random.seed(42)
# 50 integers drawn uniformly from [1, 100); `high` is exclusive.
population = np.random.randint(1, high=100, size=50)
In [111]:
# Arithmetic mean of the original data set.
mean = population.mean()
print(mean)
In [112]:
# Standard error of the mean: standard deviation (NumPy default, ddof=0)
# divided by the square root of the number of observations.
sem = population.std() / np.sqrt(population.size)
print(sem)
In [113]:
# Bootstrap the mean: repeatedly resample the data at its original size
# (np.random.choice samples with replacement by default) and record the
# mean of each resample.
n_replicates = 10000
replicates = np.empty(n_replicates)
for i in range(n_replicates):
    sample = np.random.choice(population, size=len(population))
    replicates[i] = np.mean(sample)
In [114]:
# Centre of the bootstrap distribution: the average of all replicate means.
sampled_mean = replicates.mean()
print(sampled_mean)
In [115]:
# Spread of the bootstrap distribution (NumPy default, ddof=0).
replicates_std = replicates.std()
print(replicates_std)
In [116]:
# 95% percentile interval: the 2.5th and 97.5th percentiles of the
# bootstrap replicates.
percentiles = np.percentile(replicates, q=(2.5, 97.5))
print(percentiles)
In [117]:
sns.set()
# Histogram of the bootstrap replicates with a square-root-rule bin count.
# `density=True` replaces the `normed` keyword, which was removed in
# matplotlib 3.1; the bar areas integrate to 1.
_ = plt.hist(replicates, bins=int(np.sqrt(len(replicates))), density=True)
_ = plt.xlabel('data')
# With density normalisation the y axis is a probability density, not a
# percentage.
_ = plt.ylabel('probability density')
# Mark the bootstrap mean with a vertical line spanning the full axes height,
# so the marker does not depend on a hard-coded y extent.
_ = plt.axvline(sampled_mean, color='red')
plt.margins(.02)
plt.show()