In [1]:
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
%matplotlib inline
In [2]:
# Pareto shape parameter. b > 2 keeps the variance finite, which the
# central-limit comparison further down relies on.
b = 3.0
# Fixed seed so that Restart Kernel -> Run All reproduces the same sample.
SEED = 42
# Frozen distribution; with loc=0 and scale=3 the support starts at x = 3.
pareto = st.pareto(b, loc=0.0, scale=3.0)
values = pareto.rvs(size=1000, random_state=SEED)
In [3]:
# Evaluate the theoretical density on an evenly spaced grid that spans
# exactly the range covered by the drawn sample.
x = np.linspace(values.min(), values.max(), num=1000)
pareto_pdf = pareto.pdf(x)
In [4]:
# Overlay the theoretical pdf on the sample histogram.
# `density=True` normalizes the histogram to unit area; it replaces the
# `normed` keyword, which is no longer accepted by current Matplotlib.
plt.hist(values, density=True)
plt.plot(x, pareto_pdf)
plt.xlabel("x")
plt.ylabel("f(x)")
Out[4]:
In [5]:
def evaluate_distribution(distribution, sample_size, n_trials=1000, random_state=None):
    """Plot sample means of ``distribution`` against their normal approximation.

    Draws ``n_trials`` independent samples of ``sample_size`` values each,
    histograms the per-sample means, and overlays the pdf of
    ``Normal(mean, var / sample_size)`` computed from the distribution's own
    ``mean()`` and ``var()``. The plot is drawn on the current pyplot axes.

    Parameters
    ----------
    distribution : frozen scipy.stats distribution
        Must provide ``mean()``, ``var()`` and ``rvs(size=..., random_state=...)``.
        The overlay is only meaningful when mean and variance are finite.
    sample_size : int
        Number of draws averaged into each sample mean; must be >= 1.
    n_trials : int, optional
        Number of sample means to generate (default 1000).
    random_state : optional
        Forwarded to ``distribution.rvs``; ``None`` (the default) leaves the
        random source selection to scipy.

    Raises
    ------
    ValueError
        If ``sample_size`` or ``n_trials`` is smaller than 1.
    """
    if sample_size < 1:
        raise ValueError("sample_size must be a positive integer")
    if n_trials < 1:
        raise ValueError("n_trials must be a positive integer")

    # Parameters of the approximating normal: same mean, variance shrunk by n.
    mu = distribution.mean()
    sigma = np.sqrt(distribution.var() / sample_size)

    # Evaluate the normal pdf on a grid of +/- 5 standard deviations around mu.
    x = np.linspace(mu - 5 * sigma, mu + 5 * sigma, 100)
    norm_pdf = st.norm(loc=mu, scale=sigma).pdf(x)

    # One vectorized draw: each row is one sample, so row means are the
    # sample means (replaces a Python-level loop of separate draws).
    samples = distribution.rvs(size=(n_trials, sample_size), random_state=random_state)
    averages = samples.mean(axis=1)

    # `density=True` replaces the `normed` keyword dropped by Matplotlib.
    plt.hist(averages, density=True)
    plt.plot(x, norm_pdf)
    plt.xlabel("x")
    plt.ylabel("f(x)")
In [6]:
# Small samples: means of only 5 draws each.
evaluate_distribution(pareto, sample_size=5)
In [7]:
# Medium samples: means of 10 draws each.
evaluate_distribution(pareto, sample_size=10)
In [8]:
# Larger samples: means of 50 draws each.
evaluate_distribution(pareto, sample_size=50)
In [ ]: