In [ ]:
import numpy as np
import scipy.stats as stat
import matplotlib.pyplot as plt
import pandas as pd
# Apply matplotlib's 'ggplot' style sheet to every figure created below.
plt.style.use('ggplot')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

Generating Random Numbers


In [ ]:
# Seed the global NumPy RNG so the sample -- and every figure and fitted value
# derived from it below -- is identical on each Restart & Run All.
np.random.seed(0)

# 1000 independent draws from the standard normal distribution N(0, 1).
x = np.random.normal(size=1000)

# Plot the raw draws in generation order (the x-axis is the sample index).
# The trailing ';' suppresses the [<Line2D>] repr in the cell output.
plt.plot(x);

Q: What can we infer from the plot?


In [ ]:
# Histogram-based density estimates of x at three bin counts, side by side.
fig, ax = plt.subplots(nrows=1, ncols=3)
for i, n_bins in enumerate([10, 20, 50]):
    _ax = ax[i]
    # n_bins bins need n_bins + 1 edges; with only n_bins edges the histogram
    # would have n_bins - 1 bins and the panel title would be off by one.
    edges = np.linspace(-4, 5, n_bins + 1)
    # density=True normalises the counts so the histogram integrates to 1.
    # It replaces the `normed` keyword, which has been removed from NumPy.
    # Samples outside [-4, 5] are not counted.
    histogram = np.histogram(x, bins=edges, density=True)[0]
    # Plot each density value at the centre of its bin.
    bins = 0.5*(edges[1:] + edges[:-1])
    _ax.plot(bins, histogram)
    _ax.set_title("Bins = %d" % n_bins)
fig.tight_layout()

Q: What's the problem with histograms?


In [ ]:
# Same histogram density estimates as before, now overlaid with the
# standard normal pdf for comparison.
fig, ax = plt.subplots(nrows=1, ncols=3)
for i, n_bins in enumerate([10, 20, 50]):
    _ax = ax[i]
    # n_bins bins need n_bins + 1 edges; with only n_bins edges the histogram
    # would have n_bins - 1 bins and the panel title would be off by one.
    edges = np.linspace(-4, 5, n_bins + 1)
    # density=True normalises the counts so the histogram integrates to 1.
    # It replaces the `normed` keyword, which has been removed from NumPy.
    # Samples outside [-4, 5] are not counted.
    histogram = np.histogram(x, bins=edges, density=True)[0]
    # Plot each density value at the centre of its bin.
    bins = 0.5*(edges[1:] + edges[:-1])
    _ax.plot(bins, histogram)
    _ax.set_title("Bins = %d" % n_bins)
    # Normal pdf with scipy's default parameters (loc=0, scale=1), evaluated
    # at the bin centres and drawn as a solid green line.
    estimate = stat.norm.pdf(bins)
    _ax.plot(bins, estimate, "g-")
fig.tight_layout()

Fitting parameters


In [ ]:
# Fit a normal distribution to the sample; for `norm`, scipy's fit() returns
# the estimated (loc, scale) pair, i.e. the mean and standard deviation.
mean, std = stat.norm.fit(x)

# Report both estimates, one per line.
for label, value in (("Estimated Mean: ", mean), ("Estimated STD: ", std)):
    print(label, value)

Exercise: Verify the law of large numbers

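For a range of sample sizes (e.g. 10, 100, 1000, ...), draw a sample of that size, fit its parameters as above, and plot the error between the actual and estimated mean and standard deviation against the sample size.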


In [ ]:
# enter code here