In [1]:
%matplotlib inline
from ipywidgets import interact  # formerly IPython.html.widgets
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import norm
import numpy as np
import random
random.seed(42)  # meaning of life
mpl.rcParams['figure.figsize'] = (18., 6.)
In [2]:
X_MIN = 0.
X_MAX = 100.
N = 100
In [3]:
def linear_model(*beta):
    beta = np.array(beta).reshape(len(beta), 1)
    def _linear_model(x):
        # Add 'constant' intercept vector
        row_len, col_len = x.shape
        X = np.hstack([np.ones(row_len).reshape(row_len, 1), x])
        return X.dot(beta)
    return _linear_model
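linear_model is a closure factory: linear_model(b, m) fixes the coefficients (intercept first) and returns a function mapping an (n, 1) column of inputs to predictions. A minimal usage sketch:
In [ ]:
f = linear_model(2., 0.5)     # y = 2 + 0.5x
f(np.array([[0.], [10.]]))    # -> [[2.], [7.]]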
In [4]:
def dgp(model, x, e_mu=0, e_sig=2.5, heteroskedastic=False):
    # Underlying stochastic nature of reality
    epsilon = norm(e_mu, scale=e_sig)
    # Regular error, one draw per observation
    n = len(x)
    error = np.array(epsilon.rvs(size=n)).reshape(n, 1)
    if heteroskedastic:
        # Scale the error with x, then re-standardize so the overall
        # spread stays at e_sig (only the x-dependence remains)
        error = x**1.5 * error
        error = e_sig * ((error - np.mean(error)) / np.std(error))
    return model(x) + error
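A quick sanity check of dgp (a sketch, assuming the cells above have run): with the heteroskedastic flag set, the residual spread should grow with x while the overall standard deviation stays near e_sig:
In [ ]:
x_chk = np.linspace(1, 100, 1000).reshape(1000, 1)
e_chk = dgp(linear_model(0, 1), x_chk, heteroskedastic=True) - x_chk
print(np.std(e_chk))                             # ~2.5, the default e_sig
print(np.std(e_chk[:500]), np.std(e_chk[500:]))  # spread grows with x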
In [5]:
def OLS_normal_estimate(x, y):
    row_len, col_len = x.shape
    # Add 'constant' intercept vector
    X = np.hstack([np.ones(row_len).reshape(row_len, 1), x])
    # Normal equations: beta_hat = (X'X)^-1 X'y
    return np.squeeze(np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y))
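As a sketch of a sanity check: the normal-equation estimator should recover the coefficients exactly on noiseless data:
In [ ]:
x_chk = np.arange(10.).reshape(10, 1)
print(OLS_normal_estimate(x_chk, linear_model(3., 2.)(x_chk)))  # -> [3. 2.]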
In [6]:
data = np.array(random.sample(list(np.linspace(X_MIN, X_MAX, 1000)), N))
data.sort()
data = data.reshape(N, 1)
In [7]:
def plot_error_interactive_model(e_sig=10., e_mu=0., heteroskedastic=False):
    underlying_model = linear_model(0, 1)
    y = dgp(underlying_model, data, e_sig=e_sig, e_mu=e_mu, heteroskedastic=heteroskedastic)
    OLS_model = linear_model(*OLS_normal_estimate(data, y))
    y_hat = OLS_model(data)
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
    # Plot data with underlying and OLS models
    ax1.scatter(data, y, c='#CCCCCC', marker="o", label="Data", alpha=0.6)
    ax1.plot(data, underlying_model(data), "r--", label="Underlying Model")
    ax1.plot(data, y_hat, "b", label="OLS Model")
    ax1.set_xlim(-20, 120)
    ax1.set_ylim(-10, 120)
    ax1.set_title("Univariate OLS Model")
    ax1.legend(loc=4)
    # Residual diagnostics
    ax2.hist(y - y_hat, bins=25, color="#CCCCCC")
    ax2.set_title("Error Distribution")
    ax2.set_xlim(-80, 80)
    ax3.scatter(y_hat, y - y_hat, c='#CCCCCC', marker="o")
    ax3.set_title("Fit vs Residual")
In [8]:
interact(plot_error_interactive_model,
         e_sig=(0., 100., 0.3),
         e_mu=(-20., 20., 0.5),
         heteroskedastic=False)
In [9]:
def squared_error(y, y_hat):
    m = len(y)
    # Mean squared error cost, J = 1/(2m) * sum((y - y_hat)^2)
    return float((1. / (2. * m)) * (y - y_hat).T.dot(y - y_hat))

def interactive_gradient_descent(e_sig=15., e_mu=0., heteroskedastic=False):
    data = np.array(random.sample(list(np.linspace(X_MIN, X_MAX, 1000)), N))
    data.sort()
    data = data.reshape(N, 1)
    underlying_model = linear_model(0, 1)
    y = dgp(underlying_model, data, e_sig=e_sig, e_mu=e_mu, heteroskedastic=heteroskedastic)
    previous_predictions = []
    def _interactive(m=0, b=0):
        y_hat = linear_model(b, m)(data)
        previous_predictions.append((m, squared_error(y, y_hat)))
        fig, (ax1, ax2) = plt.subplots(1, 2)
        # Plot data with underlying and arbitrary models
        ax1.scatter(data, y, c='#CCCCCC', marker="o", label="Data", alpha=0.6)
        ax1.plot(data, underlying_model(data), "r--", label="Underlying Model")
        ax1.plot(data, y_hat, "b", label="Arbitrary Model")
        ax1.set_xlim(-20, 120)
        ax1.set_ylim(-10, 120)
        ax1.set_title("Model")
        ax1.legend(loc=4)
        # Cost of every slope tried so far
        ax2.scatter(*zip(*previous_predictions))
        ax2.set_title("Cost Function")
        ax2.set_xlim(-5, 5)
    return _interactive

interactive_gd = interactive_gradient_descent()
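A quick numeric check of the cost (a sketch): three residuals of 1 give J = 3 / (2 * 3) = 0.5:
In [ ]:
y_chk = np.array([[1.], [2.], [3.]])
print(squared_error(y_chk, y_chk - 1.))  # 0.5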
In [10]:
interact(interactive_gd,
         m=(-3., 3., 0.1),
         b=(0., 100., 5.))
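The widget explores the slope of the cost surface by hand; the descent itself is never run in these cells. Below is a minimal sketch of the batch gradient-descent update for the same cost, an illustration rather than the notebook's own method: the x column is standardized first because raw x in [0, 100] makes the problem badly conditioned, and the learning rate alpha is a hypothetical choice.
In [ ]:
def gradient_step(X, y, beta, alpha=0.1):
    # Gradient of J = 1/(2m) * ||X.beta - y||^2 is (1/m) * X'(X.beta - y)
    m = len(y)
    return beta - alpha * (1. / m) * X.T.dot(X.dot(beta) - y)

y_gd = dgp(linear_model(0, 1), data)
x_std = (data - data.mean()) / data.std()  # standardize for conditioning
X_gd = np.hstack([np.ones((N, 1)), x_std])
beta = np.zeros((2, 1))
for _ in range(500):
    beta = gradient_step(X_gd, y_gd, beta)
print(beta.ravel())  # converges to OLS_normal_estimate(x_std, y_gd)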