In [1]:
%matplotlib inline
from ipywidgets import interact  # formerly IPython.html.widgets
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import norm
import pandas as pd
import numpy as np
import random
random.seed(42) # meaning of life
np.random.seed(42)  # scipy's norm.rvs draws from numpy's global RNG, so seed it too
mpl.rcParams['figure.figsize'] = (18., 6.)
In [2]:
X_MIN = 0.
X_MAX = 100.
N = 100
In [3]:
def linear_model(beta):
    def _linear_model(X):
        # X is expected to already carry the intercept (bias) column,
        # so beta[0] is the intercept and beta[1] the slope
        return X * beta
    return _linear_model

def dgp(model, x, e_mu=0, e_sig=15., heteroskedastic=False):
    # Underlying stochastic nature of reality
    epsilon = norm(e_mu, scale=e_sig)
    # Regular error
    error = np.array(epsilon.rvs(size=N)).reshape(N, 1)
    # Add heteroskedastic error that grows with x
    if heteroskedastic is True:
        error = np.array(x[:, 1]) ** 1.5 * error
    # Re-standardise so the error keeps mean 0 and standard deviation e_sig
    error = e_sig * ((error - np.mean(error)) / np.std(error))
    return model(x) + error
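Because dgp re-standardises its error term to mean 0 and standard deviation e_sig, the generated noise can be checked quickly. The snippet below is a sketch that builds its own design matrix from the N, X_MIN, X_MAX, linear_model, and dgp defined above; x_check, model_check, and residuals are illustrative names, not part of the model code.
x_check = np.matrix(np.column_stack((np.ones(N), np.linspace(X_MIN, X_MAX, N))))
model_check = linear_model(np.matrix([[10], [1]]))
residuals = dgp(model_check, x_check) - model_check(x_check)
print(residuals.mean(), residuals.std())  # expect roughly 0 and e_sig (15.)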
In [4]:
def cost(y, y_hat):
    # Half mean squared error: J = 1/(2m) * sum((y_hat - y)^2)
    m = y.shape[0]
    residual = np.matrix(y_hat - y)
    return float((1 / (2. * m)) * (residual.T * residual))
def linear_gradient_descent(X, y, alpha, iterations):
    m = X.shape[0]
    previous_predictions = []
    previous_costs = []
    # Randomly initialize a model: intercept in [0, 100), slope in [-1, 1)
    theta = np.matrix([[random.randrange(0, 100, 1)],
                       [random.randrange(-100, 100, 1) / 100.]])
    # Run gradient descent
    for i in range(iterations):
        y_hat = linear_model(theta)(X)
        previous_predictions.append(y_hat)
        previous_costs.append(cost(y, y_hat))
        # Batch update: theta <- theta - alpha * (1/m) * X'(X*theta - y);
        # np.multiply lets alpha be a per-parameter learning rate
        theta = theta - np.multiply(alpha, (1 / float(m)) * (X.T * ((X * theta) - y)))
    return (previous_predictions, previous_costs)
def interactive_gradient_descent(e_sig=15.,
                                 e_mu=0.,
                                 heteroskedastic=False,
                                 iterations=50,
                                 alpha=0.0001):
    # Randomly select a bunch of data between X_MIN and X_MAX
    X = np.array(random.sample(list(np.linspace(X_MIN, X_MAX, 1000)), N))
    X.sort()
    X = X.reshape(N, 1)
    # Add bias column
    bias = np.matrix(np.ones(N)).reshape(N, 1)
    X = np.concatenate((bias, X), axis=1)
    # Generate a bunch of y observations based on an underlying model
    underlying_model = linear_model(np.matrix([[10], [1]]))
    y = dgp(underlying_model, X, e_mu=e_mu, e_sig=e_sig, heteroskedastic=heteroskedastic)
    predictions, costs = linear_gradient_descent(X, y, alpha, iterations)
    def _interactive(step=0):
        # Drop the bias column for plotting
        data = X[:, 1]
        fig, (ax1, ax2) = plt.subplots(1, 2)
        # Plot data with the underlying and fitted models
        ax1.scatter(data, y, c='#CCCCCC', marker="o", label="Data", alpha=0.6)
        ax1.plot(data, underlying_model(X), "r--", label="Underlying Model")
        if step > 0:
            for s in range(step):
                ax1.plot(data, predictions[s], "b", alpha=0.4)
            ax1.plot(data, predictions[step], "b", label="Current Model")
        ax1.set_xlim(0, 100)
        ax1.set_ylim(0, 100)
        ax1.set_title("Model")
        ax1.legend(loc=4)
        ax2.scatter(range(iterations)[:step], costs[:step])
        ax2.set_title("Cost Function")
    return _interactive
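As a reference point for where gradient descent should converge, the same line can be fit in closed form with ordinary least squares. The snippet below is a sketch that rebuilds a design matrix and observations the same way interactive_gradient_descent does; X_chk, y_chk, and theta_ols are illustrative names.
X_chk = np.matrix(np.column_stack((np.ones(N),
                                   sorted(random.sample(list(np.linspace(X_MIN, X_MAX, 1000)), N)))))
y_chk = dgp(linear_model(np.matrix([[10], [1]])), X_chk)
theta_ols, _, _, _ = np.linalg.lstsq(X_chk, y_chk, rcond=None)
print(theta_ols)  # should land near the underlying [[10], [1]]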
In [5]:
i = 1500
interactive_gd = interactive_gradient_descent(iterations=i, alpha=np.matrix([[0.3], [0.00001]]))
interact(interactive_gd, step=(0, i - 1, 1))
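interact wires the slider to the step argument of the closure returned above. If the widget back end is unavailable (for example, outside a live notebook), the same closure can be called directly to render a single frame; the step value here is just an example.
interactive_gd(step=500)  # draw the fit and the cost curve after 500 updates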