In [1]:
%matplotlib inline
from ipywidgets import interact
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import norm
import numpy as np
import random
random.seed(42) # meaning of life
mpl.rcParams['figure.figsize'] = (18., 6.)

In [2]:
X_MIN = 0.
X_MAX = 100.
N = 100

In [3]:
def linear_model(*beta):
    # Build a model y = X.dot(beta); beta[0] is the intercept
    beta = np.array(beta).reshape(len(beta), 1)
    
    def _linear_model(x):
        # Add 'constant' intercept vector
        row_len, col_len = x.shape
        X = np.hstack([np.ones(row_len).reshape(row_len, 1), x])
        return X.dot(beta)
    
    return _linear_model
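
A quick sanity check (my addition, not in the original notebook): with an intercept of 0 and a slope of 1, the returned closure should reproduce its input unchanged.

In [ ]:
# Sanity check: beta = (0, 1) is the identity line
identity = linear_model(0, 1)
x_check = np.arange(5.).reshape(5, 1)
assert np.allclose(identity(x_check), x_check)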

In [4]:
def dgp(model, x, e_mu=0, e_sig=2.5, heteroskedastic=False):
    # Underlying stochastic nature of reality
    epsilon = norm(e_mu, scale=e_sig)

    # Regular (homoskedastic) error, one draw per observation
    error = epsilon.rvs(size=len(x)).reshape(-1, 1)

    # Heteroskedastic error: scale the noise with x, then re-standardize
    # so the overall spread is still e_sig
    if heteroskedastic:
        error = x**1.5 * error
        error = e_sig * ((error - np.mean(error)) / np.std(error))

    return model(x) + error
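
In the homoskedastic case the residual spread around the true line is set by e_sig. A quick verification sketch (my addition, reusing the functions above):

In [ ]:
# Residuals around the true line should have spread close to e_sig
check_model = linear_model(0, 1)
x_check = np.linspace(X_MIN, X_MAX, N).reshape(N, 1)
resid = dgp(check_model, x_check, e_sig=2.5) - check_model(x_check)
print(np.std(resid))  # roughly 2.5, up to sampling noise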

In [5]:
def OLS_normal_estimate(x, y):
    row_len, col_len = x.shape
    # Add 'constant' intercept vector
    X = np.hstack([np.ones(row_len).reshape(row_len, 1), x])
    # Normal equations: beta_hat = (X'X)^-1 X'y
    return np.squeeze(np.linalg.solve(X.T.dot(X), X.T.dot(y)))
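The estimator above is the closed-form solution of the normal equations, beta_hat = (X'X)^-1 X'y. As a cross-check (my addition), it should agree with NumPy's built-in least-squares solver:

In [ ]:
# Cross-check the normal-equation estimate against np.linalg.lstsq
x_check = np.arange(10.).reshape(10, 1)
y_check = 3. + 2. * x_check
X_check = np.hstack([np.ones((10, 1)), x_check])
lstsq_beta, *_ = np.linalg.lstsq(X_check, y_check, rcond=None)
print(OLS_normal_estimate(x_check, y_check))  # ~ [3., 2.]
print(np.squeeze(lstsq_beta))                 # same values from lstsq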

In [6]:
data = np.array(random.sample(list(np.linspace(X_MIN, X_MAX, 1000)), N))
data.sort()
data = data.reshape(N, 1)
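
An equivalent draw using NumPy alone (my addition; np.random.choice with replace=False plays the role of random.sample):

In [ ]:
# Equivalent sample-without-replacement draw in pure numpy
alt_data = np.sort(np.random.choice(np.linspace(X_MIN, X_MAX, 1000), N, replace=False))
alt_data = alt_data.reshape(N, 1)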

In [7]:
def plot_error_interactive_model(e_sig=10., e_mu=0., heteroskedastic=False):
    
    underlying_model = linear_model(0, 1)
    y = dgp(underlying_model, data, e_sig=e_sig, e_mu=e_mu, heteroskedastic=heteroskedastic)
    
    OLS_model = linear_model(*OLS_normal_estimate(data, y))
    y_hat = OLS_model(data)
    
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
    # Plot Data with Underlying and OLS Models
    ax1.scatter(data, y, c='#CCCCCC', marker="o", label="Data", alpha=0.6)
    ax1.plot(data, underlying_model(data), "r--", label="Underlying Model")
    ax1.plot(data, y_hat, "b", label="OLS Model")
    ax1.set_xlim(-20, 120)
    ax1.set_ylim(-10, 120)
    ax1.set_title("Univariate OLS Model")
    ax1.legend(loc=4)
    
    ax2.hist(y - y_hat, bins=25, color="#CCCCCC")
    ax2.set_title("Error Distribution")
    ax2.set_xlim(-80, 80)
    
    ax3.scatter(y_hat, y - y_hat, c='#CCCCCC', marker="o")
    ax3.set_title("Fit vs Risidual")

In [8]:
interact(plot_error_interactive_model,
         e_sig=(0., 100., 0.3), 
         e_mu=(-20., 20., 0.5),
         heteroskedastic=False)
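
Outside a live notebook the slider widget will not render; the same figure can be produced statically by calling the function with fixed arguments (my addition):

In [ ]:
# Static fallback when ipywidgets is unavailable
plot_error_interactive_model(e_sig=10., e_mu=0., heteroskedastic=True)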



In [9]:
def squared_error(y, y_hat):
    # Cost J = 1/(2m) * sum((y - y_hat)^2)
    m = len(y)
    return (1. / (2. * m)) * (y - y_hat).T.dot(y - y_hat)

def interactive_gradient_descent(e_sig=15., e_mu=0., heteroskedastic=False):
    data = np.array(random.sample(list(np.linspace(X_MIN, X_MAX, 1000)), N))
    data.sort()
    data = data.reshape(N, 1)    
    
    underlying_model = linear_model(0, 1)
    y = dgp(underlying_model, data, e_sig=e_sig, e_mu=e_mu, heteroskedastic=heteroskedastic)
    
    previous_predictions = []
    
    def _interactive(m=0, b=0):
        y_hat = linear_model(b, m)(data)
        previous_predictions.append((m, squared_error(y, y_hat).item()))
        
        fig, (ax1, ax2) = plt.subplots(1, 2)
        # Plot Data with Underlying and OLS Models
        ax1.scatter(data, y, c='#CCCCCC', marker="o", label="Data", alpha=0.6)
        ax1.plot(data, underlying_model(data), "r--", label="Underlying Model")
        ax1.plot(data, y_hat, "b", label="Arbitrary Model")
        ax1.set_xlim(-20, 120)
        ax1.set_ylim(-10, 120)
        ax1.set_title("Model")
        ax1.legend(loc=4)
    
        ax2.scatter(*zip(*previous_predictions))
        ax2.set_title("Cost Function")
        ax2.set_xlim(-5, 5)
    
    return _interactive

interactive_gd = interactive_gradient_descent()

In [10]:
interact(interactive_gd,
         m=(-3, 3, 0.1),
         b=(0, 100, 5))
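
The cell above only evaluates the cost at manually chosen slopes. Actual batch gradient descent updates both coefficients along the negative gradient of the cost, beta := beta - (alpha / m) * X'(X.beta - y). A minimal sketch of that loop (my addition, reusing dgp and squared_error; alpha and the iteration count are arbitrary choices):

In [ ]:
# Illustrative batch gradient descent on the same linear model
def gradient_descent(x, y, alpha=1e-4, iterations=500):
    m = len(y)
    X = np.hstack([np.ones((m, 1)), x])
    beta = np.zeros((2, 1))
    for _ in range(iterations):
        gradient = X.T.dot(X.dot(beta) - y) / m
        beta -= alpha * gradient
    return beta

y_gd = dgp(linear_model(0, 1), data, e_sig=15.)
beta_gd = gradient_descent(data, y_gd)
print(beta_gd.ravel())  # slope should approach 1
print(squared_error(y_gd, linear_model(*beta_gd.ravel())(data)))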


