In [1]:
%matplotlib inline
from ipywidgets import interact
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import norm
import pandas as pd
import numpy as np
import random
random.seed(42)     # meaning of life
np.random.seed(42)  # scipy's norm.rvs draws from numpy's global generator
mpl.rcParams['figure.figsize'] = (18., 6.)

In [2]:
X_MIN = 0.
X_MAX = 100.
N = 100

In [3]:
def linear_model(beta):
    
    def _linear_model(X):
        # X already carries the bias (intercept) column, so the model is just X * beta
        return X * beta
    
    return _linear_model

def dgp(model, x, e_mu=0., e_sig=15., heteroskedastic=False):
    # Underlying stochastic nature of reality
    epsilon = norm(e_mu, scale=e_sig)
    m = x.shape[0]
    
    # Homoskedastic error: same spread at every x
    error = np.array(epsilon.rvs(size=m)).reshape(m, 1)
    
    # Heteroskedastic error: scale the noise with x**1.5, then standardise it
    # back to roughly mean 0 and standard deviation e_sig
    if heteroskedastic:
        error = np.array(x[:, 1]) ** 1.5 * error
        error = e_sig * ((error - np.mean(error)) / np.std(error))
    
    return model(x) + error
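
A minimal usage sketch of the two helpers above (the `_demo` names are illustrative, not used elsewhere): build a design matrix with a bias column, draw one noisy sample from dgp, and confirm it comes back as the (N, 1) column the gradient-descent code below expects.

In [ ]:
X_demo = np.matrix(np.hstack((np.ones((N, 1)),
                              np.linspace(X_MIN, X_MAX, N).reshape(N, 1))))
y_demo = dgp(linear_model(np.matrix([[10], [1]])), X_demo)
print(X_demo.shape, y_demo.shape)  # (100, 2) (100, 1)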

In [4]:
def cost(y, y_hat):
    # Half mean squared error: J = 1/(2m) * sum((y_hat - y)^2)
    m = y.shape[0]
    residuals = np.matrix(y_hat) - np.matrix(y)
    return float((1 / (2. * m)) * (residuals.T * residuals))


def linear_gradient_descent(X, y, alpha, iterations):
    
    m = X.shape[0]
    
    previous_predictions = []
    previous_costs = []
    
    # Randomly initialize a model: intercept in [0, 100), slope in [-1, 1)
    theta = np.matrix([[random.randrange(0, 100, 1)],
                       [random.randrange(-100, 100, 1) / 100.]])
    
    
    # Run gradient descent
    for i in range(iterations):
        y_hat = linear_model(theta)(X)
        previous_predictions.append(y_hat)
        previous_costs.append(cost(y, y_hat))
            
        # theta = theta - ((alpha * 1/float(m)) * (X.T * ((X * theta) - y)))
        # np.multiply lets alpha be a scalar or a per-parameter column vector
        # (one learning rate per coefficient, as used in cell In [5] below)
        theta = theta - np.multiply(alpha, (1 / float(m)) * (X.T * ((X * theta) - y)))

    return (previous_predictions, previous_costs)



def interactive_gradient_descent(e_sig = 15., 
                                 e_mu = 0., 
                                 heteroskedastic = False,
                                 iterations=50,
                                 alpha=0.0001):
    
    # Randomly select N x-values between X_MIN and X_MAX
    X = np.array(random.sample(list(np.linspace(X_MIN, X_MAX, 1000)), N))
    X.sort()
    X = X.reshape(N, 1)    
    
    # Add bias (intercept) column
    bias = np.matrix(np.ones(N)).reshape(N, 1)
    X = np.concatenate((bias, X), axis=1)
    
    # Generate y observations from the true model: intercept 10, slope 1
    underlying_model = linear_model(np.matrix([[10], [1]]))
    y = dgp(underlying_model, X, heteroskedastic=heteroskedastic)
    
    
    predictions, costs = linear_gradient_descent(X, y, alpha, iterations)

    def _interactive(step=0):
        # Drop the bias column
        data = X[:,1]
      
        fig, (ax1, ax2) = plt.subplots(1, 2)
        # Plot Data with Underlying and OLS Models
        ax1.scatter(data, y, c='#CCCCCC', marker="o", label="Data", alpha=0.6)
        ax1.plot(data, underlying_model(X), "r--", label="Underlying Model")
        
        if step > 0:
            for s in range(step):
                ax1.plot(data, predictions[s], "b", alpha=0.4)
        
        ax1.plot(data, predictions[step], "b", label="Current Model")
        
        ax1.set_xlim(0, 100)
        ax1.set_ylim(0, 100)
        ax1.set_title("Model")
        ax1.legend(loc=4)
    
        ax2.scatter(range(iterations)[:step], costs[:step])
        ax2.set_title("Cost Function")
        #ax2.set_xlim(-5, 5)
    
    return _interactive
    # return costs
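
As a sanity check on the descent, the same regression can be solved in closed form from the normal equations, theta = (X'X)^-1 X'y. This is a small sketch reusing linear_model and dgp from above (the `_ne` names are illustrative); the gradient-descent runs in the next cell should drift toward roughly these values.

In [ ]:
X_ne = np.matrix(np.hstack((np.ones((N, 1)),
                            np.linspace(X_MIN, X_MAX, N).reshape(N, 1))))
y_ne = dgp(linear_model(np.matrix([[10], [1]])), X_ne)
theta_ne = np.linalg.solve(X_ne.T * X_ne, X_ne.T * y_ne)
print(theta_ne)  # should land near the true parameters [[10], [1]]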

In [5]:
i = 1500
interactive_gd = interactive_gradient_descent(iterations=i, alpha=np.matrix([[0.3], [0.00001]]))
interact(interactive_gd, step=(0, i - 1, 1))
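
The heteroskedastic flag in dgp is not exercised above; the same widget can be run with it switched on to see noise whose spread grows with x (a sketch using the same iteration count and per-parameter alpha as the cell above).

In [ ]:
i = 1500
interactive_gd_het = interactive_gradient_descent(iterations=i,
                                                  heteroskedastic=True,
                                                  alpha=np.matrix([[0.3], [0.00001]]))
interact(interactive_gd_het, step=(0, i - 1, 1))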



In [ ]: