In [16]:
# TO DO
# - Nothing for the moment

In [ ]:
import os
# Navigate the file system in an OS-independent way
# The data directory is one directory up relative to this notebook's directory
curr_dir = os.path.normpath(os.getcwd())
# Build the sub-path with os.path.join so the separators match the host OS
untracked_path = os.path.join('Untracked', 'Notebooks')
if untracked_path in curr_dir:
    data_dir = curr_dir.replace(untracked_path, 'Data')
else:
    # Notebook is tracked in the repo
    data_dir = curr_dir.replace('Notebooks', 'Data')

#print(data_dir)

In [17]:
# PENALTY FUNCTIONS - SOME EXAMPLES

# multiplier is a positive number that determines the slope

# Linear Penalty Function
def linearPenalty(x, multiplier=1): 
    return x * multiplier

# Flipped/Inverse Linear Penalty Function
def invLinearPenalty(x, multiplier=1):
    return -x * multiplier

# Linear for negative x and zero for positive x
def leftLinearPenalty(x, multiplier=1):
    if(x < 0): return -x * multiplier
    else: return 0
    
# Linear for positive x and zero for negative x
def rightLinearPenalty(x, multiplier=1):
    if(x < 0): return 0
    else: return x * multiplier

# V shape penalty
def VPenalty(x, multiplier=1):
    if (x < 0): return -x * multiplier
    else: return x * multiplier
    
# Inverted V shape penalty
def invertedVPenalty(x, multiplier=1):
    if (x < 0): return x * multiplier
    else: return -x * multiplier
    
# Positive parabola penalty
def squaredPenalty(x, multiplier=1):
    return (x**2) * multiplier

# Inverted parabola penalty
def invertedSquaredPenalty(x, multiplier=1):
    return -(x**2) * multiplier

# Non-linear penalty
def nonLinearPenalty(x, multiplier=1):
    return (x + x**2 + x**3) * multiplier
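
In [ ]:
# Quick illustrative checks of a couple of the penalty functions above
# (the inputs are arbitrary example values):
print(VPenalty(-2, multiplier=3))    # 6: the magnitude of the error scaled by the multiplier
print(rightLinearPenalty(-2))        # 0: negative errors incur no penalty here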

In [18]:
penaltyFunctions = {linearPenalty: "Linear Penalty", 
                    invLinearPenalty: "Inverse Linear Penalty",
                    leftLinearPenalty: "Left-Linear Penalty",
                    rightLinearPenalty: "Right-Linear Penalty",
                    VPenalty: "V Penalty",
                    invertedVPenalty: "Inverted-V Penalty",
                    squaredPenalty: "Squared Penalty",
                    invertedSquaredPenalty: "Inverted Squared Penalty",
                    nonLinearPenalty: "Non-Linear Penalty"
                   }

In [19]:
# Given a list of error values, plot the penalty function
# Error = Predicted value - Actual value. The error is always along a single dimension because
# the output is always a single column in a dataset.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline
import seaborn as sns

# Plot the penalty function for a given list of error values and a given penalty function
def penaltyPlot(errorList, penaltyFunction):
    # Set up the x-axis
    num_points = 200
    x = np.linspace(min(errorList), max(errorList), num_points)
    fig, ax = plt.subplots(figsize=(6,4))
    ax.set(xlabel='Predicted Value - Actual Value')
    ax.set(ylabel='Penalty')
    ax.axvline(x=0, color='black')
    ax.axhline(y=0, color='black')
    ax.set(title=penaltyFunctions[penaltyFunction])
    ax.plot(x, list(map(penaltyFunction,x)))
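
In [ ]:
# Example usage: visualize every penalty function over an illustrative error range
# (the range [-3, 3] is arbitrary; any symmetric interval works)
for fn in penaltyFunctions:
    penaltyPlot([-3, 3], fn)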

In [20]:
# Load up the packages to investigate the data
import numpy as np
import pandas as pd

In [21]:
# Add a column of ones to the first column of a dataframe
# and turn it into a matrix
def df_addOnes(dataFrame):
    vals = dataFrame.values
    # np.c_ stacks a column of ones to the left of the feature values
    feature_matrix = np.c_[np.ones(len(dataFrame)), vals]
    return feature_matrix
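
In [ ]:
# Sanity check of df_addOnes on a tiny made-up DataFrame (illustrative only):
df_demo = pd.DataFrame({'x1': [1.0, 2.0, 3.0]})
print(df_addOnes(df_demo))
# Expected: a 3 x 2 array whose first column is all ones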

In [22]:
# Calculate the total penalty over the entire dataset
def penalty(df_features, df_output, parameter_value_list, penalty_function):
    
    # df_features is a dataframe of the features (no column of ones added)
    # df_output is a dataframe of the output column (target variable)
    # parameter_value_list is a list of w0, w1, ..., wn where n is the number of features,
    #  i.e., the number of columns in df_features (so there are n+1 parameters in total)
    
    # Cost of being wrong calculated over the entire dataset
    # A first column of 1s is added to X to enable the matrix multiplication
    # Therefore: X is an m x (n+1) matrix and theta is an (n+1) x 1 matrix
    
    #### Turn the function inputs into matrices ####
    # Get X and y into the right shapes for use in the penalty function
    feature_matrix = df_addOnes(df_features)
    output_matrix = np.matrix(df_output.values)
    parameter_matrix = np.matrix(parameter_value_list).T
    
    #print(feature_matrix.shape, parameter_matrix.shape, output_matrix.shape)
    
    # Difference between the predicted and the actual value
    error = (feature_matrix * parameter_matrix) - output_matrix
    #print(error.shape)
    
    # penaltyPerOutput holds, for each row of the dataset, the penalty of the
    # prediction error for that row at this particular value of W
    
    # Apply the penalty function to the error from each row of the dataset
    penaltyPerOutput = list(map(penalty_function, error))
    
    # totalPenalty is the sum of the penalties of each row of the dataset
    totalPenalty = np.sum(penaltyPerOutput)
    
    # The penalty of getting it wrong is 1/2m of the totalPenalty (normalized penalty)
    # m is the number of rows in df_features
    totalPenaltyNorm = totalPenalty / (2 * len(df_features))
    
    return totalPenaltyNorm
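
In [ ]:
# Minimal sketch of calling penalty() on toy data (all values are made up for illustration):
df_X = pd.DataFrame({'x1': [1.0, 2.0, 3.0]})
df_y = pd.DataFrame({'y': [2.0, 4.0, 6.0]})
# w0=0, w1=2 fits the toy data exactly, so the normalized penalty is 0
print(penalty(df_X, df_y, [0.0, 2.0], squaredPenalty))
# A poorer fit gives a positive penalty: errors (-1, -2, -3) -> (1 + 4 + 9) / (2 * 3) ≈ 2.33
print(penalty(df_X, df_y, [0.0, 1.0], squaredPenalty))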

In [24]:
# Implement Gradient Descent 
# **NOTE: ONLY for a squared penalty function**
def gradientDescent(df_features, 
                    df_output, 
                    init_params_list, 
                    num_iterations=100, 
                    learning_rate=0.0001, 
                    penalty_function=squaredPenalty):
    # df_features is a dataframe with the features
    # df_output is a dataframe of the output column
    # init_params_list is the list of initial W values, e.g., [-1.0, 3.53]
    # num_iterations is the number of steps taken by the algorithm as it descends the penalty surface
    # learning_rate is the multiplier that determines step size (smaller = smaller step size)
    # penalty_function is the penalty function applied to the machine learning problem
    # NOTE: The formula for gradient descent we're implementing works only for the squaredPenalty function
    
    # Get the inputs into matrix form so we can use matrix multiplication (more efficient)
    feature_matrix = df_addOnes(df_features)
    m = len(feature_matrix) # number of rows of data
    output_matrix = np.matrix(df_output.values)
    parameter_matrix = np.matrix(init_params_list).T

    # This is the initial value of the parameters in matrix form
    w = parameter_matrix
    
    # Set up arrays to capture the running results
    running_w = np.array(parameter_matrix)
    # No dtype needed for the other arrays because each holds single values
    running_error = np.zeros(num_iterations)
    running_normError = np.zeros(num_iterations)
    running_penalty = np.zeros(num_iterations)
    
    # Iterate over the dataset num_iterations times, adjusting the parameter values each time
    for i in range(num_iterations):
        # Update all parameters simultaneously: every gradient component is computed
        # from the current w before w is replaced
        w_new = np.matrix(np.zeros(w.shape))
        for j in range(len(parameter_matrix)):
            # Gradient component for parameter j: errors weighted by feature column j
            gradient_j = ((feature_matrix * w) - output_matrix).T * np.matrix(feature_matrix[:, j]).T
            w_new[j] = w[j] - (learning_rate / m) * gradient_j
        w = w_new

        # Record w, error, normError and penalty after each iteration
        running_w = np.append(running_w, w, axis=0)
        running_error[i] = np.sum((feature_matrix * w) - output_matrix)
        running_normError[i] = (learning_rate / m) * running_error[i]
        running_penalty[i] = penalty_function(running_error[i])
    

    # w is the value of the parameters after num_iterations

    # Get the running_w into the right form
    # From https://jasonstitt.com/python-group-iterator-list-function
    running_w = list(zip(*[iter(running_w)] * len(parameter_matrix)))
    
    # error after num_iterations
    final_error = np.sum((feature_matrix * w) - output_matrix)

    # Penalty after num_iterations
    final_penalty = penalty_function(final_error)
    
    return w, final_penalty, running_w, running_penalty
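
In [ ]:
# Minimal sketch of running gradientDescent on the toy df_X and df_y from the
# penalty() example above (learning rate and iteration count are illustrative, not tuned):
w_final, final_penalty, w_history, penalty_history = gradientDescent(
    df_X, df_y, init_params_list=[0.0, 0.0],
    num_iterations=2000, learning_rate=0.1)
print(w_final)         # should approach w0 = 0, w1 = 2 for this toy data
print(final_penalty)   # should approach 0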

In [ ]: