In [1]:
%matplotlib inline
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import os
In [2]:
def straight_line(x):
    return 5 * x + 8
In [3]:
def straight_line_weight(weight1, x):
    return weight1 * x + 8
In [4]:
np.random.seed(5)
x_vals = pd.Series(np.random.rand(150) * 20)
y_vals = x_vals.map(straight_line)
In [5]:
df = pd.DataFrame({'x1': x_vals,
                   'y': y_vals})
In [6]:
# One-feature example
# Training set - contains several examples of feature 'x1' and the corresponding correct answer 'y'
# Objective is to find the weights of the form y = w0 + w1*x1
df.head()
Out[6]:
In [7]:
df.tail()
Out[7]:
In [8]:
fig = plt.figure(figsize = (12, 8))
plt.scatter(x = x_vals,
            y = y_vals)
plt.xlabel('Feature x1')
plt.ylabel('Target y')
plt.grid(True)
plt.title('Training Set - One Feature')
Out[8]:
In [9]:
weights = [3, 4, 5, 6, 7]
y_at_weight = {}
for w1 in weights:
    y_calculated = []
    y_at_weight[w1] = y_calculated
    for x in x_vals:
        y_calculated.append(straight_line_weight(w1, x))
In [10]:
fig = plt.figure(figsize = (12, 8))
plt.scatter(x = x_vals,
            y = y_vals,
            label = 'actual')
plt.scatter(x = x_vals, y = y_at_weight[3], color = 'r', marker = '+', label = 'weight 3')
plt.scatter(x = x_vals, y = y_at_weight[4], color = 'g', label = 'weight 4')
plt.scatter(x = x_vals, y = y_at_weight[5], label = 'weight 5')
plt.scatter(x = x_vals, y = y_at_weight[6], color = 'y', label = 'weight 6')
plt.scatter(x = x_vals, y = y_at_weight[7], color = 'k', marker='+', label = 'weight 7')
plt.xlabel('Feature x1')
plt.ylabel('Predicted y')
plt.title('Predicted Output for different weights')
plt.grid(True)
plt.legend()
Out[10]:
In [11]:
# For a range of candidate weights, let's compute the loss (cost)
weight = pd.Series(np.linspace(3, 7, 100))
In [12]:
weight.head()
Out[12]:
In [13]:
weight.tail()
Out[13]:
In [14]:
# Cost/Loss calculation: squared loss function - a measure of how far the predicted value is from the actual value
# Steps:
#   For every weight, compute the predicted y for each feature value x
#   Then compute the loss = average((predicted y - actual y)**2)
loss_at_wt = []
for w1 in weight:
    y_predicted = []
    for x in x_vals:
        y_predicted.append(straight_line_weight(w1, x))
    loss_at_wt.append(((y_vals - y_predicted) ** 2).mean())
In [15]:
fig = plt.figure(figsize = (12, 8))
plt.scatter(x = weight,
            y = loss_at_wt)
plt.grid(True)
plt.xlabel('Weight for feature 1')
plt.ylabel('Loss')
plt.title('Loss Curve - Loss at different weight')
Out[15]:
In [16]:
# Let's look at a quadratic example: y = w2*x**2 + w1*x + w0
# Two features: x**2 and x
In [17]:
def quad_func(x):
    return 25 * x ** 2 - 80 * x + 64
In [18]:
def quad_func_weight(weight2, x):
    # For different weights of the quadratic term
    # Actual eqn: 25x^2 - 80x + 64. We have fixed w1 = -80, w0 = 64; need to find w2.
    return weight2 * x ** 2 - 80 * x + 64
In [19]:
# Quadratic
np.random.seed(5)
x_vals = pd.Series(np.random.rand(150) * 20)
y_vals = x_vals.map(quad_func)
In [20]:
plt.scatter(x = x_vals,
            y = y_vals)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Training Set - Two Features')
plt.grid(True)
In [21]:
weights = [0, 20, 30, 50]
y_at_weight = {}
for w1 in weights:
    y_calculated = []
    y_at_weight[w1] = y_calculated
    for x in x_vals:
        y_calculated.append(quad_func_weight(w1, x))
In [22]:
fig = plt.figure(figsize = (12, 8))
plt.scatter(x = x_vals, y = y_vals, label = 'actual')
plt.scatter(x = x_vals, y = y_at_weight[0], label = 'weight 0', color = 'r')
plt.scatter(x = x_vals, y = y_at_weight[20], label = 'weight 20', color = 'g')
plt.scatter(x = x_vals, y = y_at_weight[30], label = 'weight 30', color = 'k')
plt.scatter(x = x_vals, y = y_at_weight[50], label = 'weight 50', color = 'y')
plt.xlabel('x')
plt.ylabel('Predicted y')
plt.title('Predicted Output for different weights')
plt.grid(True)
plt.legend()
Out[22]:
In [23]:
# Initialize Weights for feature 2
weight = pd.Series(np.linspace(-20, 70, 200))
loss_at_wt = []
for w1 in weight:
    y_calculated = []
    for x in x_vals:
        y_calculated.append(quad_func_weight(w1, x))
    loss_at_wt.append(((y_vals - y_calculated) ** 2).mean())
In [24]:
fig = plt.figure(figsize = (12, 8))
plt.scatter(x = weight,
            y = loss_at_wt)
plt.grid(True)
plt.xlabel('Weight for feature 2')
plt.ylabel('Loss')
plt.title('Loss Curve - Loss at different weight')
Out[24]:
The squared loss function is parabolic in nature. It has an important property: it not only tells us the loss at a given weight, it also tells us which way to move the weight to reduce the loss, as the short sketch below illustrates.
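A minimal sketch (not one of the cells above) using the quadratic training set generated in In [19]; loss_slope_w2 is a hypothetical helper returning the derivative of the mean squared loss with respect to w2:

# For y_hat = w2*x**2 - 80*x + 64, d(loss)/d(w2) = mean(2 * (y_hat - y) * x**2)
def loss_slope_w2(w2):
    y_hat = w2 * x_vals ** 2 - 80 * x_vals + 64
    return (2 * (y_hat - y_vals) * x_vals ** 2).mean()

print(loss_slope_w2(20))   # negative -> loss decreases as w2 increases towards 25
print(loss_slope_w2(30))   # positive -> loss decreases as w2 decreases towards 25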
The Gradient Descent optimization algorithm uses the loss function to move the weights of all the features, iteratively adjusting them until the optimal values are reached.
Batch Gradient Descent predicts the y value for all training examples and then adjusts the weights based on the overall loss. It can converge much more slowly when the training set is very large. The order of the training set does not matter, as every example is considered before each adjustment.
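A minimal sketch of Batch Gradient Descent, assuming the earlier cells have been run; x_line and y_line are new names that regenerate the straight-line training set from In [4], and the learning rate and iteration count are illustrative choices, not tuned values:

# Regenerate the one-feature straight-line training set (y = 5*x + 8) so the sketch is self-contained
np.random.seed(5)
x_line = pd.Series(np.random.rand(150) * 20)
y_line = x_line.map(straight_line)

w1 = 0.0                                 # start from an arbitrary weight
learning_rate = 0.001
for step in range(200):
    y_hat = w1 * x_line + 8              # predict for ALL training examples
    gradient = (2 * (y_hat - y_line) * x_line).mean()
    w1 = w1 - learning_rate * gradient   # one weight update per full pass over the data
print(w1)                                # moves towards 5, the weight used to generate y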
Stochastic Gradient Descent predicts the y value for the next training example and immediately adjusts the weights.
It can converge faster when the training set is very large. The training set should be in random order, otherwise the model will not learn correctly. AWS ML uses Stochastic Gradient Descent.
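A minimal sketch of Stochastic Gradient Descent on the same x_line/y_line data from the previous sketch, again with an illustrative learning rate; note the shuffled order of examples:

np.random.seed(5)
w1 = 0.0
learning_rate = 0.001
for i in np.random.permutation(len(x_line)):   # random order is essential for SGD
    x, y = x_line[i], y_line[i]
    y_hat = w1 * x + 8                         # predict for ONE example
    gradient = 2 * (y_hat - y) * x
    w1 = w1 - learning_rate * gradient         # adjust immediately after each example
print(w1)                                      # already close to 5 after a single pass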