In [1]:
%matplotlib inline
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np
from numpy import *
import sys
Compute the residual sum of squares (RSS):
In [2]:
def compute_RSS(H, w, y):
    """Return the residual sum of squares (RSS) of the linear model ``H . w``.

    The residual is ``y - np.dot(H, w)``; the result is the sum of its
    squared entries.

    Parameters
    ----------
    H : array-like
        Matrix multiplied by ``w`` to obtain the predictions.
    w : array-like
        Weights, either 1-D or a single column.
    y : array-like
        Observed values, either 1-D or a single column.

    Returns
    -------
    numpy scalar
        Sum of the squared residuals.
    """
    # Flatten both sides before subtracting. With column vectors the previous
    # ``transpose(mat) * mat`` broadcast to an (n, n) outer product, whose sum
    # is (sum of residuals) ** 2 instead of the sum of squares.
    residual = np.ravel(y) - np.ravel(np.dot(H, w))
    return np.sum(residual * residual)
Compute the Euclidean norm of a vector:
In [3]:
def compute_norma(vector):
    """Return the Euclidean (L2) norm of ``vector``.

    Every element is squared, the squares are added together, and the
    square root of that total is returned.
    """
    sum_of_squares = np.sum(vector ** 2)
    return np.sqrt(sum_of_squares)
Compute a single gradient-descent step:
In [4]:
def step_gradient(H, w_current, y, learning_rate):
    """Perform one gradient-descent update for least-squares regression.

    Parameters
    ----------
    H : ndarray
        Matrix multiplied by the weights to obtain the predictions.
    w_current : ndarray
        Weights before the update.
    y : ndarray
        Observed values. ``y`` and ``w_current`` should use the same layout
        (both 1-D, or both single-column).
    learning_rate : float
        Step size applied to the update.

    Returns
    -------
    list
        ``[w, norma]``: the updated weights and the norm of
        ``H^T . (y - H . w_current)`` evaluated at ``w_current``.
    """
    residual = y - np.dot(H, w_current)
    # H^T . residual as a matrix product: the same quantity the previous
    # broadcast-multiply-then-sum produced, without the full
    # (columns of H) x (rows of H) temporary array.
    partial = np.dot(np.transpose(H), residual)
    norma = compute_norma(partial)
    # The gradient of the RSS with respect to w is -2 * partial, so stepping
    # against the gradient adds 2 * learning_rate * partial.
    w = w_current + (2 * learning_rate * partial)
    return [w, norma]
Run the complete gradient descent until convergence:
In [14]:
def gradient_descent(H, y, learning_rate, epsilon, max_iterations=None):
    """Run gradient descent until the norm reported by ``step_gradient`` is
    no longer greater than ``epsilon``.

    Parameters
    ----------
    H : ndarray
        Matrix passed to ``step_gradient`` and ``compute_RSS``; the weights
        get one entry per column of ``H``.
    y : ndarray
        Observed values.
    learning_rate : float
        Step size forwarded to ``step_gradient``.
    epsilon : float
        Convergence threshold on the norm returned by ``step_gradient``.
    max_iterations : int or None, optional
        Upper bound on the number of steps. ``None`` (the default) keeps the
        previous behaviour of iterating until the norm criterion is met.

    Returns
    -------
    list
        ``[w, num_iterations, rss_total, norma_total]``. The two histories
        hold one entry per 10 iterations plus, when the last iteration is not
        a multiple of 10, one entry for the final state, so ``rss_total[-1]``
        is the RSS of the returned ``w`` whenever at least one step was taken.

    Notes
    -----
    The loop also ends if the norm becomes NaN (``nan > epsilon`` is False),
    in which case the returned weights are not meaningful.
    """
    w = np.zeros(H.shape[1])  # one weight per column of H
    rss_total = []
    norma_total = []
    norma = epsilon + 1  # guarantees the first step is taken
    num_iterations = 0

    while norma > epsilon and (max_iterations is None or num_iterations < max_iterations):
        w, norma = step_gradient(H, w, y, learning_rate)
        num_iterations += 1
        if num_iterations % 10 == 0:
            rss_total.append(compute_RSS(H, w, y))
            norma_total.append(norma)

    # Record the final state when the loop stopped between two sampling
    # points; otherwise the last history entry would describe weights from up
    # to 9 iterations earlier, or be missing entirely for short runs.
    if num_iterations % 10 != 0:
        rss_total.append(compute_RSS(H, w, y))
        norma_total.append(norma)

    return [w, num_iterations, rss_total, norma_total]
Running the multiple linear regression:
In [15]:
input_filename = "sample_treino.csv"
learning_rate = 0.00003
epsilon = 0.000001

# Call genfromtxt through the explicit ``np`` namespace instead of relying on
# the name leaked by ``from numpy import *``.
att = np.genfromtxt(input_filename, delimiter=",", skip_header=1)
H = att[:, 0:-1]  # every column except the last: the predictor variables
y = att[:, -1]  # last column: the variable to predict
# Prepend a column of ones so that w[0] plays the role of the intercept.
H_with_ones = np.c_[np.ones(len(H)), H]

[w, num_iterations, rss_total, norma_total] = gradient_descent(H_with_ones, y, learning_rate, epsilon)

# Report the RSS of the returned weights directly: rss_total is only sampled
# periodically, so it can be empty (IndexError on [-1]) or lag behind w.
final_rss = compute_RSS(H_with_ones, w, y)
print("\n\nNum iterations: {0}\nRSS: {1}\nW: {2}".format(num_iterations, final_rss, w))
Computing the same values with Scikit-learn
In [16]:
# Fit scikit-learn's LinearRegression on the same data as a reference result.
# H (without the added column of ones) is used here; the intercept is read
# back separately from reg.intercept_.
reg = LinearRegression().fit(H, y)
print(
    "\nCoef with scikit-learn: {0}".format(reg.coef_),
    "Intercept with scikit-learn: {0}\n".format(reg.intercept_),
    sep="\n",
)
Plot graphs of the training progress:
In [23]:
# Draw both recorded histories on one explicit set of axes; the x axis is the
# position of each entry in the recorded lists.
fig, ax = plt.subplots()
ax.plot(rss_total, label="RSS")
ax.plot(norma_total, label="Norma")
ax.set_xlabel("Iteration")  # was misspelled "Iteraction"
ax.set_ylabel("y values")
# Anchor the legend just outside the upper-right corner of the axes so it
# does not cover the curves.
ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
Out[23]: