In [1]:
import numpy as np
import math
import time
In [2]:
# y = mx + b
# m is slope, b is y-intercept
def compute_mse(b, m, points):
    """Mean squared error of the line y = m*x + b over the data points."""
    totalError = 0
    for i in range(len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (m * x + b)) ** 2
    return totalError / float(len(points))
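A quick sanity check of compute_mse (a minimal sketch with made-up points, not part of the original data): for points lying exactly on y = 2x + 1, the MSE of that line is zero.
In [ ]:
# Hypothetical toy data: three points exactly on the line y = 2x + 1.
toy = np.array([[0.0, 1.0], [1.0, 3.0], [2.0, 5.0]])
print(compute_mse(1, 2, toy))  # expected: 0.0
print(compute_mse(0, 0, toy))  # nonzero for a poor fit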
Function that performs one update of the parameters in gradient descent:
$w_0 = w_0 + 2\alpha\sum_{i=1}^N (y_i - (w_0+w_1x_i))$
$w_1 = w_1 + 2\alpha\sum_{i=1}^N x_i(y_i - (w_0+w_1x_i))$
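These update rules come from differentiating the (unnormalized) squared error; for $w_0$, for example,
$\frac{\partial}{\partial w_0}\sum_{i=1}^N (y_i-(w_0+w_1x_i))^2 = -2\sum_{i=1}^N (y_i-(w_0+w_1x_i))$
so adding $2\alpha\sum_{i=1}^N (y_i-(w_0+w_1x_i))$ steps against the gradient. As in the code below, the $\frac{1}{N}$ factor of the MSE is omitted here and effectively absorbed into the learning rate $\alpha$.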
In [24]:
def step_gradient(b_current, m_current, points, learningRate):
    b_gradient = 0
    m_gradient = 0
    for i in range(len(points)):
        x = points[i, 0]
        y = points[i, 1]
        # Accumulate the (unnormalized) gradient components.
        b_gradient += y - ((m_current * x) + b_current)
        m_gradient += x * (y - ((m_current * x) + b_current))
    # Step against the gradient to decrease the error.
    new_b = b_current + (2 * learningRate * b_gradient)
    new_m = m_current + (2 * learningRate * m_gradient)
    return [new_b, new_m, b_gradient, m_gradient]
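A minimal usage sketch (same hypothetical toy points as above): one step from b = m = 0 should move both parameters toward the true line y = 2x + 1.
In [ ]:
toy = np.array([[0.0, 1.0], [1.0, 3.0], [2.0, 5.0]])
b1, m1, gb, gm = step_gradient(0, 0, toy, 0.01)
print(b1, m1)  # both positive after one step, moving toward b = 1, m = 2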
$||\mathbf{w}||_2 = \sqrt{\sum_{j=1}^D w_j^2}$
In [25]:
def norm_2(x):
    c = 0
    for i in range(len(x)):
        c += x[i] ** 2
    return math.sqrt(c)
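For reference, NumPy provides the same quantity as np.linalg.norm, which the vectorized runner below uses; a quick check that the two agree:
In [ ]:
v = np.array([3.0, 4.0])
print(norm_2(v), np.linalg.norm(v))  # both print 5.0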
Function that iterates gradient descent until convergence.
In [39]:
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, epsilon):
    b = starting_b
    m = starting_m
    grad = np.array([np.inf, np.inf])
    i = 0
    # Iterate until the gradient norm falls below the tolerance epsilon.
    while norm_2(grad) >= epsilon:
        b, m, b_gradient, m_gradient = step_gradient(b, m, points, learning_rate)
        grad = np.array([b_gradient, m_gradient])
        if i % 1000 == 0:
            print("MSE at iteration {0} is {1}".format(i, compute_mse(b, m, points)))
        i += 1
    return [b, m]
In [42]:
points = np.genfromtxt("income.csv", delimiter=",")
learning_rate = 0.0001
initial_b = 0  # initial y-intercept guess
initial_m = 0  # initial slope guess
epsilon = 0.5
print("Starting gradient descent at b = {0}, m = {1}, error = {2}".format(initial_b, initial_m, compute_mse(initial_b, initial_m, points)))
print("Running...")
tic = time.time()
[b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, epsilon)
toc = time.time()
print("Gradient descent converged with w0 = {0}, w1 = {1}, error = {2}".format(b, m, compute_mse(b, m, points)))
print("Non-vectorized version ran in: " + str(1000 * (toc - tic)) + " ms")
Vectorized form of the MSE, where $X$ has a leading column of ones for the intercept:
$MSE(\hat{\mathbf{w}})=\frac{1}{N}(\mathbf{y}-X\hat{\mathbf{w}})^T(\mathbf{y}-X\hat{\mathbf{w}})$
In [32]:
def compute_mse_vectorized(w, X, Y):
    res = Y - np.dot(X, w)  # residual vector, shape (N, 1)
    totalError = np.dot(res.T, res)
    return totalError / float(len(Y))
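As a sanity check (a sketch, assuming income.csv is available), the vectorized MSE should agree with the loop-based compute_mse once the data is put in design-matrix form:
In [ ]:
pts = np.genfromtxt("income.csv", delimiter=",")
Xc = np.c_[np.ones(len(pts)), pts[:, 0]]
Yc = pts[:, 1][:, np.newaxis]
print(compute_mse(0, 0, pts), compute_mse_vectorized(np.zeros((2, 1)), Xc, Yc))  # same value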
In [36]:
def step_gradient_vectorized(w_current, X, Y, learningRate):
    res = Y - np.dot(X, w_current)   # residuals, shape (N, 1)
    b_gradient = np.sum(res)
    x1 = X[:, 1][:, np.newaxis]      # feature column (skipping the ones column)
    m_gradient = np.sum(np.multiply(res, x1))
    new_w = np.array([(w_current[0] + (2 * learningRate * b_gradient)),
                      (w_current[1] + (2 * learningRate * m_gradient))])
    return [new_w, b_gradient, m_gradient]
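A consistency check (a sketch on the hypothetical toy points from above): one vectorized step should produce the same update as the loop-based step_gradient.
In [ ]:
toy = np.array([[0.0, 1.0], [1.0, 3.0], [2.0, 5.0]])
Xt = np.c_[np.ones(len(toy)), toy[:, 0]]
Yt = toy[:, 1][:, np.newaxis]
w_new, gb, gm = step_gradient_vectorized(np.zeros((2, 1)), Xt, Yt, 0.01)
print(w_new.ravel())                       # vectorized step
print(step_gradient(0, 0, toy, 0.01)[:2])  # loop-based step: same numbers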
In [34]:
def gradient_descent_runner_vectorized(starting_w, X, Y, learning_rate, epsilon):
    w = starting_w
    grad = np.array([np.inf, np.inf])
    i = 0
    # Iterate until the gradient norm falls below the tolerance epsilon.
    while np.linalg.norm(grad) >= epsilon:
        w, b_gradient, m_gradient = step_gradient_vectorized(w, X, Y, learning_rate)
        grad = np.array([b_gradient, m_gradient])
        if i % 1000 == 0:
            print("MSE at iteration {0} is {1}".format(i, compute_mse_vectorized(w, X, Y)))
        i += 1
    return w
In [41]:
points = np.genfromtxt("income.csv", delimiter=",")
points = np.c_[np.ones(len(points)), points]  # prepend a column of ones for the intercept
X = points[:, [0, 1]]
Y = points[:, 2][:, np.newaxis]
init_w = np.zeros((2, 1))
learning_rate = 0.0001
epsilon = 0.5
print("Starting gradient descent at w0 = {0}, w1 = {1}, error = {2}".format(init_w[0], init_w[1], compute_mse_vectorized(init_w, X, Y)))
print("Running...")
tic = time.time()
w = gradient_descent_runner_vectorized(init_w, X, Y, learning_rate, epsilon)
toc = time.time()
print("Gradient descent converged with w0 = {0}, w1 = {1}, error = {2}".format(w[0], w[1], compute_mse_vectorized(w, X, Y)))
print("Vectorized version ran in: " + str(1000 * (toc - tic)) + " ms")