In [ ]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
In [ ]:
def computeCost(X, y, w):
    """Mean squared error cost: J(w) = 1/(2N) * sum_i (x_i . w - y_i)^2."""
    N = y.size
    J = 0
    for i in range(N):
        J += np.square(X[i, :].dot(w) - y[i])
    J /= (2 * N)
    return J
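The loop above follows the summation definition of the cost term by term. As a sanity check, the same quantity can be computed in a single vectorized expression; this is a minimal sketch (not part of the original notebook), assuming X already carries the constant-1 column:
In [ ]:
def computeCostVectorized(X, y, w):
    # residual vector r = Xw - y; cost is r.r / (2N)
    r = X.dot(w) - y
    return r.dot(r) / (2 * y.size)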
In [ ]:
def gradientDescent(X, y, w, alpha, num_iters):
    """Batch gradient descent; returns the final weights and the per-iteration cost."""
    N = y.size
    J_history = np.zeros(num_iters)
    numParameters = w.size
    for it in range(num_iters):
        # a fresh temp array each iteration keeps the update simultaneous:
        # no component of w changes until all components have been computed
        temp = np.zeros(numParameters)
        for j in range(numParameters):
            delta_j = 0
            for i in range(N):
                delta_j += (X[i, :].dot(w) - y[i]) * X[i, j]
            temp[j] = w[j] - alpha * (delta_j / N)
        w = temp
        J_history[it] = computeCost(X, y, w)
    return w, J_history
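The triple loop mirrors the per-component update rule w_j <- w_j - (alpha/N) * sum_i (x_i . w - y_i) x_ij. The whole update can also be written as one matrix product per iteration; a sketch under the same conventions (not part of the original notebook):
In [ ]:
def gradientDescentVectorized(X, y, w, alpha, num_iters):
    N = y.size
    J_history = np.zeros(num_iters)
    for it in range(num_iters):
        # gradient of J is (1/N) * X^T (Xw - y); all components update at once
        w = w - (alpha / N) * X.T.dot(X.dot(w) - y)
        J_history[it] = computeCost(X, y, w)
    return w, J_history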
In [ ]:
fname = 'data1.txt'
data = np.loadtxt(fname, delimiter=',')
N = data.shape[0]
X = data[:, 0]
y = data[:, 1]
# extend x by prepending a constant component 1: x --> (1, x)
X = np.column_stack((np.ones(N), X))
print('Shape of array X:', X.shape)
print('Shape of array y:', y.shape)
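Before fitting, a quick scatter plot of the raw data helps confirm the file loaded as expected; a minimal sketch:
In [ ]:
# quick look at the raw data before fitting
plt.plot(X[:, 1], y, 'rx')
plt.xlabel('x')
plt.ylabel('y')
plt.show()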
In [ ]:
print('Running gradient descent ...')
# guess some initial weights
w = np.zeros(2)
initialCost = computeCost(X, y, w)
print('Initial cost:', initialCost)
# gradient descent settings
iterations = 1500
alpha = 0.01
# run gradient descent
w, J_history = gradientDescent(X, y, w, alpha, iterations)
finalCost = computeCost(X, y, w)
print('Final cost:', finalCost)
print('Weights w found by gradient descent: (%f, %f)' % (w[0], w[1]))
# predict values for x = 3.5 and x = 7
predict1 = np.dot([1, 3.5], w)
print('x = 3.5 predicted output = %f' % predict1)
predict2 = np.dot([1, 7], w)
print('x = 7.0 predicted output = %f' % predict2)
# plot the data, the linear fit, and the two predictions
plt.plot(X[:, 1], y, 'rx')
plt.plot(X[:, 1], X.dot(w), '-')
plt.plot(3.5, predict1, 'o')
plt.plot(7, predict2, '+')
plt.xlim(-1, 10)
plt.ylim(-1, 6)
plt.xlabel('x')
plt.ylabel('y')
plt.show()
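For a problem this small the optimum is also available in closed form, so the weights from gradient descent can be cross-checked against an ordinary least-squares solve. A minimal sketch using np.linalg.lstsq (not part of the original notebook):
In [ ]:
# closed-form least-squares fit for comparison with gradient descent
w_ls, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
print('Closed-form weights: (%f, %f)' % (w_ls[0], w_ls[1]))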
In [ ]:
# plot the cost evolution for the training run above
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
plt.subplot(121)
plt.plot(range(1, J_history.size + 1), J_history, '-')
plt.xlabel('Iteration')
plt.ylabel('Cost')
# a second training run, with a different learning rate alpha
w2 = np.zeros(2)
alpha2 = 0.05
w2, J_history2 = gradientDescent(X, y, w2, alpha2, iterations)
plt.subplot(122)
plt.plot(range(1, J_history2.size + 1), J_history2, '-')
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.show()
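The two panels above compare alpha = 0.01 and alpha = 0.05 side by side. The same experiment generalizes to a small sweep with the curves overlaid on one axis, which makes the convergence speeds easier to compare; a sketch where the alpha values are illustrative, not from the original:
In [ ]:
# overlay cost curves for several learning rates on one axis
for a in (0.003, 0.01, 0.03):  # illustrative values
    _, Jh = gradientDescent(X, y, np.zeros(2), a, iterations)
    plt.plot(range(1, Jh.size + 1), Jh, label='alpha = %g' % a)
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.legend()
plt.show()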
In [ ]:
print('Visualizing J(w_0, w_1)')
# grid over which we will evaluate J
w0_vals = np.linspace(-10, 10, 50)
w1_vals = np.linspace(-1, 4, 50)
w0_coord, w1_coord = np.meshgrid(w0_vals, w1_vals)
# fill out J_vals; row i indexes w0, column j indexes w1
J_vals = np.zeros((w0_vals.size, w1_vals.size))
for i in range(w0_vals.size):
    for j in range(w1_vals.size):
        t = np.array([w0_vals[i], w1_vals[j]])
        J_vals[i, j] = computeCost(X, y, t)
# surface plot (J_vals is transposed to match the meshgrid layout)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(w0_coord, w1_coord, J_vals.T, rstride=1, cstride=1, alpha=0.6, cmap=plt.cm.jet)
ax.set_xlabel(r'$w_0$')
ax.set_ylabel(r'$w_1$')
ax.set_zlabel('Cost')
ax.view_init(elev=15, azim=230)
plt.show()
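A contour view of the same grid makes the location of the minimum easier to read, with the weights found by gradient descent marked on top; a sketch reusing J_vals and w from the cells above (the contour levels are an assumption, chosen to spread across the cost range):
In [ ]:
# contour plot of the same cost surface, with the learned weights marked
plt.contour(w0_coord, w1_coord, J_vals.T, levels=np.logspace(-2, 3, 20))
plt.plot(w[0], w[1], 'rx', markersize=10)
plt.xlabel(r'$w_0$')
plt.ylabel(r'$w_1$')
plt.show()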