In [1]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
In [2]:
boston_dataset = load_boston()
# extract prices (in thousands)
house_prices = boston_dataset.target
num_rooms = boston_dataset.data[:,5].reshape(-1, 1)
In [3]:
plt.scatter(num_rooms, house_prices)
plt.show()
Seems pretty "linear"...
In [4]:
# theta_0 = y-intercept
# theta_1 = slope
# num_rooms = array with the average number of rooms in the suburb
# prices = array with the average price in the suburb
# n = number of training examples
def compute_cost(theta_0, theta_1, num_rooms, prices):
total_squared_error = 0.0
n = len(num_rooms)
for i in range(n):
# compute h(x_i), where x_i = num_rooms[i] for the values of
# theta_0 and theta_1
h_x = (theta_1 * num_rooms[i][0]) + theta_0
# compute the difference between our guess
# for h(x_i) and the true value y_i
raw_error = prices[i] - h_x
# square the error to make the function differentiable
# and make the error positive
squared_error = raw_error**2
# add it to the total
total_squared_error += squared_error
MSE = total_squared_error / (2.0 * n)
return MSE
In [5]:
# you don't really need to know what this code does, but you should
# know what the visualization means. This might take awhile to run.
def plot_cost_function(rotate=False):
fig = plt.figure(figsize=(16, 16))
ax = fig.gca(projection='3d')
# values of theta_0 (intercept) and theta_1 (slope) to try
theta_0s = np.linspace(-50, -20, 100)
theta_1s = np.linspace(0, 20, 100)
# create meshgrid out of them
theta_0_mesh, theta_1_mesh = np.meshgrid(theta_0s, theta_1s)
mses = np.array([compute_cost(theta_0, theta_1, num_rooms, house_prices)
for theta_0, theta_1 in zip(np.ravel(theta_0_mesh),
np.ravel(theta_1_mesh))])
mse = mses.reshape(theta_0_mesh.shape)
if rotate is True:
surf = ax.plot_surface(theta_1_mesh, theta_0_mesh, mse,
rstride=1, cstride=1, linewidth=0.1,
cmap=cm.coolwarm, alpha=0.5)
else:
surf = ax.plot_surface(theta_0_mesh, theta_1_mesh, mse,
rstride=1, cstride=1, linewidth=0.1,
cmap=cm.coolwarm, alpha=0.5)
if rotate is True:
ax.set_xlabel('theta 1 (slope)')
ax.set_ylabel('theta 0 (intercept)')
else:
ax.set_xlabel('theta 0 (intercept)')
ax.set_ylabel('theta 1 (slope)')
ax.set_zlabel('MSE')
plt.show()
In [6]:
plot_cost_function()
In [7]:
plot_cost_function(rotate=True)
$X = \begin{pmatrix} x_0\\ x_1\\ \vdots\\ x_n \end{pmatrix}$ and $\theta = \begin{pmatrix} \theta_0\\ \theta_1\\ \vdots\\ \theta_n \end{pmatrix}$, so $h(x) = \theta_0x_0 + \theta_1x_1 + \theta_2x_2 + ... + \theta_nx_n = \theta^TX$
$\theta^T = \begin{bmatrix} \theta_0 & \theta_1 & \dots & \theta_n \end{bmatrix}$
$\theta = (X^T X)^{-1} X^T Y$