In [1]:
import numpy as np
import pandas

import matplotlib
from matplotlib import pyplot as plt

%matplotlib inline

In [2]:
# Load the univariate regression data set; the CSV's first row holds the
# column names (area, price).
df = pandas.read_csv(
    "../data/regression.univariate.csv",
    sep=",",
    header=0,
)

# Preview the first five rows.
df.head()


Out[2]:
area price
0 227.766945 470.022085
1 170.267480 220.426133
2 200.359109 345.117593
3 243.889538 467.335341
4 247.447586 378.182600

In [60]:
# Standardize every column: subtract its mean and divide by its standard
# deviation (pandas' std() defaults to the sample estimate, ddof=1).
# NOTE: this overwrites `df`; the raw values remain recoverable as
# df * sigma + mu.
mu = df.mean()
sigma = df.std()

df = df.sub(mu, axis="columns").div(sigma, axis="columns")

df.head()


Out[60]:
area price
0 0.852807 1.537415
1 -0.516996 -0.659500
2 0.199873 0.438020
3 1.236894 1.513766
4 1.321657 0.729054

In [61]:
def cost(w0, w1, data):
    """Half mean squared error of the line ``w0 + w1*x`` against ``data``.

    Parameters
    ----------
    w0 : intercept of the line.
    w1 : slope of the line.
    data : table supporting ``.shape`` and ``.iloc``; column 0 is read as
        the input ``x`` and column 1 as the target ``y``.

    Returns
    -------
    ``1/(2n) * sum((w0 + w1*x - y)**2)`` where ``n`` is the number of rows.
    """
    n_rows = data.shape[0]
    inputs = data.iloc[:, 0].values
    targets = data.iloc[:, 1].values

    residuals = (w0 + w1 * inputs) - targets
    scale = 1 / (2.0 * n_rows)
    return scale * np.sum(residuals ** 2)

# Smoke test: evaluate the cost once at an arbitrary (w0, w1) pair on the
# standardized data.
cost(w0=-100, w1=2, data=df)


Out[61]:
5000.6289308288551

In [85]:
# Candidate intercepts (w0) and slopes (w1), sampled every 0.1; the stop
# value itself is excluded by np.arange.
w0_array = np.arange(-2, 2, 0.1)
w1_array = np.arange(-2, 3, 0.1)
print(w0_array.shape, w1_array.shape)

# 'ij' indexing: axis 0 follows w0_array and axis 1 follows w1_array, so
# W0[i, j] == w0_array[i] and W1[i, j] == w1_array[j].
grid = np.meshgrid(w0_array, w1_array, indexing="ij")
W0, W1 = grid[0], grid[1]
print(W0.shape, W1.shape)


(40,) (50,)
(40, 50) (40, 50)

In [86]:
# Evaluate the cost at every grid point. J[i, j] is the cost for
# (w0_array[i], w1_array[j]), matching the 'ij' layout of W0 / W1.
J = np.array(
    [[cost(w0, w1, df) for w1 in w1_array] for w0 in w0_array]
).reshape(W0.shape)

In [108]:
# Filled contour map of the cost surface J over the (w0, w1) grid, with
# black iso-cost lines drawn on top. The axes object is created explicitly
# so the colorbar call below has a real `ax` to attach to.
fig, ax = plt.subplots(figsize=(10, 10))

# Filled bands every 0.25, from 0 up to 5.75.
fill_levels = [k / 4 for k in range(24)]
cs = ax.contourf(W0, W1, J, fill_levels)
# NOTE(review): contourf leaves values outside `fill_levels` unfilled; if J
# exceeds 5.75 anywhere on the grid, pass extend="max" -- TODO confirm.

# Black iso-cost lines on every other band boundary, up to 3.75.
line_levels = [0.25, 0.75, 1.25, 1.75, 2.25, 2.75, 3.25, 3.75]
ax.contour(W0, W1, J, line_levels, colors="k", linewidths=1)

fig.colorbar(cs, ax=ax, shrink=0.9)
ax.tick_params(axis="both", which="major", labelsize=15)

# The horizontal axis is the intercept and the vertical axis the slope,
# because the plot calls receive (W0, W1) as (X, Y).
ax.set_xlabel("w0", fontsize=14)
ax.set_ylabel("w1", fontsize=14)
ax.set_title("Cost surface J(w0, w1)", fontsize=14)

plt.show()



In [ ]: