In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import axes3d, Axes3D #<-- Note the capitalization! 
%matplotlib inline
sns.set_style("white")

In [30]:
#functions 

def feature_normalize(df_n) :
    return ( ( df_n - df_n.mean() ) / df_n.std() )

def cost_function(X,y,theta):
    h_theta = np.dot(theta.T,X.T).T
    J = sum( ( 1 / ( 2 * m ) ) * ((h_theta - y) ** 2))
    return J

def gradient_function():
    for n in range(iterations):
        global theta
        last_j[n] = cost_function(X,y,theta)
        h_theta = np.dot(theta.T,X.T).T
        theta = theta - (alpha / m) * np.dot((h_theta - y).T,X).T
    return theta

In [31]:
df = pd.read_csv('ex1data2.txt', sep=',', header=None)
df.describe()


Out[31]:
0 1 2
count 47.000000 47.000000 47.000000
mean 2000.680851 3.170213 340412.659574
std 794.702354 0.760982 125039.899586
min 852.000000 1.000000 169900.000000
25% 1432.000000 3.000000 249900.000000
50% 1888.000000 3.000000 299900.000000
75% 2269.000000 4.000000 384450.000000
max 4478.000000 5.000000 699900.000000

In [32]:
df.head()


Out[32]:
0 1 2
0 2104 3 399900
1 1600 3 329900
2 2400 3 369000
3 1416 2 232000
4 3000 4 539900

In [33]:
df_norm = feature_normalize(df)

In [34]:
m = df_norm.shape[0]

In [35]:
df_norm.insert(0,'3',np.ones(m))
df.insert(0,'3',np.ones(m))

In [36]:
df_norm.columns = np.arange(0,4)
df.columns = np.arange(0,4)

In [37]:
# initialization
theta = pd.Series(np.zeros(df_norm.columns.shape[0] - 1))
y = df_norm[3]
X = df_norm.T[0:3].T
alpha = 0.1
iterations = 50
last_j = np.zeros(50)

In [38]:
gradient_function()


Out[38]:
0   -1.185813e-16
1    8.327543e-01
2   -1.377203e-03
dtype: float64

In [18]:
# plotting Cost function versus Number of iterations
itr_list = np.arange(0,50)
fig = plt.figure(figsize=(12,8))
plt.plot(itr_list,last_j,'-b')


Out[18]:
[<matplotlib.lines.Line2D at 0xb089f82cf8>]

In [829]:
xx = ([1650, 3] - df.mean()[1:3]) / df.std()[1:3]
xx


Out[829]:
1   -0.441273
2   -0.223675
dtype: float64

In [830]:
xx = pd.DataFrame(xx).T
xx


Out[830]:
1 2
0 -0.441273 -0.223675

In [831]:
xx.insert(0,3,1)
xx


Out[831]:
3 1 2
0 1 -0.441273 -0.223675

In [832]:
xx.columns = np.arange(0,3)

In [835]:
np.dot(theta.T,xx.T)[0]


Out[835]:
-0.36716411456663245

Normal Equation


In [848]:
y = df[3]
X = df.T[0:3].T

In [837]:
theta = np.dot(np.dot(np.dot(X.T,X),X.T),y)

In [849]:
theta


Out[849]:
array([  3.37743390e+15,   7.80063079e+18,   1.12666974e+16])

In [850]:
xx = [1650, 3]

In [851]:
xx = pd.DataFrame(xx).T

In [852]:
xx.insert(0,3,1)

In [853]:
xx.columns = np.arange(0,3)

In [854]:
np.dot(theta.T,xx.T)[0]


Out[854]:
1.2871077988282204e+22

In [ ]:


In [ ]: