In [7]:
from __future__ import division  # must be the first statement in the cell
import math
from math import e, log
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.optimize import fmin_bfgs
sns.set_style("whitegrid")
%matplotlib inline
In [8]:
df = pd.read_csv('ex2data2.txt', header=None)
# number of training examples
m = df.shape[0]
# insert a column of ones (intercept term); it is renamed below
df.insert(0, '3', np.ones(m))
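As a quick sanity check (just a sketch, assuming ex2data2.txt is the standard Coursera file with two feature columns and a binary 0/1 label):

print(df.shape)                  # (118, 4) for the standard ex2data2.txt, after adding the intercept column
print(df.iloc[:, -1].unique())   # the label column should contain only 0 and 1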
In [9]:
# rename the columns
df.columns = ['one', 'second', 'third', 'result']
# first ten rows of the data
df.head(10)
Out[9]:
In [10]:
# Splitting into features X and labels y
X = df[['one', 'second', 'third']]
y = df['result']
In [16]:
sns.lmplot(x='second', y='third', data=df, hue='result', size=9, fit_reg=False, scatter_kws={"s": 100})
Out[16]:
In [64]:
# Plotting the decision boundary: the level curve where the score theta.T * mapFeature(x) equals 0
sns.lmplot(x='second', y='third', data=df, hue='result', size=9, fit_reg=False, scatter_kws={"s": 100})
plt.contour(u, v, z, levels=[0])
Out[64]:
In [59]:
# Initialization
m_row = X.shape[0]
# Map the two features to polynomial terms so a non-linear decision boundary can be fit
X_new = mapFeature(X)
m_column = X_new.shape[1]
# Regularization parameter (0 = no regularization)
_lambda = 0
theta = pd.Series(np.zeros(m_column))
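With two inputs mapped to all polynomial terms of total degree up to 6, mapFeature should produce 1 + 2 + ... + 7 = 28 columns. A quick check (a sketch reusing the names defined above):

print(X_new.shape)   # expected (m_row, 28)
print(m_column)      # expected 28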
In [60]:
# first five entries of the gradient at the initial theta
gradient_function(theta, X_new, y, _lambda)[0:5]
Out[60]:
In [61]:
# cost at the initial theta
cost_function(theta, X_new, y, _lambda)
Out[61]:
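With theta initialized to zeros, every hypothesis value is sigmoid(0) = 0.5, so the cost above should come out at roughly -log(0.5) ≈ 0.693 regardless of lambda (the regularization term vanishes at theta = 0). A one-line check:

print(-log(0.5))   # ≈ 0.6931, the expected cost at theta = 0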
In [62]:
xopt = fmin_bfgs(f=cost_function,
                 x0=theta,
                 fprime=gradient_function,
                 args=(X_new, y, _lambda),
                 maxiter=400)
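One way to sanity-check the optimized parameters is training-set accuracy. This is only a sketch reusing the names defined above; 0.5 is the usual decision threshold, not something fixed by the notebook:

pred = (sigmoid(X_new.dot(xopt)) >= 0.5).astype(int)   # predicted class per training example
print((pred == y).mean())                              # fraction classified correctly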
In [63]:
# Grid over which the decision-boundary score is evaluated
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)
z = np.zeros((u.size, v.size))
for i in range(u.size):
    for j in range(v.size):
        # one-row frame with the column names mapFeature expects
        dd = pd.DataFrame([1, u[i], v[j]]).T
        dd.columns = ['one', 'second', 'third']
        # score theta.T * mapFeature(x) at this grid point
        z[i, j] = mapFeature(dd).dot(xopt).values[0]
# transpose so rows correspond to v and columns to u, as plt.contour expects
z = z.T
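The same grid evaluation can be written without the explicit double loop. This is only an equivalent sketch (it reuses mapFeature and xopt from the cells above), not a required step:

uu, vv = np.meshgrid(u, v)                       # uu, vv have shape (v.size, u.size)
grid = pd.DataFrame({'second': uu.ravel(), 'third': vv.ravel()})
z_vec = mapFeature(grid).dot(xopt).values.reshape(uu.shape)
# z_vec already has rows indexed by v and columns by u, so no transpose is needed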
In [19]:
# Feature mapping
def mapFeature(X, degree=7):
    # Maps the two input features to all polynomial terms second**(i-j) * third**j
    # for i = 0..degree-1, j = 0..i (total degree up to 6 here, 28 columns).
    count = 0
    X_new = pd.DataFrame(np.ones(X.shape[0]))
    for i in range(degree):
        for j in range(i + 1):
            X_new[count] = (X['second'] ** (i - j)) * (X['third'] ** j)
            count += 1
    return X_new

# Functions section
def sigmoid(x):
    return 1 / (1 + e ** (-1 * x))

def cost_function(theta, X, y, _lam):
    # hypothesis
    h = pd.Series(np.dot(theta.T, X.T).T)
    # log(sigmoid(x)) for every element of the hypothesis
    h1 = sigmoid(h).apply(log)
    # log(1 - sigmoid(x)); the small offset guards against log(0)
    h2 = (1.0000000001 - sigmoid(h)).apply(log)
    # regularized cost; theta[0] (the intercept) is not regularized
    J = (-1 / m_row) * (y.T.dot(h1) + (1 - y).T.dot(h2)) + (_lam / (2 * m_row)) * sum(theta[1:] ** 2)
    return J

def gradient_function(theta, X, y, _lam):
    # hypothesis
    h = pd.Series(np.dot(theta.T, X.T).T)
    h = sigmoid(h)
    # gradient of the regularized cost; theta[0] is not regularized
    grad = pd.Series(np.zeros(m_column))
    grad[0] = (1 / m_row) * ((h - y).T.dot(X[0]).T)
    grad[1:] = (1 / m_row) * ((h - y).T.dot(X.T[1:].T).T) + (_lam / m_row) * theta[1:]
    return grad

def gradient_algo(X, y, theta, _lam):
    # plain gradient descent; `iterations`, `alpha` and `last_j` are assumed
    # to be defined globally before this function is called
    global last_j
    for n in range(iterations):
        # gradient at the current theta
        grad = gradient_function(theta, X, y, _lam)
        # gradient-descent update
        theta = theta - alpha * grad
        # record the cost at every iteration
        last_j[n] = cost_function(theta, X, y, _lam)
    return theta
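If gradient_function is ever modified, a finite-difference check against cost_function is a cheap sanity test. The helper below is hypothetical (not part of the original notebook); the step size eps and the number of checked components are arbitrary choices:

def numerical_gradient_check(theta, X, y, _lam, eps=1e-4, n_checks=5):
    # compare the analytic gradient with central finite differences on a few components
    analytic = gradient_function(theta, X, y, _lam)
    for k in range(n_checks):
        t_plus = theta.copy()
        t_minus = theta.copy()
        t_plus[k] += eps
        t_minus[k] -= eps
        numeric = (cost_function(t_plus, X, y, _lam) - cost_function(t_minus, X, y, _lam)) / (2 * eps)
        print((k, float(analytic[k]), float(numeric)))

# usage sketch: numerical_gradient_check(theta, X_new, y, 1.0)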
In [ ]: