In [7]:
# __future__ import must come before any other statement in the cell
from __future__ import division

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from math import e, log
from scipy.optimize import fmin_bfgs

sns.set_style("whitegrid")
%matplotlib inline

In [8]:
df = pd.read_csv('ex2data2.txt', header=None)

# number of training examples
m = df.shape[0]

# insert a column of ones (the intercept term); the temporary name '3' is replaced below
df.insert(0, '3', np.ones(m))

In [9]:
# rename the columns: 'one' is the intercept column, 'second' and 'third' are the two features
df.columns = ['one', 'second', 'third', 'result']

# first 10 rows of the data
df.head(10)


Out[9]:
one second third result
0 1.0 0.051267 0.699560 1
1 1.0 -0.092742 0.684940 1
2 1.0 -0.213710 0.692250 1
3 1.0 -0.375000 0.502190 1
4 1.0 -0.513250 0.465640 1
5 1.0 -0.524770 0.209800 1
6 1.0 -0.398040 0.034357 1
7 1.0 -0.305880 -0.192250 1
8 1.0 0.016705 -0.404240 1
9 1.0 0.131910 -0.513890 1

In [10]:
# Splitting the data into features X and labels y
X = df[['one', 'second', 'third']]
y = df['result']

Visualizing Data


In [16]:
sns.lmplot(x='second', y='third', data=df, hue='result', size=9, fit_reg=False, scatter_kws={"s": 100})


Out[16]:
<seaborn.axisgrid.FacetGrid at 0xbe865e17b8>

In [64]:
# Plotting the decision boundary on top of the scatter plot
# (u, v and z are computed in the Computations Section below)
sns.lmplot(x='second', y='third', data=df, hue='result', size=9, fit_reg=False, scatter_kws={"s": 100})
plt.contour(u, v, z, 1)


Out[64]:
<matplotlib.contour.QuadContourSet at 0xbe8b6b7278>

Computations Section


In [59]:
# Initialization
m_row = X.shape[0]

# Map the two features to polynomial terms so a non-linear decision boundary can be fit
X_new = mapFeature(X)
m_column = X_new.shape[1]

# regularization parameter (0 means no regularization)
_lambda = 0
theta = pd.Series(np.zeros(m_column))
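
Note: with the default degree=7, mapFeature generates every term of the form second**(i-j) * third**j for i = 0..6 and j = 0..i, i.e. 1 + 2 + ... + 7 = 28 columns, so m_column is 28 and theta starts as a vector of 28 zeros.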

In [60]:
# first five entries of the gradient at the initial theta
gradient_function(theta, X_new, y, _lambda).T[0:5]


Out[60]:
0    0.008475
1    0.018788
2    0.000078
3    0.050345
4    0.011501
dtype: float64

In [61]:
# cost at the initial theta
cost_function(theta, X_new, y, _lambda)


Out[61]:
0.6931471804582503
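
Sanity check: with theta = 0 every hypothesis value is sigmoid(0) = 0.5, so the unregularized cost is -log(0.5) = ln(2) ≈ 0.6931, which matches the value above (up to the tiny offset the cost function adds to avoid log(0)).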

In [62]:
# Minimize the regularized cost with BFGS, using the analytic gradient
xopt = fmin_bfgs(f=cost_function,
                 x0=theta,
                 fprime=gradient_function,
                 args=(X_new, y, _lambda),
                 maxiter=400)


Warning: Maximum number of iterations has been exceeded.
         Current function value: 0.263499
         Iterations: 400
         Function evaluations: 401
         Gradient evaluations: 401
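
As a quick check on the optimized parameters (not part of the original run), the training-set accuracy can be computed by thresholding the hypothesis at 0.5. This sketch assumes xopt from the cell above and X_new, y and sigmoid as defined elsewhere in the notebook.

In [ ]:
# Added check: fraction of training examples the fitted model classifies correctly
pred = (sigmoid(X_new.dot(xopt)) >= 0.5).astype(int)
(pred == y).mean()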

In [63]:
# Grid covering the feature range, used to evaluate the decision boundary
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)

z = np.zeros((u.size, v.size))

# Evaluate theta' * mapFeature(u_i, v_j) at every grid point
for i in range(u.size):
    for j in range(v.size):
        dd = pd.DataFrame([1, u[i], v[j]]).T
        dd.columns = ['one', 'second', 'third']
        z[i, j] = mapFeature(dd).dot(xopt)

# transpose so the orientation matches plt.contour's (u, v) convention
z = z.T
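
The decision boundary is where the hypothesis equals 0.5, i.e. where theta' * mapFeature(u, v) = 0, so the curve drawn earlier with plt.contour is the zero level of this z grid (passing levels=[0] to plt.contour would pin it to exactly that contour).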

Functions Section
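
For reference, the functions below implement the regularized logistic regression cost and its gradient, with the intercept theta_0 left out of the regularization term:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\Big[-y^{(i)}\log h_\theta(x^{(i)}) - (1-y^{(i)})\log\big(1-h_\theta(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

$$\frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)} + \frac{\lambda}{m}\theta_j \ (j \ge 1), \qquad h_\theta(x) = \frac{1}{1 + e^{-\theta^{T}x}}$$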


In [19]:
# Feature mapping: all polynomial terms second**(i-j) * third**j for i = 0..degree-1
def mapFeature(X, degree=7):
    count = 0
    X_new = pd.DataFrame(np.ones(X.shape[0]))
    for i in range(degree):
        for j in range(i + 1):
            X_new[count] = (X['second'] ** (i - j)) * (X['third'] ** j)
            count += 1

    return X_new

# Logistic (sigmoid) function, applied element-wise
def sigmoid(x):
    return 1 / (1 + e ** (-x))

def cost_function(theta, X, y, _lam):
    # hypothesis values theta' * x for every example
    h = pd.Series(np.dot(theta.T, X.T).T)

    # log(sigmoid(h)) for every example
    h1 = sigmoid(h).apply(log)

    # log(1 - sigmoid(h)); the tiny offset avoids log(0)
    h2 = (1.0000000001 - sigmoid(h)).apply(log)

    # regularized cost; theta[0] (the intercept) is not regularized
    J = (-1 / m_row) * (y.T.dot(h1) + (1 - y).T.dot(h2)) + (_lam / (2 * m_row)) * sum(theta[1:] ** 2)

    return J

def gradient_function(theta, X, y, _lam):
    # hypothesis values for every example
    h = pd.Series(np.dot(theta.T, X.T).T)
    h = sigmoid(h)

    # gradient of the regularized cost; the intercept term grad[0] is not regularized
    grad = pd.Series(np.zeros(m_column))
    grad[0] = (1 / m_row) * ((h - y).T.dot(X[0]).T)
    grad[1:] = (1 / m_row) * ((h - y).T.dot(X.T[1:].T).T) + (_lam / m_row) * theta[1:]

    return grad

def gradient_algo(X, y, theta, _lam):
    # plain batch gradient descent; expects the globals alpha (learning rate),
    # iterations (number of steps) and last_j (cost history) to be defined
    global last_j

    for n in range(iterations):
        # gradient of the cost at the current theta
        grad = gradient_function(theta, X, y, _lam)

        # gradient descent step
        theta = theta - alpha * grad

        # save the cost at every iteration
        last_j[n] = cost_function(theta, X, y, _lam)

    return theta
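
gradient_algo is never called above (fmin_bfgs is used instead) and relies on the globals alpha, iterations and last_j. A minimal sketch of how it could be run, with illustrative (untuned) values for those globals:

In [ ]:
# Added sketch: plain gradient descent as an alternative to fmin_bfgs
alpha = 0.1                                   # learning rate (illustrative value)
iterations = 1000                             # number of descent steps (illustrative value)
last_j = pd.Series(np.zeros(iterations))      # cost history filled in by gradient_algo

theta_gd = gradient_algo(X_new, y, pd.Series(np.zeros(m_column)), _lambda)
cost_function(theta_gd, X_new, y, _lambda)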

In [ ]: