In [34]:
# __future__ import first (it should precede the other imports)
from __future__ import division

import math
from math import *   # provides e and log, used in the functions section below

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.optimize import fmin_bfgs

sns.set_style("whitegrid")
%matplotlib inline

In [35]:
df = pd.read_csv('ex2data2.txt', header=None)

# number of training examples
m = df.shape[0]

# insert a column of ones (the intercept term)
df.insert(0, '3', np.ones(m))

In [36]:
# rename the columns
df.columns = ['one', 'second', 'third', 'result']

# first ten rows of the data
df.head(10)


Out[36]:
one second third result
0 1.0 0.051267 0.699560 1
1 1.0 -0.092742 0.684940 1
2 1.0 -0.213710 0.692250 1
3 1.0 -0.375000 0.502190 1
4 1.0 -0.513250 0.465640 1
5 1.0 -0.524770 0.209800 1
6 1.0 -0.398040 0.034357 1
7 1.0 -0.305880 -0.192250 1
8 1.0 0.016705 -0.404240 1
9 1.0 0.131910 -0.513890 1

In [37]:
# Split the data into features X and labels y
X = df[['one', 'second', 'third']]
y = df['result']

Visualizing Data


In [38]:
sns.lmplot(x='second', y='third', data=df, hue='result', size=9, fit_reg=False, scatter_kws={"s": 100})


Out[38]:
<seaborn.axisgrid.FacetGrid at 0xdbcf28b240>

In [39]:
# Plot the decision boundary on top of the scatter plot
# (u, v and z are computed in the grid cell below, In [44])
sns.lmplot(x='second', y='third', data=df, hue='result', size=9, fit_reg=False, scatter_kws={"s": 100})
plt.contour(u,v,z,1)


Out[39]:
<matplotlib.contour.QuadContourSet at 0xdbce3afa58>

Computations Section


In [40]:
# Initialization
m_row = X.shape[0]

# Map the two input features to polynomial terms so that a
# non-linear decision boundary can be fit
X_new = mapFeature(X)
m_column = X_new.shape[1]

# regularization parameter and initial parameter vector
_lambda = 0
theta = pd.Series(np.zeros(m_column))
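As a quick sanity check (a hypothetical extra cell, not part of the original notebook), the mapping should produce 28 polynomial terms (1 + 2 + … + 7) for the 118 training examples:

# optional check: mapFeature with the default degree yields 28 columns
print(X_new.shape)   # expected: (118, 28)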

In [41]:
gradient_function(theta, X_new, y, _lambda).T[0:5]


Out[41]:
0    0.008475
1    0.018788
2    0.000078
3    0.050345
4    0.011501
dtype: float64

In [42]:
cost_function(theta,X_new, y, _lambda)


Out[42]:
0.6931471804582503
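This matches the expected cost at the zero initialization: with theta = 0 every hypothesis equals sigmoid(0) = 0.5, so the unregularized cost is -log(0.5) = ln 2 ≈ 0.6931.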

In [46]:
y


Out[46]:
0      1
1      1
2      1
3      1
4      1
5      1
6      1
7      1
8      1
9      1
10     1
11     1
12     1
13     1
14     1
15     1
16     1
17     1
18     1
19     1
20     1
21     1
22     1
23     1
24     1
25     1
26     1
27     1
28     1
29     1
      ..
88     0
89     0
90     0
91     0
92     0
93     0
94     0
95     0
96     0
97     0
98     0
99     0
100    0
101    0
102    0
103    0
104    0
105    0
106    0
107    0
108    0
109    0
110    0
111    0
112    0
113    0
114    0
115    0
116    0
117    0
Name: result, Length: 118, dtype: int64

In [43]:
xopt = fmin_bfgs(f= cost_function,
                 x0= theta,
                 fprime= gradient_function,
                 args=(X_new,y, _lambda),
                 maxiter=400)


Warning: Maximum number of iterations has been exceeded.
         Current function value: 0.263499
         Iterations: 400
         Function evaluations: 401
         Gradient evaluations: 401
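As a quick check of the fitted parameters (a minimal sketch, not part of the original notebook; the 0.5 threshold is the usual convention), the training-set accuracy can be computed from xopt:

# predict 1 when sigmoid(theta' * x) >= 0.5, else 0
predictions = (sigmoid(pd.Series(np.dot(X_new, xopt))) >= 0.5).astype(int)
print((predictions == y).mean())   # fraction of training examples classified correctly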

In [44]:
# Grid over which the decision boundary is evaluated
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)

z = np.zeros((u.size, v.size))

# evaluate theta' * mapFeature(u, v) at every grid point
for i in range(u.size):
    for j in range(v.size):
        dd = pd.DataFrame([1, u[i], v[j]]).T
        dd.columns = ['one', 'second', 'third']
        z[i, j] = mapFeature(dd).dot(xopt)

# transpose so that rows correspond to v and columns to u, as plt.contour expects
z = z.T
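The decision boundary is the curve where the mapped features dotted with xopt equal zero, i.e. where the hypothesis crosses 0.5. Passing an explicit level list pins the contour from In [39] to exactly that curve (a suggested tweak, not part of the original notebook):

plt.contour(u, v, z, levels=[0])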

Functions Section


In [45]:
# Feature mapping: build polynomial terms of the two input features
def mapFeature(X, degree=7):
    count = 0
    X_new = pd.DataFrame(np.ones(X.shape[0]))
    for i in range(degree):
        for j in range(i + 1):
            X_new[count] = (X['second'] ** (i - j)) * (X['third'] ** j)
            count += 1

    return X_new
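# With the default degree=7 the loops above generate every monomial
# x1**(i-j) * x2**j for i = 0..6, j = 0..i, i.e. the 28-term feature vector
# [1, x1, x2, x1**2, x1*x2, x2**2, ..., x1*x2**5, x2**6]
# (here x1 is the 'second' column and x2 is the 'third' column).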

# Hypothesis, cost and gradient functions
def sigmoid(x):
    # logistic (sigmoid) function
    return 1 / (1 + e ** (-1 * x))

def cost_function(theta, X, y, _lam):
    # hypothesis h = sigmoid(X * theta)
    h = pd.Series(np.dot(theta.T, X.T).T)

    # log(sigmoid(x)) for every hypothesis element
    h1 = sigmoid(h).apply(log)

    # log(1 - sigmoid(x)); the tiny offset avoids log(0)
    h2 = (1.0000000001 - sigmoid(h)).apply(log)

    # regularized cost; the intercept theta[0] is not penalized,
    # matching the gradient below
    J = (-1 / m_row) * (y.T.dot(h1) + (1 - y).T.dot(h2)) + (_lam / (2 * m_row)) * sum(theta[1:] ** 2)

    return J
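# The value returned above is the regularized logistic-regression cost
#   J(theta) = -(1/m) * [ y'*log(h) + (1 - y)'*log(1 - h) ]
#              + (lambda / (2m)) * sum_{j>=1} theta_j^2
# with h = sigmoid(X * theta); the intercept theta_0 is left unpenalized.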

def gradient_function(theta, X, y, _lam):
    # hypothesis h = sigmoid(X * theta)
    h = pd.Series(np.dot(theta.T, X.T).T)
    h = sigmoid(h)

    # gradient of the regularized cost; theta[0] is not regularized
    grad = pd.Series(np.zeros(m_column))
    grad[0] = (1 / m_row) * ((h - y).T.dot(X[0]).T)
    grad[1:] = (1 / m_row) * ((h - y).T.dot(X.T[1:].T).T) + (_lam / m_row) * theta[1:]

    return grad
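# The gradient returned above is, element-wise,
#   grad_0 = (1/m) * sum_i (h_i - y_i) * x_i0
#   grad_j = (1/m) * sum_i (h_i - y_i) * x_ij + (lambda/m) * theta_j   for j >= 1
# which is the derivative of the regularized cost J(theta).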

# Plain gradient descent (not used above; kept as an alternative to fmin_bfgs).
# Expects the globals `alpha`, `iterations` and `last_j` to be defined first.
def gradient_algo(X, y, theta, _lam):
    for n in range(iterations):

        # gradient of the cost at the current theta
        grad = gradient_function(theta, X, y, _lam)

        # gradient-descent update
        theta = theta - alpha * grad

        # record the cost of every iteration
        global last_j
        last_j[n] = cost_function(theta, X, y, _lam)

    return theta

In [ ]: