In [ ]:
# Import packages
import pandas as pd
import numpy as np
from ipywidgets import interact
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
def probability_setosa(petal_length, petal_width, w0, w1, w2):
    """Return the probability that a given specimen belongs to the species setosa.

    The model is a logistic regression on two features: a weighted sum of the
    features plus a bias is passed through the sigmoid function.

    Parameters
    ----------
    petal_length, petal_width :
        Features of the specimen(s). The arithmetic is element-wise, so
        scalars, NumPy arrays and pandas Series are all accepted (the
        notebook calls this function with each of them).
    w0 :
        Bias term.
    w1 :
        Weight applied to ``petal_width``.
    w2 :
        Weight applied to ``petal_length``.

    Returns
    -------
    The sigmoid of ``w0 + w1*petal_width + w2*petal_length``: a value in
    [0, 1], with the same shape as the broadcast inputs.
    """
    # Compute sum of features times weights.
    # NOTE: the positional order is (length, width) but w1 pairs with the
    # width and w2 with the length.
    x = w0 + w1*petal_width + w2*petal_length
    # Apply non-linear function: sigmoid
    p = 1./(1. + np.exp(-x))
    return p
In [ ]:
# Load the training set into a DataFrame and preview its first 10 rows
df = pd.read_csv(filepath_or_buffer='./data/setosa/train.csv')
df.head(n=10)
We then define a function that plots the prediction of the model for a given set of weights, along with the training data.
In [ ]:
def plot_model(w0, w1, w2):
    """Plot the model, along with the training data.

    The probability returned by ``probability_setosa`` for the weights
    ``w0``, ``w1``, ``w2`` is drawn as a colored background over the range
    petal width in [0, 3] (x axis) and petal length in [0, 8] (y axis).
    The training points from the global DataFrame ``df`` are overlaid,
    colored by their ``'setosa'`` column.

    Parameters
    ----------
    w0, w1, w2 :
        Weights forwarded unchanged to ``probability_setosa``.
    """
    # Calculate the probability on a mesh.
    # With its default 'xy' indexing, np.meshgrid returns arrays whose rows
    # follow the second input (petal length, the y axis) and whose columns
    # follow the first input (petal width, the x axis).
    petal_width_mesh, petal_length_mesh = np.meshgrid(
        np.linspace(0, 3, 100), np.linspace(0, 8, 100))
    # probability_setosa takes the petal length first, then the petal width;
    # passing them the other way round would apply each weight to the wrong
    # feature and make the plot disagree with the predictions.
    p = probability_setosa(petal_length_mesh, petal_width_mesh, w0, w1, w2)

    # Plot the probability on the mesh.
    # imshow maps rows to the y axis and columns to the x axis, which is
    # already the layout of `p`, so it must not be transposed.
    plt.clf()
    plt.imshow(p, extent=[0, 3, 0, 8], origin='lower',
               vmin=0, vmax=1, cmap='RdBu', aspect='auto', alpha=0.5)

    # Plot the data points
    plt.scatter(df['petal width (cm)'], df['petal length (cm)'],
                c=df['setosa'], cmap='RdBu')
    plt.xlabel('petal width (cm)')
    plt.ylabel('petal length (cm)')
    cb = plt.colorbar()
    cb.set_label('setosa')
We can then use the function `interact` of `ipywidgets` to call this function with adjustable weights:
In [ ]:
# One slider per weight; each (min, max) tuple sets the range of that slider
interact(
    plot_model,
    w0=(-4., 5.),
    w1=(-2., 2.),
    w2=(-2., 3.),
)
In [ ]:
# Optimal weights: fill these values
# (Placeholders left for the reader: complete each right-hand side with the
# weight found by hand using the interactive plot. This cell is not valid
# Python until all three values are filled in.)
w0 =
w1 =
w2 =
In [ ]:
# Load the test set into a DataFrame and preview its first 10 rows
df_test = pd.read_csv(filepath_or_buffer='./data/setosa/test.csv')
df_test.head(n=10)
We can now check the prediction of our model, for instance on the first point:
In [ ]:
# Predicted probability for one specimen; keyword arguments make the
# (length, width) order explicit
probability_setosa(petal_length=4.2, petal_width=1.5, w0=w0, w1=w1, w2=w2)
More generally, by using pandas syntax, we can perform predictions on the whole dataset:
In [ ]:
# Apply the model to every row at once: the arithmetic in probability_setosa
# is element-wise, so whole columns can be passed in place of scalars
df_test['probability_setosa_predicted'] = probability_setosa(
    petal_length=df_test['petal length (cm)'],
    petal_width=df_test['petal width (cm)'],
    w0=w0,
    w1=w1,
    w2=w2,
)
In [ ]:
# Display the test DataFrame (as the last expression of the cell, the
# notebook renders it)
df_test
While the above procedure yields good results, it is very cumbersome to try to find the weights by hand. Let us use keras here to automate this process.