In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from perceptron import Perceptron
from graphhelpers import plot_decision_regions

%matplotlib inline

In [2]:
# Column labels to attach on load; header=None tells pandas the file has no
# header row of its own, so these names are used instead.
col_names = [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
    'class',
]
df = pd.read_csv('../data/iris/iris.data', names=col_names, header=None)
df.tail()  # peek at the last rows to confirm the load


Out[2]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) class
145 6.7 3.0 5.2 2.3 Iris-virginica
146 6.3 2.5 5.0 1.9 Iris-virginica
147 6.5 3.0 5.2 2.0 Iris-virginica
148 6.2 3.4 5.4 2.3 Iris-virginica
149 5.9 3.0 5.1 1.8 Iris-virginica

In [3]:
# Class labels (still strings at this point) for the first 100 rows only;
# those rows cover the first two classes in the file.
y = df['class'].iloc[:100].values
y


Out[3]:
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor'], dtype=object)

In [4]:
# Encode the string labels as integers: -1 for 'Iris-setosa', +1 for every
# other label found in the first 100 rows.
# The labels are read from df rather than from the current value of y so that
# this cell is idempotent: re-running `np.where(y == 'Iris-setosa', ...)` on
# the already-encoded integers would no longer match the string and every
# -1 label would silently be lost.
y = np.where(df.iloc[:100, 4].values == 'Iris-setosa', -1, 1)
y


Out[4]:
array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1])

In [5]:
# Feature matrix for the first 100 rows: sepal length and petal length,
# i.e. the 1st and 3rd columns of the file.
X = df[[col_names[0], col_names[2]]].iloc[:100].values
X


Out[5]:
array([[ 5.1,  1.4],
       [ 4.9,  1.4],
       [ 4.7,  1.3],
       [ 4.6,  1.5],
       [ 5. ,  1.4],
       [ 5.4,  1.7],
       [ 4.6,  1.4],
       [ 5. ,  1.5],
       [ 4.4,  1.4],
       [ 4.9,  1.5],
       [ 5.4,  1.5],
       [ 4.8,  1.6],
       [ 4.8,  1.4],
       [ 4.3,  1.1],
       [ 5.8,  1.2],
       [ 5.7,  1.5],
       [ 5.4,  1.3],
       [ 5.1,  1.4],
       [ 5.7,  1.7],
       [ 5.1,  1.5],
       [ 5.4,  1.7],
       [ 5.1,  1.5],
       [ 4.6,  1. ],
       [ 5.1,  1.7],
       [ 4.8,  1.9],
       [ 5. ,  1.6],
       [ 5. ,  1.6],
       [ 5.2,  1.5],
       [ 5.2,  1.4],
       [ 4.7,  1.6],
       [ 4.8,  1.6],
       [ 5.4,  1.5],
       [ 5.2,  1.5],
       [ 5.5,  1.4],
       [ 4.9,  1.5],
       [ 5. ,  1.2],
       [ 5.5,  1.3],
       [ 4.9,  1.5],
       [ 4.4,  1.3],
       [ 5.1,  1.5],
       [ 5. ,  1.3],
       [ 4.5,  1.3],
       [ 4.4,  1.3],
       [ 5. ,  1.6],
       [ 5.1,  1.9],
       [ 4.8,  1.4],
       [ 5.1,  1.6],
       [ 4.6,  1.4],
       [ 5.3,  1.5],
       [ 5. ,  1.4],
       [ 7. ,  4.7],
       [ 6.4,  4.5],
       [ 6.9,  4.9],
       [ 5.5,  4. ],
       [ 6.5,  4.6],
       [ 5.7,  4.5],
       [ 6.3,  4.7],
       [ 4.9,  3.3],
       [ 6.6,  4.6],
       [ 5.2,  3.9],
       [ 5. ,  3.5],
       [ 5.9,  4.2],
       [ 6. ,  4. ],
       [ 6.1,  4.7],
       [ 5.6,  3.6],
       [ 6.7,  4.4],
       [ 5.6,  4.5],
       [ 5.8,  4.1],
       [ 6.2,  4.5],
       [ 5.6,  3.9],
       [ 5.9,  4.8],
       [ 6.1,  4. ],
       [ 6.3,  4.9],
       [ 6.1,  4.7],
       [ 6.4,  4.3],
       [ 6.6,  4.4],
       [ 6.8,  4.8],
       [ 6.7,  5. ],
       [ 6. ,  4.5],
       [ 5.7,  3.5],
       [ 5.5,  3.8],
       [ 5.5,  3.7],
       [ 5.8,  3.9],
       [ 6. ,  5.1],
       [ 5.4,  4.5],
       [ 6. ,  4.5],
       [ 6.7,  4.7],
       [ 6.3,  4.4],
       [ 5.6,  4.1],
       [ 5.5,  4. ],
       [ 5.5,  4.4],
       [ 6.1,  4.6],
       [ 5.8,  4. ],
       [ 5. ,  3.3],
       [ 5.6,  4.2],
       [ 5.7,  4.2],
       [ 5.7,  4.2],
       [ 6.2,  4.3],
       [ 5.1,  3. ],
       [ 5.7,  4.1]])

In [6]:
# Scatter the two groups in feature space: rows 0-49 are drawn as setosa,
# rows 50 onward as versicolor.
fig, ax = plt.subplots()
ax.scatter(X[:50, 0], X[:50, 1], label='setosa', marker='o', color='purple')
ax.scatter(X[50:, 0], X[50:, 1], label='versicolor', marker='x', color='blue')
ax.set_xlabel(col_names[0])
ax.set_ylabel(col_names[2])  # X[:, 1] was taken from column 2 of df
ax.legend(loc='upper left')
plt.show()



In [7]:
# Build the perceptron with the chosen hyper-parameters, then train it on
# the two-feature matrix X and the -1/+1 labels y.
ppn = Perceptron(
    iterations=10,
    learning_rate=0.1,
)
ppn.fit(X, y)


Out[7]:
<perceptron.Perceptron at 0x10e37c290>

In [8]:
# Plot ppn.errors against a 1-based index. Per the axis labels, each entry
# is presumably the misclassification count for one training epoch.
epoch_numbers = range(1, len(ppn.errors) + 1)
fig, ax = plt.subplots()
ax.plot(epoch_numbers, ppn.errors, marker='o')
ax.set_xlabel('Epochs')
ax.set_ylabel('Number of Misclassifications')
plt.show()



In [9]:
# Let the graphhelpers routine draw the trained classifier's decision
# regions first, then annotate whatever axes are current afterwards.
plot_decision_regions(X, y, classifier=ppn)
axes = plt.gca()
axes.set_xlabel(col_names[0])
axes.set_ylabel(col_names[2])
axes.legend(loc='upper left')
plt.show()



In [ ]: