In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
# NOTE(review): this wildcard import supplies confusion_matrix,
# plot_confusion_matrix and plot_desicion_boundary used below; explicit
# imports would make the provenance of those names clear.
from util.util import *
# NOTE(review): the 'seaborn-poster' style name was removed in
# matplotlib >= 3.6 (renamed 'seaborn-v0_8-poster') — confirm the
# matplotlib version this notebook is pinned to.
plt.style.use('seaborn-poster')
%matplotlib inline
In [2]:
# import the iris data
# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Keep only feature columns 0 and 2 so the data can be
# visualized directly in two dimensions.
X = iris.data[:, [0, 2]]
y = iris.target

# Human-readable names for the classes and the feature columns.
target_names = iris.target_names
feature_names = iris.feature_names

# Count the distinct labels to get the number of classes.
n_class = np.unique(y).size
print('We have %d classes in the data'%(n_class))
In [3]:
# let's have a look of the data first
# Visualize the two selected features, with one color/marker per class.
colors = ['b', 'g', 'r']
markers = ['o', '^', '*']

plt.figure(figsize=(10, 8))
for label in range(n_class):
    mask = (y == label)
    plt.scatter(X[mask, 0], X[mask, 1], color=colors[label],
                marker=markers[label], s=60, label=target_names[label])

plt.legend(loc=2, scatterpoints=1)
plt.xlabel('Feature 1 - ' + feature_names[0])
plt.ylabel('Feature 2 - ' + feature_names[2])
plt.show()
SVM is a very popular algorithm in machine learning, and it is based on a simple intuition: if we had the following two lines separating the orange squares from the blue dots, which one would you say is better?
If you want to know more about SVM, check it on Qingkai's blog
In [4]:
# Build a linear-kernel support vector classifier and fit it to the
# two-feature iris data; SVC.fit returns the fitted estimator itself.
clf = svm.SVC(kernel='linear').fit(X, y)
Out[4]:
In [5]:
# Predict class labels for the training samples (quick sanity check
# that the fitted classifier produces labels for every sample).
clf.predict(X)
Out[5]:
Let's evaluate the performance on the training data by plotting the confusion matrix. We will also plot the decision boundary, which helps us understand the capability of the classifier (since we selected only two features, the decision boundary is easy to visualize).
In [6]:
# Predict on the training data (note: no held-out test set is used here,
# so this measures training performance, not generalization).
predicted = clf.predict(X)
# Compare true vs. predicted labels in a confusion matrix.
# NOTE(review): confusion_matrix / plot_confusion_matrix come from the
# wildcard `from util.util import *` — verify.
cm = confusion_matrix(y,predicted)
plot_confusion_matrix(cm, classes=iris.target_names,
title='Confusion matrix, without normalization')
In [7]:
# Plot the classifier's decision regions over the 2-D feature space.
# NOTE(review): "desicion" is a typo, but it matches the helper's name in
# util.util — rename there first before fixing this call site.
plot_desicion_boundary(X, y, clf)
Let's do the classification using an Artificial Neural Network (ANN). It is another popular algorithm in machine learning, the structure of a standard ANN is:
We don't have time to go over the basics of ANN, but you can find more information about it on Qingkai's blog.
In [8]:
from sklearn.neural_network import MLPClassifier
In [ ]:
# Initialize ANN classifier
# create an ANN with two hidden layers, each with 10 neurons
# TODO(student): fill in a tuple of layer sizes, e.g. (n1, n2)
hidden_layer_sizes =
# using a logistic activation function
# TODO(student): fill in the activation name MLPClassifier expects (a string)
activation =
mlp = MLPClassifier(hidden_layer_sizes= hidden_layer_sizes, activation=activation, \
max_iter = 2000, random_state=13)
In [ ]:
# Train the classifier with the training data (TODO(student): call mlp.fit with X and y)
In [ ]:
# Predict labels on the training data — no test split is made in this
# notebook. TODO(student): call the trained network's predict method.
predicted =
In [ ]:
# Compare true vs. predicted labels in a confusion matrix
# (these are training-data predictions, not test-data predictions).
cm = confusion_matrix(y,predicted)
plot_confusion_matrix(cm, classes=iris.target_names,
title='Confusion matrix, without normalization')
In [ ]:
# Running this cell replaces its contents with the reference solution file.
%load ../solutions/solution_02.py
In [ ]: