Iris Classification with Logistic Regression


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from models import linear_model, logistic_model, log_cost, log_cost_dev, gd_update
from models import binary_confusion_matrix, std_normalize, binary_accuracy, create_parameters, data_normalize
from sklearn.model_selection import train_test_split

%matplotlib inline
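
models.py is not included in this notebook, so as a reading aid, here is a minimal sketch of what the logistic-regression helpers used below might look like. It assumes X has shape (samples, features), W has shape (features, 1), and b is a scalar; the conf triples printed during training are consistent with binary_confusion_matrix returning (precision, recall, F1), e.g. 2 * 0.95 * 1.0 / (0.95 + 1.0) = 0.974.

import numpy as np

def logistic_model(X, W, b):
    # sigmoid over the linear model: h = 1 / (1 + e^-(XW + b))
    return 1.0 / (1.0 + np.exp(-(X @ W + b)))

def log_cost(h, y):
    # mean binary cross-entropy
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

def log_cost_dev(X, y, h):
    # gradients of the cost with respect to W and b
    m = X.shape[0]
    return X.T @ (h - y) / m, np.mean(h - y)

def gd_update(W, b, dW, db, lr):
    # one batch gradient-descent step
    return W - lr * dW, b - lr * db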

1) Prepare data


In [2]:
df = pd.read_csv('./data/iris.csv')
df = df.reindex(np.random.permutation(df.index))
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 150 entries, 72 to 38
Data columns (total 6 columns):
Id               150 non-null int64
SepalLengthCm    150 non-null float64
SepalWidthCm     150 non-null float64
PetalLengthCm    150 non-null float64
PetalWidthCm     150 non-null float64
Species          150 non-null object
dtypes: float64(4), int64(1), object(1)
memory usage: 8.2+ KB

In [3]:
df['IsSetosa'] = df['Species'].apply(lambda a: 1.0 if a == 'Iris-setosa' else 0.0)
data = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'IsSetosa']]
data.head()


Out[3]:
SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm IsSetosa
72 6.3 2.5 4.9 1.5 0.0
99 5.7 2.8 4.1 1.3 0.0
21 5.1 3.7 1.5 0.4 1.0
39 5.1 3.4 1.5 0.2 1.0
147 6.5 3.0 5.2 2.0 0.0

In [4]:
train, test = train_test_split(data, test_size=0.2)
train_X = np.array(train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
train_y = np.array(train[['IsSetosa']])

In [5]:
np.mean(train_X, axis=0)


Out[5]:
array([ 5.87333333,  3.01833333,  3.82083333,  1.20833333])

In [6]:
train_stds, train_means = std_normalize(train_X)

In [7]:
np.mean(train_X, axis=0)


Out[7]:
array([ -7.51250913e-16,   1.62832710e-15,   5.92118946e-17,
         1.25825276e-16])

In [8]:
np.std(train_X, axis=0)


Out[8]:
array([ 1.,  1.,  1.,  1.])
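
Judging by the before/after means and stds above, std_normalize standardizes the array in place (per-feature mean 0, std 1) and returns the statistics so the same transform can later be applied to the test set via data_normalize. A minimal sketch under that assumption, with the return order matching how it is unpacked above:

def std_normalize(X):
    # standardize X in place and return the per-feature statistics
    stds, means = np.std(X, axis=0), np.mean(X, axis=0)
    X -= means
    X /= stds
    return stds, means

def data_normalize(X, stds, means):
    # apply the training-set statistics to new data, in place
    X -= means
    X /= stds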

2) Train
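
The loop below fits the model by batch gradient descent on the binary cross-entropy cost. With $h = \sigma(XW + b)$ over $m$ samples:

$$ J(W, b) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log h^{(i)} + \left(1 - y^{(i)}\right) \log \left(1 - h^{(i)}\right) \right] $$

$$ \frac{\partial J}{\partial W} = \frac{1}{m} X^T (h - y), \qquad \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} \left(h^{(i)} - y^{(i)}\right) $$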


In [9]:
feature_size = train_X.shape[1]
sample_count = train_X.shape[0]

W, b = create_parameters(feature_size)

threshold = 0.5
lr = 0.01

for epoch in range(0, 1000):
    h = logistic_model(train_X, W, b)
    dW, db = log_cost_dev(train_X, train_y, h)
    W, b = gd_update(W, b, dW, db, lr)
    if (epoch + 1) % 100 == 0:
        cur_cost = log_cost(h, train_y)
        conf = binary_confusion_matrix(h, train_y, threshold=threshold)
        print('epoch: {0}, cost: {1}, conf: {2}'.format(epoch + 1, cur_cost, conf))

predictions = logistic_model(train_X, W, b)
final_cost = log_cost(predictions, train_y)
conf = binary_confusion_matrix(predictions, train_y, threshold=threshold)
print('training finished!')
print('final cost: {0}, conf: {1}'.format(final_cost, conf))


epoch: 100, cost: 1.3745801338925958, conf: (0.04477611940298507, 0.07894736842105263, 0.05714285714285714)
epoch: 200, cost: 0.674898608867079, conf: (0.42105263157894735, 0.631578947368421, 0.5052631578947367)
epoch: 300, cost: 0.4110166387166828, conf: (0.6379310344827587, 0.9736842105263158, 0.7708333333333335)
epoch: 400, cost: 0.29126399010419846, conf: (0.7450980392156863, 1.0, 0.8539325842696629)
epoch: 500, cost: 0.22517771606799214, conf: (0.8636363636363636, 1.0, 0.9268292682926829)
epoch: 600, cost: 0.18350381003692745, conf: (0.9047619047619048, 1.0, 0.9500000000000001)
epoch: 700, cost: 0.15485671561715814, conf: (0.95, 1.0, 0.9743589743589743)
epoch: 800, cost: 0.13397360852104612, conf: (0.95, 1.0, 0.9743589743589743)
epoch: 900, cost: 0.11809009119945889, conf: (1.0, 1.0, 1.0)
epoch: 1000, cost: 0.10561324578302447, conf: (1.0, 1.0, 1.0)
training finished!
final cost: 0.10550201197717494, conf: (1.0, 1.0, 1.0)
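
By epoch 900 the training set is classified perfectly, which is expected: setosa is linearly separable from the other two species on these four features.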

3) Try the test data


In [10]:
test_X = np.array(test[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
test_y = np.array(test[['IsSetosa']])
data_normalize(test_X, train_stds, train_means)

In [11]:
test_h = logistic_model(test_X, W, b)
test_cost = log_cost(test_h, test_y)
test_conf = binary_confusion_matrix(test_h, test_y, threshold=threshold)
print('test cost: {0}, conf: {1}'.format(test_cost, test_conf))


test cost: 0.12952589901785055, conf: (0.9230769230769231, 1.0, 0.9600000000000001)

So far this is a binary classifier for setosa only; we want to generalize binary classification to multiple classes.

Iris, one-vs-all

1) Prepare the data again


In [12]:
df['Species'].unique()


Out[12]:
array(['Iris-versicolor', 'Iris-setosa', 'Iris-virginica'], dtype=object)

In [29]:
df['IsSetosa'] = df['Species'].apply(lambda a: 1.0 if a == 'Iris-setosa' else 0.0)
df['IsVersicolor'] = df['Species'].apply(lambda a: 1.0 if a == 'Iris-versicolor' else 0.0)
df['IsVirginica'] = df['Species'].apply(lambda a: 1.0 if a == 'Iris-virginica' else 0.0)
data = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'IsSetosa', 'IsVersicolor', 'IsVirginica']]

train, test = train_test_split(data, test_size=0.2)
train_X = np.array(train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
train_y0 = np.array(train[['IsSetosa']])
train_y1 = np.array(train[['IsVersicolor']])
train_y2 = np.array(train[['IsVirginica']])
train_y_all = np.array(train[['IsSetosa', 'IsVersicolor', 'IsVirginica']])

test_X = np.array(test[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']])
test_y_all = np.array(test[['IsSetosa', 'IsVersicolor', 'IsVirginica']])

# keep the (stds, means) return order consistent with the earlier std_normalize cell
x_stds, x_means = std_normalize(train_X)
data_normalize(test_X, x_stds, x_means)

2) Define some utilities


In [30]:
def train_lr_classifier(X, y, lr=0.01, threshold=0.5, epochs=1000, step_size=100):
    feature_size = X.shape[1]
    sample_count = y.shape[0]
    W, b = create_parameters(feature_size)
    
    for epoch in range(0, epochs):
        h = logistic_model(X, W, b)
        dW, db = log_cost_dev(X, y, h)
        W, b = gd_update(W, b, dW, db, lr)
        if (epoch + 1) % step_size == 0:
            cur_cost = log_cost(h, y)
            conf = binary_confusion_matrix(h, y, threshold=threshold)
            print('epoch: {0}, cost: {1}, conf: {2}'.format(epoch + 1, cur_cost, conf))

    predictions = logistic_model(X, W, b)
    final_cost = log_cost(predictions, y)
    conf = binary_confusion_matrix(predictions, y, threshold=threshold)
    print('training finished!')
    print('final cost: {0}, conf: {1}'.format(final_cost, conf))
    return W, b

In [31]:
m0 = train_lr_classifier(train_X, train_y0, lr=0.01, threshold=0.5)


epoch: 100, cost: 0.6413593778580737, conf: (0.8333333333333334, 0.23255813953488372, 0.3636363636363636)
epoch: 200, cost: 0.3239125928763921, conf: (0.9629629629629629, 0.6046511627906976, 0.7428571428571429)
epoch: 300, cost: 0.19547085770561048, conf: (1.0, 0.8837209302325582, 0.9382716049382717)
epoch: 400, cost: 0.136598131506859, conf: (1.0, 0.9767441860465116, 0.988235294117647)
epoch: 500, cost: 0.10514600594178773, conf: (1.0, 0.9767441860465116, 0.988235294117647)
epoch: 600, cost: 0.08612278759634744, conf: (1.0, 0.9767441860465116, 0.988235294117647)
epoch: 700, cost: 0.07351856281845054, conf: (1.0, 0.9767441860465116, 0.988235294117647)
epoch: 800, cost: 0.06459017480374314, conf: (1.0, 0.9767441860465116, 0.988235294117647)
epoch: 900, cost: 0.05794039815033709, conf: (1.0, 0.9767441860465116, 0.988235294117647)
epoch: 1000, cost: 0.052792391408487065, conf: (1.0, 0.9767441860465116, 0.988235294117647)
training finished!
final cost: 0.05274675309628947, conf: (1.0, 0.9767441860465116, 0.988235294117647)

In [32]:
m1 = train_lr_classifier(train_X, train_y1, lr=0.01, threshold=0.5, epochs=50000, step_size=10000)


epoch: 10000, cost: 0.4672810189697581, conf: (0.64, 0.43243243243243246, 0.5161290322580645)
epoch: 20000, cost: 0.46612468084009984, conf: (0.64, 0.43243243243243246, 0.5161290322580645)
epoch: 30000, cost: 0.4659093728560644, conf: (0.64, 0.43243243243243246, 0.5161290322580645)
epoch: 40000, cost: 0.46579602875281123, conf: (0.64, 0.43243243243243246, 0.5161290322580645)
epoch: 50000, cost: 0.46573324705435276, conf: (0.64, 0.43243243243243246, 0.5161290322580645)
training finished!
final cost: 0.46573324684480516, conf: (0.64, 0.43243243243243246, 0.5161290322580645)
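
The plateau around cost 0.466 is not a bug: versicolor, unlike setosa, is not linearly separable from the other two species, so a single linear decision boundary can only do so well on this one-vs-rest problem.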

In [33]:
m2 = train_lr_classifier(train_X, train_y2, lr=0.01, threshold=0.5, epochs=50000, step_size=10000)


epoch: 10000, cost: 0.12692241149644967, conf: (0.95, 0.95, 0.9500000000000001)
epoch: 20000, cost: 0.09590092286288818, conf: (0.9512195121951219, 0.975, 0.9629629629629629)
epoch: 30000, cost: 0.08151045623289317, conf: (0.9512195121951219, 0.975, 0.9629629629629629)
epoch: 40000, cost: 0.07295167490350239, conf: (0.9512195121951219, 0.975, 0.9629629629629629)
epoch: 50000, cost: 0.06715221727815238, conf: (0.9512195121951219, 0.975, 0.9629629629629629)
training finished!
final cost: 0.067151727629488, conf: (0.9512195121951219, 0.975, 0.9629629629629629)
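
train_lr_classifier returns the learned (W, b) pair, but nothing above combines the three binary classifiers into one multi-class predictor. A minimal one-vs-all sketch (the names one_vs_all_predict, pred, and true are illustrative, not from models.py): score each class with its classifier and take the argmax.

def one_vs_all_predict(X, classifiers):
    # one probability column per class; the highest-scoring class wins
    scores = np.hstack([logistic_model(X, W, b) for W, b in classifiers])
    return np.argmax(scores, axis=1)

pred = one_vs_all_predict(test_X, [m0, m1, m2])
true = np.argmax(test_y_all, axis=1)
print('one-vs-all test accuracy: {0}'.format(np.mean(pred == true)))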

Classifying multiple classes with softmax

What is softmax?

$$ \mathrm{softmax}(x)_j = \frac{e^{x_j}}{\sum_{i=1}^{m} e^{x_i}} $$

See details in models.py
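
For readers without models.py at hand, here is a minimal, numerically stable sketch of the two softmax helpers used below, assuming X of shape (samples, features), W of shape (features, classes), and one-hot labels:

def softmax_regression_model(X, W, b):
    # row-wise softmax over the class scores; subtracting the row max
    # avoids overflow in exp() without changing the result
    z = X @ W + b
    z -= np.max(z, axis=1, keepdims=True)
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)

def crossentropy_cost(h, y):
    # mean cross-entropy against one-hot labels
    return -np.mean(np.sum(y * np.log(h), axis=1))

With one-hot labels, the gradient of this cost with respect to the scores z = XW + b reduces to (h - y) / m, the same form as the binary case.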


In [36]:
import models as ml

In [37]:
feature_size = train_X.shape[1]
sample_count = train_X.shape[0]
class_count = train_y_all.shape[1]

W, b = ml.create_parameters(feature_size, class_count)

for epoch in range(0, 100000):
    h = ml.softmax_regression_model(train_X, W, b)
    dW, db = ml.crossentropy_cost_dev(train_X, train_y_all, h)
    W, b = ml.gd_update(W, b, dW, db, lr=0.01)
    if (epoch + 1) % 10000 == 0:
        cur_cost = ml.crossentropy_cost(h, train_y_all)
        cur_acc = ml.categorical_accuracy(h, train_y_all)
        print('epoch: {0}, cost: {1}, acc: {2}'.format(epoch + 1, cur_cost, cur_acc))

predictions = ml.softmax_regression_model(train_X, W, b)
final_cost = ml.crossentropy_cost(predictions, train_y_all)
final_acc = ml.categorical_accuracy(predictions, train_y_all)
print('training finished!')
print('train cost: {0}, acc: {1}'.format(final_cost, final_acc))

test_h = ml.softmax_regression_model(test_X, W, b)
test_cost = ml.crossentropy_cost(test_h, test_y_all)
test_acc = ml.categorical_accuracy(test_h, test_y_all)
print('test cost: {0}, acc: {1}'.format(test_cost, test_acc))


epoch: 10000, cost: 0.040805551375077395, acc: 0.975
epoch: 20000, cost: 0.02969192477577395, acc: 0.975
epoch: 30000, cost: 0.02494988618992539, acc: 0.975
epoch: 40000, cost: 0.02219898562705884, acc: 0.9833333333333333
epoch: 50000, cost: 0.020356746091358142, acc: 0.9833333333333333
epoch: 60000, cost: 0.019015789622029135, acc: 0.9833333333333333
epoch: 70000, cost: 0.01798504255574665, acc: 0.9833333333333333
epoch: 80000, cost: 0.017161654145229403, acc: 0.9833333333333333
epoch: 90000, cost: 0.016484771512794216, acc: 0.9833333333333333
epoch: 100000, cost: 0.01591581752130277, acc: 0.9833333333333333
training finished!
train cost: 0.015915765086367127, acc: 0.9833333333333333
test cost: 0.026905784702824006, acc: 0.9666666666666667

Let's take a look at a few predictions.


In [47]:
np.argmax(ml.softmax_regression_model(train_X[0:4], W, b), axis=1)


Out[47]:
array([1, 0, 1, 1])

In [48]:
np.argmax(train_y_all[0:4], axis=1)


Out[48]:
array([1, 0, 1, 1])
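
The predicted classes match the labels on all four samples. This argmax comparison is presumably what categorical_accuracy computes; a one-line sketch:

def categorical_accuracy(h, y):
    # fraction of samples whose predicted class matches the one-hot label
    return np.mean(np.argmax(h, axis=1) == np.argmax(y, axis=1))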
