In [1]:
%matplotlib inline
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import os

In [2]:
# Dataset: https://archive.ics.uci.edu/ml/datasets/Iris/
# IRIS Dataset Size: 150 samples
# Train: 70%  Eval: 30%

In [3]:
data_path = r'..\Data\ClassExamples\Iris'

In [4]:
df = pd.read_csv(os.path.join(data_path, 'iris.data.csv'))

In [5]:
df.head()


Out[5]:
sepal_length sepal_width petal_length petal_width class
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa

In [6]:
df.tail()


Out[6]:
sepal_length sepal_width petal_length petal_width class
145 6.7 3.0 5.2 2.3 Iris-virginica
146 6.3 2.5 5.0 1.9 Iris-virginica
147 6.5 3.0 5.2 2.0 Iris-virginica
148 6.2 3.4 5.4 2.3 Iris-virginica
149 5.9 3.0 5.1 1.8 Iris-virginica

In [7]:
np.random.seed(5)
l = list(df.index)
np.random.shuffle(l)
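
The 70/30 train/eval split noted at the top is not performed in this notebook (all 150 shuffled rows are written out below). Purely for illustration, a minimal pandas sketch of a manual split of the shuffled index could look like this (split point and variable names are hypothetical):

# Hypothetical manual 70/30 split of the shuffled index
split_point = int(len(l) * 0.7)
df_train = df.iloc[l[:split_point]]   # first 70% of shuffled rows
df_eval = df.iloc[l[split_point:]]    # remaining 30% for evaluation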

In [8]:
l[:5]


Out[8]:
[82, 134, 114, 42, 109]

In [9]:
df = df.iloc[l]

In [10]:
df.head()


Out[10]:
sepal_length sepal_width petal_length petal_width class
82 5.8 2.7 3.9 1.2 Iris-versicolor
134 6.1 2.6 5.6 1.4 Iris-virginica
114 5.8 2.8 5.1 2.4 Iris-virginica
42 4.4 3.2 1.3 0.2 Iris-setosa
109 7.2 3.6 6.1 2.5 Iris-virginica

In [11]:
df.to_csv(os.path.join(data_path, 'iris_data_train.csv'),
          index = True,
          index_label = 'Row',
          columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'])

In [12]:
df.to_csv(os.path.join(data_path,'iris_data_classifier_test.csv'),
          index = True,
          index_label = 'Row',
          columns = ['sepal_length','sepal_width','petal_length','petal_width'])

In [13]:
df['class'].value_counts()


Out[13]:
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: class, dtype: int64

In [14]:
setosa = df['class'] == 'Iris-setosa'
versicolor = df['class'] == 'Iris-versicolor'
virginica = df['class'] == 'Iris-virginica'

In [15]:
setosa.head()


Out[15]:
82     False
134    False
114    False
42      True
109    False
Name: class, dtype: bool

In [16]:
fig = plt.figure(figsize = (12, 8))
plt.scatter(df[setosa].sepal_length,
            y = df[setosa].sepal_width, 
            label = 'setosa',
            color = 'g')
plt.scatter(df[versicolor].sepal_length,
            y = df[versicolor].sepal_width, 
            label = 'versicolor',
            color = 'r')
plt.scatter(df[virginica].sepal_length,
            y = df[virginica].sepal_width, 
            label = 'virginica',
            color = 'b')
plt.xlabel('length')
plt.ylabel('width')
plt.title('sepal')
plt.grid(True)
plt.legend()


Out[16]:
<matplotlib.legend.Legend at 0x237cc448ef0>

In [17]:
fig = plt.figure(figsize = (12, 8))
plt.scatter(df[setosa].petal_length,
            y = df[setosa].petal_width, 
            label = 'setosa',
            color = 'g')
plt.scatter(df[versicolor].petal_length,
            y = df[versicolor].petal_width, 
            label = 'versicolor',
            color = 'r')
plt.scatter(df[virginica].petal_length,
            y = df[virginica].petal_width,
            label = 'virginica',
            color = 'b')
plt.xlabel('length')
plt.ylabel('width')
plt.title('petal')
plt.grid(True)
plt.legend()


Out[17]:
<matplotlib.legend.Legend at 0x237cc556be0>
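
The two scatter cells above repeat the same pattern once per species. A more compact equivalent (a sketch reusing the df, masks, and colors already defined) loops over the species:

# Loop over species masks instead of calling plt.scatter three times
species = {'setosa': (setosa, 'g'), 'versicolor': (versicolor, 'r'), 'virginica': (virginica, 'b')}

fig = plt.figure(figsize = (12, 8))
for name, (mask, color) in species.items():
    plt.scatter(df[mask].sepal_length, df[mask].sepal_width, label = name, color = color)
plt.xlabel('length')
plt.ylabel('width')
plt.title('sepal')
plt.grid(True)
plt.legend()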

In [18]:
fig = plt.figure(figsize = (12, 8))
plt.hist([df[setosa].petal_length,
          df[versicolor].petal_length,
          df[virginica].petal_length],
         bins = 10,
         label = ['setosa',
                  'versicolor',
                  'virginica'])

plt.title('petal length')
plt.xlabel('petal length')
plt.ylabel('count')
plt.legend()


Out[18]:
<matplotlib.legend.Legend at 0x237cc3a2208>

In [19]:
fig = plt.figure(figsize = (12, 8))
plt.hist([df[setosa].petal_width,
          df[versicolor].petal_width,
          df[virginica].petal_width],
         bins = 10,
        label = ['setosa',
                 'versicolor',
                 'virginica'])

plt.title('petal width')
plt.xlabel('petal width')
plt.ylabel('count')
plt.legend()


Out[19]:
<matplotlib.legend.Legend at 0x237ccab1e80>

In [20]:
fig = plt.figure(figsize = (12, 8))
plt.hist([df[setosa].sepal_length,
          df[versicolor].sepal_length,
          df[virginica].sepal_length],
         bins = 10,
         label = ['setosa',
                  'versicolor',
                  'virginica'])

plt.title('sepal length')
plt.xlabel('sepal length')
plt.ylabel('count')
plt.legend()


Out[20]:
<matplotlib.legend.Legend at 0x237ccce21d0>

In [21]:
fig = plt.figure(figsize = (12, 8))
plt.hist([df[setosa].sepal_width,
          df[versicolor].sepal_width,
          df[virginica].sepal_width],
         bins = 10,
         label = ['setosa',
                  'versicolor',
                  'virginica'])

plt.title('sepal width')
plt.xlabel('sepal width')
plt.ylabel('count')
plt.legend()


Out[21]:
<matplotlib.legend.Legend at 0x237cceec128>
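
The four histogram cells above differ only in the feature being plotted. A sketch of an equivalent loop over the feature columns (same masks, same bin count):

# One grouped histogram per feature column
for feature in ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']:
    fig = plt.figure(figsize = (12, 8))
    plt.hist([df[setosa][feature], df[versicolor][feature], df[virginica][feature]],
             bins = 10, label = ['setosa', 'versicolor', 'virginica'])
    plt.title(feature)
    plt.xlabel(feature)
    plt.ylabel('count')
    plt.legend()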

In [22]:
df_predict_default = pd.read_csv(
    os.path.join(
        data_path,
        'output_default',
        'bp-yVKPO2ydD0u-iris_data_train.csv.gz'))
df_predict_default.index = df_predict_default.tag
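
Setting the index from the tag column can also be done at load time (a sketch assuming the same file layout; note that index_col drops 'tag' as a regular column, unlike the assignment above, which keeps it):

df_predict_default = pd.read_csv(
    os.path.join(data_path, 'output_default', 'bp-yVKPO2ydD0u-iris_data_train.csv.gz'),
    index_col = 'tag')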

In [23]:
df_predict_default.head()


Out[23]:
tag trueLabel Iris-virginica Iris-versicolor Iris-setosa
tag
82 82 Iris-versicolor 0.015082 0.983628 0.001290
134 134 Iris-virginica 0.962326 0.037216 0.000458
114 114 Iris-virginica 0.985210 0.011761 0.003029
42 42 Iris-setosa 0.000258 0.004150 0.995592
109 109 Iris-virginica 0.996544 0.000627 0.002829

In [24]:
def predicted_class(row):
    # Return the class whose predicted probability is highest for this row
    if row['Iris-setosa'] >= row['Iris-versicolor'] and row['Iris-setosa'] >= row['Iris-virginica']:
        return "Iris-setosa"

    if row['Iris-versicolor'] >= row['Iris-setosa'] and row['Iris-versicolor'] >= row['Iris-virginica']:
        return "Iris-versicolor"

    return "Iris-virginica"

In [25]:
# Assign each row the class with the highest predicted probability
lst_predicted = []
for index, row in df_predict_default.iterrows():
    lst_predicted.append(predicted_class(row))

df_predict_default['predicted_default'] = lst_predicted
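
The per-row loop above can also be replaced by a vectorized call: DataFrame.idxmax returns, for each row, the name of the column holding the largest value. A sketch assuming the same three probability columns:

# Vectorized alternative to the iterrows loop
prob_cols = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
df_predict_default['predicted_default'] = df_predict_default[prob_cols].idxmax(axis = 1)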

In [26]:
df_predict_default.head()


Out[26]:
tag trueLabel Iris-virginica Iris-versicolor Iris-setosa predicted_default
tag
82 82 Iris-versicolor 0.015082 0.983628 0.001290 Iris-versicolor
134 134 Iris-virginica 0.962326 0.037216 0.000458 Iris-virginica
114 114 Iris-virginica 0.985210 0.011761 0.003029 Iris-virginica
42 42 Iris-setosa 0.000258 0.004150 0.995592 Iris-setosa
109 109 Iris-virginica 0.996544 0.000627 0.002829 Iris-virginica

In [27]:
df_predict_numeric = pd.read_csv(
    os.path.join(
        data_path,
        'output_numeric',
        'bp-K58XKrCYvk4-iris_data_train.csv.gz'))
df_predict_numeric.index = df_predict_numeric.tag

In [28]:
# Repeat the prediction assignment for the numeric-recipe output
lst_predicted = []
for index, row in df_predict_numeric.iterrows():
    lst_predicted.append(predicted_class(row))

df_predict_numeric['predicted_numeric'] = lst_predicted

In [29]:
df_predict_numeric.head()


Out[29]:
tag trueLabel Iris-virginica Iris-versicolor Iris-setosa predicted_numeric
tag
82 82 Iris-versicolor 0.432319 0.473640 0.094041 Iris-versicolor
134 134 Iris-virginica 0.695479 0.297776 0.006744 Iris-virginica
114 114 Iris-virginica 0.776126 0.217024 0.006849 Iris-virginica
42 42 Iris-setosa 0.020863 0.099928 0.879209 Iris-setosa
109 109 Iris-virginica 0.766280 0.228180 0.005540 Iris-virginica

In [30]:
print('Confusion matrix - Actual versus prediction with bin recipe')
cf_bin_recipe = pd.crosstab(df['class'], 
                            df_predict_default.predicted_default)


Confusion matrix - Actual versus prediction with bin recipe

In [31]:
cf_bin_recipe


Out[31]:
predicted_default Iris-setosa Iris-versicolor Iris-virginica
class
Iris-setosa 50 0 0
Iris-versicolor 0 49 1
Iris-virginica 0 4 46

Prediction with the default recipe is good: only 5 of the 150 samples are misclassified (1 versicolor and 4 virginica)
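
To read per-class error rates directly, the same crosstab can be row-normalized (a sketch; the normalize argument assumes a reasonably recent pandas):

# Fraction of each true class assigned to each predicted class
pd.crosstab(df['class'], df_predict_default.predicted_default, normalize = 'index')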


In [32]:
print('Confusion matrix - Actual versus prediction with numeric recipe')
cf_num_recipe = pd.crosstab(df['class'],
                            df_predict_numeric.predicted_numeric)


Confusion matrix - Actual versus prediction with numeric recipe

In [33]:
cf_num_recipe


Out[33]:
predicted_numeric Iris-setosa Iris-versicolor Iris-virginica
class
Iris-setosa 50 0 0
Iris-versicolor 0 20 30
Iris-virginica 0 0 50

Versicolor: 30 of the 50 examples were misclassified as Virginica with the numeric recipe


In [34]:
def print_metrics(cf_matrix):
    # Note: AWS ML computes all of these metrics for you; this demo shows how they are calculated.

    # Total samples = sum of every cell in the confusion matrix
    total_samples = cf_matrix.sum(axis = 1).sum()
    # Diagonal contains correct class predictions
    accuracy = np.diag(cf_matrix).sum() / total_samples
    
    print('Accuracy: {0:0.3f}'.format(accuracy))
    print('\n')
    
    # TPR, Recall = True Positive/Actual Positive
    recall = np.diag(cf_matrix) / cf_matrix.sum(axis = 1)
    print('recall')
    print(recall)
    print('\n')
    
    # Precision = True Positive/Predicted Positive
    precision = np.diag(cf_matrix) / cf_matrix.sum(axis = 0)
    print('precision')
    print(precision)
    print('\n')
    
    f1_scores = 2 * recall * precision / (recall + precision)
    print('f1 scores')
    print(f1_scores)
    print('\n')
    
    print('average f1 score {0:0.3f}'.format(f1_scores.mean()))
    print('\n')
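
As a cross-check on the hand-rolled metrics above, scikit-learn (not imported elsewhere in this notebook, so treat this as an optional sketch) can compute the same per-class precision, recall, and F1 from the raw labels. This relies on df and the prediction frame being in the same row order, which the head outputs above confirm:

# Hypothetical cross-check with scikit-learn
from sklearn.metrics import classification_report
print(classification_report(df['class'], df_predict_default['predicted_default']))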

In [35]:
print_metrics(cf_bin_recipe)


Accuracy: 0.967


recall
class
Iris-setosa        1.00
Iris-versicolor    0.98
Iris-virginica     0.92
dtype: float64


precision
predicted_default
Iris-setosa        1.000000
Iris-versicolor    0.924528
Iris-virginica     0.978723
dtype: float64


f1 scores
class
Iris-setosa        1.000000
Iris-versicolor    0.951456
Iris-virginica     0.948454
dtype: float64


average f1 score 0.967



In [36]:
print_metrics(cf_num_recipe)


Accuracy: 0.800


recall
class
Iris-setosa        1.0
Iris-versicolor    0.4
Iris-virginica     1.0
dtype: float64


precision
predicted_numeric
Iris-setosa        1.000
Iris-versicolor    1.000
Iris-virginica     0.625
dtype: float64


f1 scores
class
Iris-setosa        1.000000
Iris-versicolor    0.571429
Iris-virginica     0.769231
dtype: float64


average f1 score 0.780



In [37]:
df_predict_numeric.predicted_numeric.value_counts()


Out[37]:
Iris-virginica     80
Iris-setosa        50
Iris-versicolor    20
Name: predicted_numeric, dtype: int64

In [38]:
df_predict_default.predicted_default.value_counts()


Out[38]:
Iris-versicolor    53
Iris-setosa        50
Iris-virginica     47
Name: predicted_default, dtype: int64

Multi-Class Evaluation Metric

  1. F1 Score is a binary classification metric: the harmonic mean of precision and recall
    F1 Score = 2 x Precision x Recall / (Precision + Recall)
    A higher F1 Score reflects better predictive accuracy (see the worked check after this list)

  2. Multi-class evaluation
    Average of the class-wise F1 Scores

  3. Baseline F1 Score = the score of a hypothetical model that always predicts the most frequent class

  4. Visualization - Confusion Matrix - available on the AWS ML Console
    Matrix: rows = true class, columns = predicted class
    Cell color - diagonal cells show the percentage of each true class predicted correctly
    Cell color - off-diagonal cells show the percentage predicted incorrectly
    The last column is the F1 score for that class; the second-to-last column is the true class distribution
    The last row is the predicted class distribution
    Up to 10 classes are shown, listed from most frequent to least frequent
    With more than 10 classes, the 9 most frequent classes are shown and the remaining classes are collapsed into "other"
    The confusion matrix can be downloaded via a URL from the Explore Performance page under Evaluations
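
Worked check of the class-wise F1 formula, using the default-recipe precision and recall printed for Iris-virginica above (values copied from the print_metrics output):

# F1 = 2 * precision * recall / (precision + recall)
precision_virginica = 0.978723
recall_virginica = 0.92
f1_virginica = 2 * precision_virginica * recall_virginica / (precision_virginica + recall_virginica)
print(f1_virginica)   # ~0.948, matching the f1 scores table above (0.948454)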

Prediction Summary

  1. Evaluation with default recipe settings: average F1 score 0.905
  2. Evaluation with numeric recipe settings: average F1 score 0.827
  3. Batch prediction results (predicting the outcome for all 150 examples):
    a. With default recipe settings: average F1 score 0.967 (as computed above)
    b. With numeric recipe settings: average F1 score 0.780
  4. Classification was better with binning; Versicolor classification suffered when the numeric recipe was used
  5. A higher F1 score implies better prediction accuracy.