Iris Demo

  • Check any null and invalid values
  • Ensure the properties of features and labels
  • Convert the string value into computational forms
  • PCA -> Cluster Verification (optional)
  • Logistic Regreesion/SVM (optional)

Import Iris DataSet from sklearn


In [8]:
from sklearn.datasets import load_iris
irisdata = load_iris()

Convert data form into Pandas format


In [54]:
import pandas as pd
features = pd.DataFrame(irisdata['data'])
features.columns = irisdata['feature_names']
targets = pd.DataFrame(irisdata['target'])
targets = targets.replace([0,1,2],irisdata['target_names'])

Check if there is any null values


In [55]:
features.isnull().sum()


Out[55]:
sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
dtype: int64

In [56]:
targets.isnull().sum()


Out[56]:
0    0
dtype: int64

3 group to classify


In [57]:
targets[0].unique()


Out[57]:
array(['setosa', 'versicolor', 'virginica'], dtype=object)

150 instances and 4 features


In [58]:
features.shape


Out[58]:
(150, 4)

Convet all the unique string values into integers. Perform label encoding on the data


In [66]:
from sklearn.preprocessing import LabelEncoder
labelencoder=LabelEncoder()
for col in targets.columns:
    targets[col] = labelencoder.fit_transform(targets[col])

Check the encoded values


In [64]:
targets[0].unique()


Out[64]:
array([0, 1, 2], dtype=int64)

In [65]:
print(targets.groupby(0).size())


0
0    50
1    50
2    50
dtype: int64

Plot boxplot to visualize the distribution of the data


In [68]:
import matplotlib.pyplot as plt
%matplotlib inline

fig,axes = plt.subplots(nrows=2,ncols=2,figsize=(9,9))

fig1 = axes[0,0].boxplot(features['sepal length (cm)'],patch_artist=True)
fig2 = axes[0,1].boxplot(features['sepal width (cm)'],patch_artist=True)
fig3 = axes[1,0].boxplot(features['petal length (cm)'],patch_artist=True)
fig4 = axes[1,1].boxplot(features['petal width (cm)'],patch_artist=True)


Info of features


In [69]:
features.describe()


Out[69]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667 1.198667
std 0.828066 0.433594 1.764420 0.763161
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000

In [70]:
features.corr()


Out[70]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
sepal length (cm) 1.000000 -0.109369 0.871754 0.817954
sepal width (cm) -0.109369 1.000000 -0.420516 -0.356544
petal length (cm) 0.871754 -0.420516 1.000000 0.962757
petal width (cm) 0.817954 -0.356544 0.962757 1.000000

Standardising the features


In [72]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(features)
X


Out[72]:
array([[ -9.00681170e-01,   1.03205722e+00,  -1.34127240e+00,
         -1.31297673e+00],
       [ -1.14301691e+00,  -1.24957601e-01,  -1.34127240e+00,
         -1.31297673e+00],
       [ -1.38535265e+00,   3.37848329e-01,  -1.39813811e+00,
         -1.31297673e+00],
       [ -1.50652052e+00,   1.06445364e-01,  -1.28440670e+00,
         -1.31297673e+00],
       [ -1.02184904e+00,   1.26346019e+00,  -1.34127240e+00,
         -1.31297673e+00],
       [ -5.37177559e-01,   1.95766909e+00,  -1.17067529e+00,
         -1.05003079e+00],
       [ -1.50652052e+00,   8.00654259e-01,  -1.34127240e+00,
         -1.18150376e+00],
       [ -1.02184904e+00,   8.00654259e-01,  -1.28440670e+00,
         -1.31297673e+00],
       [ -1.74885626e+00,  -3.56360566e-01,  -1.34127240e+00,
         -1.31297673e+00],
       [ -1.14301691e+00,   1.06445364e-01,  -1.28440670e+00,
         -1.44444970e+00],
       [ -5.37177559e-01,   1.49486315e+00,  -1.28440670e+00,
         -1.31297673e+00],
       [ -1.26418478e+00,   8.00654259e-01,  -1.22754100e+00,
         -1.31297673e+00],
       [ -1.26418478e+00,  -1.24957601e-01,  -1.34127240e+00,
         -1.44444970e+00],
       [ -1.87002413e+00,  -1.24957601e-01,  -1.51186952e+00,
         -1.44444970e+00],
       [ -5.25060772e-02,   2.18907205e+00,  -1.45500381e+00,
         -1.31297673e+00],
       [ -1.73673948e-01,   3.11468391e+00,  -1.28440670e+00,
         -1.05003079e+00],
       [ -5.37177559e-01,   1.95766909e+00,  -1.39813811e+00,
         -1.05003079e+00],
       [ -9.00681170e-01,   1.03205722e+00,  -1.34127240e+00,
         -1.18150376e+00],
       [ -1.73673948e-01,   1.72626612e+00,  -1.17067529e+00,
         -1.18150376e+00],
       [ -9.00681170e-01,   1.72626612e+00,  -1.28440670e+00,
         -1.18150376e+00],
       [ -5.37177559e-01,   8.00654259e-01,  -1.17067529e+00,
         -1.31297673e+00],
       [ -9.00681170e-01,   1.49486315e+00,  -1.28440670e+00,
         -1.05003079e+00],
       [ -1.50652052e+00,   1.26346019e+00,  -1.56873522e+00,
         -1.31297673e+00],
       [ -9.00681170e-01,   5.69251294e-01,  -1.17067529e+00,
         -9.18557817e-01],
       [ -1.26418478e+00,   8.00654259e-01,  -1.05694388e+00,
         -1.31297673e+00],
       [ -1.02184904e+00,  -1.24957601e-01,  -1.22754100e+00,
         -1.31297673e+00],
       [ -1.02184904e+00,   8.00654259e-01,  -1.22754100e+00,
         -1.05003079e+00],
       [ -7.79513300e-01,   1.03205722e+00,  -1.28440670e+00,
         -1.31297673e+00],
       [ -7.79513300e-01,   8.00654259e-01,  -1.34127240e+00,
         -1.31297673e+00],
       [ -1.38535265e+00,   3.37848329e-01,  -1.22754100e+00,
         -1.31297673e+00],
       [ -1.26418478e+00,   1.06445364e-01,  -1.22754100e+00,
         -1.31297673e+00],
       [ -5.37177559e-01,   8.00654259e-01,  -1.28440670e+00,
         -1.05003079e+00],
       [ -7.79513300e-01,   2.42047502e+00,  -1.28440670e+00,
         -1.44444970e+00],
       [ -4.16009689e-01,   2.65187798e+00,  -1.34127240e+00,
         -1.31297673e+00],
       [ -1.14301691e+00,   1.06445364e-01,  -1.28440670e+00,
         -1.44444970e+00],
       [ -1.02184904e+00,   3.37848329e-01,  -1.45500381e+00,
         -1.31297673e+00],
       [ -4.16009689e-01,   1.03205722e+00,  -1.39813811e+00,
         -1.31297673e+00],
       [ -1.14301691e+00,   1.06445364e-01,  -1.28440670e+00,
         -1.44444970e+00],
       [ -1.74885626e+00,  -1.24957601e-01,  -1.39813811e+00,
         -1.31297673e+00],
       [ -9.00681170e-01,   8.00654259e-01,  -1.28440670e+00,
         -1.31297673e+00],
       [ -1.02184904e+00,   1.03205722e+00,  -1.39813811e+00,
         -1.18150376e+00],
       [ -1.62768839e+00,  -1.74477836e+00,  -1.39813811e+00,
         -1.18150376e+00],
       [ -1.74885626e+00,   3.37848329e-01,  -1.39813811e+00,
         -1.31297673e+00],
       [ -1.02184904e+00,   1.03205722e+00,  -1.22754100e+00,
         -7.87084847e-01],
       [ -9.00681170e-01,   1.72626612e+00,  -1.05694388e+00,
         -1.05003079e+00],
       [ -1.26418478e+00,  -1.24957601e-01,  -1.34127240e+00,
         -1.18150376e+00],
       [ -9.00681170e-01,   1.72626612e+00,  -1.22754100e+00,
         -1.31297673e+00],
       [ -1.50652052e+00,   3.37848329e-01,  -1.34127240e+00,
         -1.31297673e+00],
       [ -6.58345429e-01,   1.49486315e+00,  -1.28440670e+00,
         -1.31297673e+00],
       [ -1.02184904e+00,   5.69251294e-01,  -1.34127240e+00,
         -1.31297673e+00],
       [  1.40150837e+00,   3.37848329e-01,   5.35295827e-01,
          2.64698913e-01],
       [  6.74501145e-01,   3.37848329e-01,   4.21564419e-01,
          3.96171883e-01],
       [  1.28034050e+00,   1.06445364e-01,   6.49027235e-01,
          3.96171883e-01],
       [ -4.16009689e-01,  -1.74477836e+00,   1.37235899e-01,
          1.33225943e-01],
       [  7.95669016e-01,  -5.87763531e-01,   4.78430123e-01,
          3.96171883e-01],
       [ -1.73673948e-01,  -5.87763531e-01,   4.21564419e-01,
          1.33225943e-01],
       [  5.53333275e-01,   5.69251294e-01,   5.35295827e-01,
          5.27644853e-01],
       [ -1.14301691e+00,  -1.51337539e+00,  -2.60824029e-01,
         -2.61192967e-01],
       [  9.16836886e-01,  -3.56360566e-01,   4.78430123e-01,
          1.33225943e-01],
       [ -7.79513300e-01,  -8.19166497e-01,   8.03701950e-02,
          2.64698913e-01],
       [ -1.02184904e+00,  -2.43898725e+00,  -1.47092621e-01,
         -2.61192967e-01],
       [  6.86617933e-02,  -1.24957601e-01,   2.50967307e-01,
          3.96171883e-01],
       [  1.89829664e-01,  -1.97618132e+00,   1.37235899e-01,
         -2.61192967e-01],
       [  3.10997534e-01,  -3.56360566e-01,   5.35295827e-01,
          2.64698913e-01],
       [ -2.94841818e-01,  -3.56360566e-01,  -9.02269170e-02,
          1.33225943e-01],
       [  1.03800476e+00,   1.06445364e-01,   3.64698715e-01,
          2.64698913e-01],
       [ -2.94841818e-01,  -1.24957601e-01,   4.21564419e-01,
          3.96171883e-01],
       [ -5.25060772e-02,  -8.19166497e-01,   1.94101603e-01,
         -2.61192967e-01],
       [  4.32165405e-01,  -1.97618132e+00,   4.21564419e-01,
          3.96171883e-01],
       [ -2.94841818e-01,  -1.28197243e+00,   8.03701950e-02,
         -1.29719997e-01],
       [  6.86617933e-02,   3.37848329e-01,   5.92161531e-01,
          7.90590793e-01],
       [  3.10997534e-01,  -5.87763531e-01,   1.37235899e-01,
          1.33225943e-01],
       [  5.53333275e-01,  -1.28197243e+00,   6.49027235e-01,
          3.96171883e-01],
       [  3.10997534e-01,  -5.87763531e-01,   5.35295827e-01,
          1.75297293e-03],
       [  6.74501145e-01,  -3.56360566e-01,   3.07833011e-01,
          1.33225943e-01],
       [  9.16836886e-01,  -1.24957601e-01,   3.64698715e-01,
          2.64698913e-01],
       [  1.15917263e+00,  -5.87763531e-01,   5.92161531e-01,
          2.64698913e-01],
       [  1.03800476e+00,  -1.24957601e-01,   7.05892939e-01,
          6.59117823e-01],
       [  1.89829664e-01,  -3.56360566e-01,   4.21564419e-01,
          3.96171883e-01],
       [ -1.73673948e-01,  -1.05056946e+00,  -1.47092621e-01,
         -2.61192967e-01],
       [ -4.16009689e-01,  -1.51337539e+00,   2.35044910e-02,
         -1.29719997e-01],
       [ -4.16009689e-01,  -1.51337539e+00,  -3.33612130e-02,
         -2.61192967e-01],
       [ -5.25060772e-02,  -8.19166497e-01,   8.03701950e-02,
          1.75297293e-03],
       [  1.89829664e-01,  -8.19166497e-01,   7.62758643e-01,
          5.27644853e-01],
       [ -5.37177559e-01,  -1.24957601e-01,   4.21564419e-01,
          3.96171883e-01],
       [  1.89829664e-01,   8.00654259e-01,   4.21564419e-01,
          5.27644853e-01],
       [  1.03800476e+00,   1.06445364e-01,   5.35295827e-01,
          3.96171883e-01],
       [  5.53333275e-01,  -1.74477836e+00,   3.64698715e-01,
          1.33225943e-01],
       [ -2.94841818e-01,  -1.24957601e-01,   1.94101603e-01,
          1.33225943e-01],
       [ -4.16009689e-01,  -1.28197243e+00,   1.37235899e-01,
          1.33225943e-01],
       [ -4.16009689e-01,  -1.05056946e+00,   3.64698715e-01,
          1.75297293e-03],
       [  3.10997534e-01,  -1.24957601e-01,   4.78430123e-01,
          2.64698913e-01],
       [ -5.25060772e-02,  -1.05056946e+00,   1.37235899e-01,
          1.75297293e-03],
       [ -1.02184904e+00,  -1.74477836e+00,  -2.60824029e-01,
         -2.61192967e-01],
       [ -2.94841818e-01,  -8.19166497e-01,   2.50967307e-01,
          1.33225943e-01],
       [ -1.73673948e-01,  -1.24957601e-01,   2.50967307e-01,
          1.75297293e-03],
       [ -1.73673948e-01,  -3.56360566e-01,   2.50967307e-01,
          1.33225943e-01],
       [  4.32165405e-01,  -3.56360566e-01,   3.07833011e-01,
          1.33225943e-01],
       [ -9.00681170e-01,  -1.28197243e+00,  -4.31421141e-01,
         -1.29719997e-01],
       [ -1.73673948e-01,  -5.87763531e-01,   1.94101603e-01,
          1.33225943e-01],
       [  5.53333275e-01,   5.69251294e-01,   1.27454998e+00,
          1.71090158e+00],
       [ -5.25060772e-02,  -8.19166497e-01,   7.62758643e-01,
          9.22063763e-01],
       [  1.52267624e+00,  -1.24957601e-01,   1.21768427e+00,
          1.18500970e+00],
       [  5.53333275e-01,  -3.56360566e-01,   1.04708716e+00,
          7.90590793e-01],
       [  7.95669016e-01,  -1.24957601e-01,   1.16081857e+00,
          1.31648267e+00],
       [  2.12851559e+00,  -1.24957601e-01,   1.61574420e+00,
          1.18500970e+00],
       [ -1.14301691e+00,  -1.28197243e+00,   4.21564419e-01,
          6.59117823e-01],
       [  1.76501198e+00,  -3.56360566e-01,   1.44514709e+00,
          7.90590793e-01],
       [  1.03800476e+00,  -1.28197243e+00,   1.16081857e+00,
          7.90590793e-01],
       [  1.64384411e+00,   1.26346019e+00,   1.33141568e+00,
          1.71090158e+00],
       [  7.95669016e-01,   3.37848329e-01,   7.62758643e-01,
          1.05353673e+00],
       [  6.74501145e-01,  -8.19166497e-01,   8.76490051e-01,
          9.22063763e-01],
       [  1.15917263e+00,  -1.24957601e-01,   9.90221459e-01,
          1.18500970e+00],
       [ -1.73673948e-01,  -1.28197243e+00,   7.05892939e-01,
          1.05353673e+00],
       [ -5.25060772e-02,  -5.87763531e-01,   7.62758643e-01,
          1.57942861e+00],
       [  6.74501145e-01,   3.37848329e-01,   8.76490051e-01,
          1.44795564e+00],
       [  7.95669016e-01,  -1.24957601e-01,   9.90221459e-01,
          7.90590793e-01],
       [  2.24968346e+00,   1.72626612e+00,   1.67260991e+00,
          1.31648267e+00],
       [  2.24968346e+00,  -1.05056946e+00,   1.78634131e+00,
          1.44795564e+00],
       [  1.89829664e-01,  -1.97618132e+00,   7.05892939e-01,
          3.96171883e-01],
       [  1.28034050e+00,   3.37848329e-01,   1.10395287e+00,
          1.44795564e+00],
       [ -2.94841818e-01,  -5.87763531e-01,   6.49027235e-01,
          1.05353673e+00],
       [  2.24968346e+00,  -5.87763531e-01,   1.67260991e+00,
          1.05353673e+00],
       [  5.53333275e-01,  -8.19166497e-01,   6.49027235e-01,
          7.90590793e-01],
       [  1.03800476e+00,   5.69251294e-01,   1.10395287e+00,
          1.18500970e+00],
       [  1.64384411e+00,   3.37848329e-01,   1.27454998e+00,
          7.90590793e-01],
       [  4.32165405e-01,  -5.87763531e-01,   5.92161531e-01,
          7.90590793e-01],
       [  3.10997534e-01,  -1.24957601e-01,   6.49027235e-01,
          7.90590793e-01],
       [  6.74501145e-01,  -5.87763531e-01,   1.04708716e+00,
          1.18500970e+00],
       [  1.64384411e+00,  -1.24957601e-01,   1.16081857e+00,
          5.27644853e-01],
       [  1.88617985e+00,  -5.87763531e-01,   1.33141568e+00,
          9.22063763e-01],
       [  2.49201920e+00,   1.72626612e+00,   1.50201279e+00,
          1.05353673e+00],
       [  6.74501145e-01,  -5.87763531e-01,   1.04708716e+00,
          1.31648267e+00],
       [  5.53333275e-01,  -5.87763531e-01,   7.62758643e-01,
          3.96171883e-01],
       [  3.10997534e-01,  -1.05056946e+00,   1.04708716e+00,
          2.64698913e-01],
       [  2.24968346e+00,  -1.24957601e-01,   1.33141568e+00,
          1.44795564e+00],
       [  5.53333275e-01,   8.00654259e-01,   1.04708716e+00,
          1.57942861e+00],
       [  6.74501145e-01,   1.06445364e-01,   9.90221459e-01,
          7.90590793e-01],
       [  1.89829664e-01,  -1.24957601e-01,   5.92161531e-01,
          7.90590793e-01],
       [  1.28034050e+00,   1.06445364e-01,   9.33355755e-01,
          1.18500970e+00],
       [  1.03800476e+00,   1.06445364e-01,   1.04708716e+00,
          1.57942861e+00],
       [  1.28034050e+00,   1.06445364e-01,   7.62758643e-01,
          1.44795564e+00],
       [ -5.25060772e-02,  -8.19166497e-01,   7.62758643e-01,
          9.22063763e-01],
       [  1.15917263e+00,   3.37848329e-01,   1.21768427e+00,
          1.44795564e+00],
       [  1.03800476e+00,   5.69251294e-01,   1.10395287e+00,
          1.71090158e+00],
       [  1.03800476e+00,  -1.24957601e-01,   8.19624347e-01,
          1.44795564e+00],
       [  5.53333275e-01,  -1.28197243e+00,   7.05892939e-01,
          9.22063763e-01],
       [  7.95669016e-01,  -1.24957601e-01,   8.19624347e-01,
          1.05353673e+00],
       [  4.32165405e-01,   8.00654259e-01,   9.33355755e-01,
          1.44795564e+00],
       [  6.86617933e-02,  -1.24957601e-01,   7.62758643e-01,
          7.90590793e-01]])

PCA(optional)


In [74]:
from sklearn.decomposition import PCA
pca = PCA()
pca.fit_transform(X)


Out[74]:
array([[ -2.26454173e+00,   5.05703903e-01,  -1.21943348e-01,
         -2.30733235e-02],
       [ -2.08642550e+00,  -6.55404729e-01,  -2.27250832e-01,
         -1.03208244e-01],
       [ -2.36795045e+00,  -3.18477311e-01,   5.14796236e-02,
         -2.78252250e-02],
       [ -2.30419716e+00,  -5.75367713e-01,   9.88604444e-02,
          6.63114622e-02],
       [ -2.38877749e+00,   6.74767397e-01,   2.14278490e-02,
          3.73972870e-02],
       [ -2.07053681e+00,   1.51854856e+00,   3.06842583e-02,
         -4.39877494e-03],
       [ -2.44571134e+00,   7.45626750e-02,   3.42197636e-01,
          3.80965668e-02],
       [ -2.23384186e+00,   2.47613932e-01,  -8.25744645e-02,
          2.55051623e-02],
       [ -2.34195768e+00,  -1.09514636e+00,   1.53562399e-01,
          2.67938291e-02],
       [ -2.18867576e+00,  -4.48629048e-01,  -2.46559522e-01,
          3.99073035e-02],
       [ -2.16348656e+00,   1.07059558e+00,  -2.64009373e-01,
         -1.53011377e-02],
       [ -2.32737775e+00,   1.58587455e-01,   1.00165616e-01,
          1.34554258e-01],
       [ -2.22408272e+00,  -7.09118158e-01,  -2.23214514e-01,
         -2.63061382e-03],
       [ -2.63971626e+00,  -9.38281982e-01,   1.89570030e-01,
          1.94220183e-02],
       [ -2.19229151e+00,   1.88997851e+00,  -4.69480095e-01,
         -1.92782042e-01],
       [ -2.25146521e+00,   2.72237108e+00,   3.26037967e-02,
         -4.71257043e-02],
       [ -2.20275048e+00,   1.51375028e+00,  -1.36349158e-03,
         -1.86631581e-01],
       [ -2.19017916e+00,   5.14304308e-01,  -3.86155949e-02,
         -9.19055067e-02],
       [ -1.89407429e+00,   1.43111071e+00,  -3.70742834e-01,
         -5.95280968e-02],
       [ -2.33994907e+00,   1.15803343e+00,   1.37417719e-01,
          3.98281842e-02],
       [ -1.91455639e+00,   4.30465163e-01,  -4.16006875e-01,
         -1.03602240e-02],
       [ -2.20464540e+00,   9.52457317e-01,   1.64738346e-01,
         -5.77291622e-02],
       [ -2.77416979e+00,   4.89517027e-01,   3.38836384e-01,
         -1.78537297e-02],
       [ -1.82041156e+00,   1.06750793e-01,   4.00614724e-02,
         -1.50345595e-01],
       [ -2.22821750e+00,   1.62186163e-01,   1.24201428e-01,
          2.71228863e-01],
       [ -1.95702401e+00,  -6.07892567e-01,  -2.98591029e-01,
         -4.38372887e-02],
       [ -2.05206331e+00,   2.66014312e-01,   9.20929788e-02,
         -6.66010028e-02],
       [ -2.16819365e+00,   5.52016495e-01,  -2.01295482e-01,
         -9.26056927e-03],
       [ -2.14030596e+00,   3.36640409e-01,  -2.65314545e-01,
         -8.35439339e-02],
       [ -2.26879019e+00,  -3.14878603e-01,   7.55154360e-02,
          1.08849380e-01],
       [ -2.14455443e+00,  -4.83942097e-01,  -6.78557607e-02,
          4.83787691e-02],
       [ -1.83193810e+00,   4.45266836e-01,  -2.65375244e-01,
         -2.39140994e-01],
       [ -2.60820287e+00,   1.82847519e+00,   5.14195182e-02,
          2.31922593e-01],
       [ -2.43795086e+00,   2.18539162e+00,  -7.93497549e-02,
          5.10210291e-02],
       [ -2.18867576e+00,  -4.48629048e-01,  -2.46559522e-01,
          3.99073035e-02],
       [ -2.21111990e+00,  -1.84337811e-01,  -2.18624528e-01,
         -1.68619769e-01],
       [ -2.04441652e+00,   6.84956426e-01,  -4.79411570e-01,
         -1.95613314e-01],
       [ -2.18867576e+00,  -4.48629048e-01,  -2.46559522e-01,
          3.99073035e-02],
       [ -2.43595220e+00,  -8.82169415e-01,   2.01557587e-01,
          9.96079072e-03],
       [ -2.17054720e+00,   2.92726955e-01,  -1.69938536e-01,
         -6.24028506e-03],
       [ -2.28652724e+00,   4.67991716e-01,   4.07365390e-02,
         -1.05718261e-01],
       [ -1.87170722e+00,  -2.32769161e+00,  -1.94528610e-01,
         -2.91692982e-01],
       [ -2.55783442e+00,  -4.53816380e-01,   3.13571838e-01,
          6.74111170e-02],
       [ -1.96427929e+00,   4.97391640e-01,   3.14755610e-01,
         -1.75540206e-01],
       [ -2.13337283e+00,   1.17143211e+00,   2.52793222e-01,
          1.53228807e-01],
       [ -2.07535759e+00,  -6.91917347e-01,  -5.65590082e-02,
         -1.40294980e-01],
       [ -2.38125822e+00,   1.15063259e+00,   6.21019035e-02,
          1.54218569e-01],
       [ -2.39819169e+00,  -3.62390765e-01,   1.46855632e-01,
          4.94784238e-02],
       [ -2.22678121e+00,   1.02548255e+00,  -1.76645302e-01,
          1.64443096e-02],
       [ -2.20595417e+00,   3.22378453e-02,  -1.46593527e-01,
         -4.87782024e-02],
       [  1.10399365e+00,   8.63112446e-01,  -6.85555108e-01,
         -3.49778613e-02],
       [  7.32481440e-01,   5.98635573e-01,  -9.40668020e-02,
         -4.45376367e-03],
       [  1.24210951e+00,   6.14822450e-01,  -5.54846534e-01,
         -9.67335738e-03],
       [  3.97307283e-01,  -1.75816895e+00,  -1.85694824e-02,
         -6.73978468e-02],
       [  1.07259395e+00,  -2.11757903e-01,  -3.97447438e-01,
         -1.05541662e-01],
       [  3.84458146e-01,  -5.91062469e-01,   1.26797690e-01,
          2.40528082e-01],
       [  7.48715076e-01,   7.78698611e-01,   1.48656023e-01,
          7.83010665e-02],
       [ -4.97863388e-01,  -1.84886877e+00,   2.55555250e-01,
          3.93891395e-02],
       [  9.26222368e-01,   3.03308268e-02,  -5.95459889e-01,
          2.91024204e-02],
       [  4.96802558e-03,  -1.02940111e+00,   5.42867049e-01,
          2.83487629e-02],
       [ -1.24697461e-01,  -2.65806268e+00,  -3.98134482e-02,
         -1.61405573e-02],
       [  4.38730118e-01,  -5.88812850e-02,   2.06703491e-01,
         -3.98514578e-02],
       [  5.51633981e-01,  -1.77258156e+00,  -7.61380223e-01,
         -4.83536968e-02],
       [  7.17165066e-01,  -1.85434315e-01,  -6.72998424e-02,
          1.64555675e-01],
       [ -3.72583830e-02,  -4.32795099e-01,   1.98061449e-01,
         -1.09025121e-01],
       [  8.75890536e-01,   5.09998151e-01,  -5.03505832e-01,
         -1.05141287e-01],
       [  3.48006402e-01,  -1.90621647e-01,   4.92831518e-01,
          1.92059489e-01],
       [  1.53392545e-01,  -7.90725456e-01,  -2.98604516e-01,
          2.04321215e-01],
       [  1.21530321e+00,  -1.63335564e+00,  -4.79409914e-01,
         -2.28214500e-01],
       [  1.56941176e-01,  -1.30310327e+00,  -1.68586746e-01,
          5.04131971e-02],
       [  7.38256104e-01,   4.02470382e-01,   6.16772626e-01,
          8.44515277e-02],
       [  4.72369682e-01,  -4.16608222e-01,  -2.62718283e-01,
         -1.14244715e-01],
       [  1.22798821e+00,  -9.40914793e-01,  -3.66704859e-01,
          8.44834785e-03],
       [  6.29381045e-01,  -4.16811643e-01,  -2.89962474e-01,
          2.73494879e-01],
       [  7.00472799e-01,  -6.34939277e-02,  -4.44767559e-01,
         -4.40812895e-02],
       [  8.73536987e-01,   2.50708611e-01,  -4.72148886e-01,
         -1.02121003e-01],
       [  1.25422219e+00,  -8.26200998e-02,  -7.26843529e-01,
         -4.08294177e-02],
       [  1.35823985e+00,   3.28820266e-01,  -2.61458074e-01,
         -6.70137909e-02],
       [  6.62126138e-01,  -2.24346071e-01,   8.73681069e-02,
          3.63525363e-02],
       [ -4.72815133e-02,  -1.05721241e+00,  -3.15319195e-01,
         -6.60077099e-02],
       [  1.21534209e-01,  -1.56359238e+00,  -1.45241738e-01,
          7.87527979e-03],
       [  1.41182261e-02,  -1.57339235e+00,  -2.36581428e-01,
          3.11492615e-02],
       [  2.36010837e-01,  -7.75923784e-01,  -1.47972885e-01,
         -2.44595546e-02],
       [  1.05669143e+00,  -6.36901284e-01,   1.06753234e-01,
          1.83419236e-01],
       [  2.21417088e-01,  -2.80847693e-01,   6.67559660e-01,
          2.55550383e-01],
       [  4.31783161e-01,   8.55136920e-01,   4.50731487e-01,
          1.11146169e-01],
       [  1.04941336e+00,   5.22197265e-01,  -3.96142266e-01,
         -3.72988658e-02],
       [  1.03587821e+00,  -1.39246648e+00,  -6.85434303e-01,
         -1.39128619e-01],
       [  6.70675999e-02,  -2.12620735e-01,   2.94128262e-01,
          1.47491049e-01],
       [  2.75425066e-01,  -1.32981591e+00,   9.34447685e-02,
         -9.94752060e-03],
       [  2.72335066e-01,  -1.11944152e+00,   9.81718909e-02,
          2.69842632e-01],
       [  6.23170540e-01,   2.75426333e-02,  -1.93046544e-02,
          1.47722637e-01],
       [  3.30005364e-01,  -9.88900732e-01,  -1.95968073e-01,
         -7.62651620e-03],
       [ -3.73627623e-01,  -2.01793227e+00,   1.12184053e-01,
         -2.10814710e-02],
       [  2.82944343e-01,  -8.53950717e-01,   1.34118823e-01,
          1.06873761e-01],
       [  8.90531103e-02,  -1.74908548e-01,   1.31448375e-01,
          2.30135987e-01],
       [  2.24356783e-01,  -3.80484659e-01,   1.58769003e-01,
          1.32578640e-01],
       [  5.73883486e-01,  -1.53719974e-01,  -2.70039416e-01,
          1.94096051e-02],
       [ -4.57012873e-01,  -1.53946451e+00,   1.96126173e-01,
         -2.00883380e-01],
       [  2.52244473e-01,  -5.95860746e-01,   9.47499397e-02,
          5.82952756e-02],
       [  1.84767259e+00,   8.71696662e-01,   1.00276099e+00,
          5.10680368e-02],
       [  1.15318981e+00,  -7.01326114e-01,   5.31464635e-01,
          4.04135807e-02],
       [  2.20634950e+00,   5.54470105e-01,  -2.05495910e-01,
         -5.93004996e-02],
       [  1.43868540e+00,  -5.00105223e-02,   1.63390464e-01,
          2.35759861e-01],
       [  1.86789070e+00,   2.91192802e-01,   3.94004333e-01,
          1.67817996e-02],
       [  2.75419671e+00,   7.88432206e-01,  -5.86232704e-01,
          1.00879674e-01],
       [  3.58374475e-01,  -1.56009458e+00,   9.90999895e-01,
          1.32987438e-01],
       [  2.30300590e+00,   4.09516695e-01,  -6.54166687e-01,
          2.37212798e-01],
       [  2.00173530e+00,  -7.23865359e-01,  -3.94070448e-01,
          8.49938224e-02],
       [  2.26755460e+00,   1.92144299e+00,   3.92517658e-01,
         -1.02907298e-01],
       [  1.36590943e+00,   6.93948040e-01,   2.83279516e-01,
         -1.07010918e-01],
       [  1.59906459e+00,  -4.28248836e-01,   2.33040821e-02,
         -5.89427002e-02],
       [  1.88425185e+00,   4.14332758e-01,   2.45485540e-02,
         -1.46296964e-01],
       [  1.25308651e+00,  -1.16739134e+00,   5.82130271e-01,
         -9.96816830e-02],
       [  1.46406152e+00,  -4.44147569e-01,   1.00411052e+00,
         -2.75022173e-01],
       [  1.59180930e+00,   6.77035372e-01,   6.36650721e-01,
         -1.90645618e-01],
       [  1.47128019e+00,   2.53192472e-01,   3.66575092e-02,
          1.55435928e-01],
       [  2.43737848e+00,   2.55675734e+00,  -1.34200082e-01,
          2.75661550e-01],
       [  3.30914118e+00,  -2.36132010e-03,  -7.06933959e-01,
         -4.67561875e-02],
       [  1.25398099e+00,  -1.71758384e+00,  -2.64622084e-01,
          6.30674020e-02],
       [  2.04049626e+00,   9.07398765e-01,   2.31878114e-01,
         -1.67140048e-01],
       [  9.73915114e-01,  -5.71174376e-01,   8.29503781e-01,
         -2.73189478e-02],
       [  2.89806444e+00,   3.97791359e-01,  -8.60926842e-01,
          1.26074286e-01],
       [  1.32919369e+00,  -4.86760542e-01,  -4.70734933e-03,
         -1.40597876e-01],
       [  1.70424071e+00,   1.01414842e+00,   2.95957877e-01,
          6.27403760e-02],
       [  1.95772766e+00,   1.00333452e+00,  -4.22817052e-01,
          2.18459131e-01],
       [  1.17190451e+00,  -3.18896617e-01,   1.30651910e-01,
         -1.25685467e-01],
       [  1.01978105e+00,   6.55429631e-02,   3.38042170e-01,
          9.06850833e-03],
       [  1.78600886e+00,  -1.93272800e-01,   2.70002526e-01,
         -3.12072991e-02],
       [  1.86477791e+00,   5.55381532e-01,  -7.17510683e-01,
          2.07556768e-01],
       [  2.43549739e+00,   2.46654468e-01,  -7.30234006e-01,
          1.67936017e-02],
       [  2.31608241e+00,   2.62618387e+00,  -4.99619543e-01,
          2.13160418e-01],
       [  1.86037143e+00,  -1.84672394e-01,   3.53330279e-01,
         -1.00039482e-01],
       [  1.11127173e+00,  -2.95986102e-01,  -1.82659608e-01,
          1.85740240e-01],
       [  1.19746916e+00,  -8.17167742e-01,  -1.63213782e-01,
          4.88404000e-01],
       [  2.80094940e+00,   8.44748194e-01,  -5.47000957e-01,
         -2.96321147e-01],
       [  1.58015525e+00,   1.07247450e+00,   9.43392608e-01,
         -3.36074229e-02],
       [  1.34704442e+00,   4.22255966e-01,   1.80028706e-01,
          2.15906539e-01],
       [  9.23432978e-01,   1.92303705e-02,   4.17394303e-01,
         -4.74424586e-03],
       [  1.85355198e+00,   6.72422729e-01,  -1.48203294e-02,
         -1.94875449e-01],
       [  2.01615720e+00,   6.10397038e-01,   4.25914947e-01,
         -2.46764702e-01],
       [  1.90311686e+00,   6.86024832e-01,   1.27799364e-01,
         -4.69214421e-01],
       [  1.15318981e+00,  -7.01326114e-01,   5.31464635e-01,
          4.04135807e-02],
       [  2.04330844e+00,   8.64684880e-01,   3.35266061e-01,
         -4.42781979e-02],
       [  2.00169097e+00,   1.04855005e+00,   6.29268888e-01,
         -2.12588357e-01],
       [  1.87052207e+00,   3.82821838e-01,   2.54532319e-01,
         -3.88890487e-01],
       [  1.55849189e+00,  -9.05313601e-01,  -2.53819099e-02,
         -2.21322184e-01],
       [  1.52084506e+00,   2.66794575e-01,   1.79277203e-01,
         -1.18903043e-01],
       [  1.37639119e+00,   1.01636193e+00,   9.31405052e-01,
         -2.41461953e-02],
       [  9.59298576e-01,  -2.22839447e-02,   5.28794187e-01,
          1.63675806e-01]])

In [76]:
covariance = pca.get_covariance()
explained_variance = pca.explained_variance_
explained_variance


Out[76]:
array([ 2.91081808,  0.92122093,  0.14735328,  0.02060771])

In [78]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.figure(figsize=(6, 4))
    
plt.bar(range(4), explained_variance, alpha=0.5, align='center',
            label='individual explained variance')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.tight_layout()


The last 1 componen has less amount of variance of the data. The first 3 components retains more than 90% of the data.(Here, compared with only 4 features, there're enough instances to support the final results. We shall take all features into consideration)

Consider first 3 components and visualise it using K-means clustering


In [116]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

pca = PCA(n_components=3)
x_pca = pca.fit_transform(X)

kmeans = KMeans(n_clusters=3, random_state=5)
x_clustered = kmeans.fit_predict(x_pca)

y = targets.values
y = y.reshape(y.size)

In [119]:
import matplotlib.pyplot as plt
%matplotlib inline

LABEL_COLOR_MAP = {0 : 'g',
                   1 : 'y',
                   2 : 'r'
                  }

label_color = [LABEL_COLOR_MAP[i] for i in x_clustered]
y_color = [LABEL_COLOR_MAP[i] for i in y]

fig,axes = plt.subplots(nrows=1,ncols=2,figsize=(5,3))

axes[0].scatter(X[:,0],X[:,1], c= label_color)
axes[0].set_title('PCA')
axes[1].scatter(X[:,0],X[:,1], c= y_color)
axes[1].set_title('True Cluster');


Using K-means, we are able to segregate 3 classes well using the first 3 components with maximum variance. (Don't mind the color type, which is meaningless in clustering).

You can apply PCA firstly before using machine learning in the next steps

Splitting the data into training and testing dataset


In [122]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=4)

Default Logistic Regression(optional)


In [129]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn import metrics

modelLR = LogisticRegression(n_jobs=-1)

modelLR.fit(X_train,y_train);

In [130]:
y_pred = modelLR.predict(X_test)
modelLR.score(X_test,y_pred)


Out[130]:
1.0

In [131]:
confusion_matrix=metrics.confusion_matrix(y_test,y_pred)
confusion_matrix


Out[131]:
array([[15,  1,  0],
       [ 0,  5,  0],
       [ 0,  1,  8]])

In [147]:
import matplotlib.pyplot as plt
%matplotlib inline

LABEL_COLOR_MAP = {0 : 'g',
                   1 : 'y',
                   2 : 'r'
                  }

pred_color = [LABEL_COLOR_MAP[i] for i in y_pred]
test_color = [LABEL_COLOR_MAP[i] for i in y_test]

fig,axes = plt.subplots(nrows=1,ncols=2,figsize=(5,2))

axes[0].scatter(X_test[:,0],X_test[:,1], c= pred_color)
axes[0].set_title('Predicted')
axes[1].scatter(X_test[:,0],X_test[:,1], c= test_color)
axes[1].set_title('True');


Tuned Logistic Regression(optional)


In [137]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn.model_selection import GridSearchCV

LRs= LogisticRegression()

tuned_parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000] ,
              'penalty':['l1','l2']
                   }

modelLR=GridSearchCV(LRs, tuned_parameters,cv=10)

Search best combinations of parameter values based on the dataset.

  • "C": Inverse of regularization strength
  • "Penalty": The norm used in the penalization

In [140]:
modelLR.fit(X_train,y_train)


Out[140]:
GridSearchCV(cv=10, error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'penalty': ['l1', 'l2'], 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [141]:
print(modelLR.best_params_)


{'penalty': 'l1', 'C': 1000}

In [142]:
y_pred = modelLR.predict(X_test)
modelLR.score(X_test,y_pred)


Out[142]:
1.0

In [143]:
confusion_matrix=metrics.confusion_matrix(y_test,y_pred)
confusion_matrix


Out[143]:
array([[15,  1,  0],
       [ 0,  4,  1],
       [ 0,  1,  8]])

In [144]:
auc_roc=metrics.classification_report(y_test,y_pred)
auc_roc


Out[144]:
'             precision    recall  f1-score   support\n\n          0       1.00      0.94      0.97        16\n          1       0.67      0.80      0.73         5\n          2       0.89      0.89      0.89         9\n\navg / total       0.91      0.90      0.90        30\n'

In [148]:
import matplotlib.pyplot as plt
%matplotlib inline

LABEL_COLOR_MAP = {0 : 'g',
                   1 : 'y',
                   2 : 'r'
                  }

pred_color = [LABEL_COLOR_MAP[i] for i in y_pred]
test_color = [LABEL_COLOR_MAP[i] for i in y_test]

fig,axes = plt.subplots(nrows=1,ncols=2,figsize=(5,2))

axes[0].scatter(X_test[:,0],X_test[:,1], c= pred_color)
axes[0].set_title('Predicted')
axes[1].scatter(X_test[:,0],X_test[:,1], c= test_color)
axes[1].set_title('True');


SVM(optional)


In [149]:
from sklearn.svm import SVC

svm= SVC()
tuned_parameters = {
 'C': [1, 10, 100,500, 1000], 'kernel': ['linear','rbf'],
 'C': [1, 10, 100,500, 1000], 'gamma': [1,0.1,0.01,0.001, 0.0001], 'kernel': ['rbf'],
 #'degree': [2,3,4,5,6] , 'C':[1,10,100,500,1000] , 'kernel':['poly']
    }

In [165]:
from sklearn.model_selection import RandomizedSearchCV
modelsvm = RandomizedSearchCV(svm, tuned_parameters,cv=10,scoring='accuracy',n_iter=20)

In [166]:
modelsvm.fit(X_train, y_train)
print(modelsvm.best_score_)


0.975

In [170]:
modelsvm.cv_results_


Out[170]:
{'mean_fit_time': array([ 0.0003    ,  0.0006    ,  0.0006    ,  0.0006    ,  0.00039999,
         0.00050001,  0.00019999,  0.00070002,  0.00080004,  0.00059998,
         0.00030003,  0.0003    ,  0.00039999,  0.00050001,  0.00050001,
         0.00049999,  0.00080006,  0.0003    ,  0.00039997,  0.00029998]),
 'mean_score_time': array([  3.99994850e-04,   1.99985504e-04,   0.00000000e+00,
          1.99985504e-04,   0.00000000e+00,   2.00033188e-04,
          1.00016594e-04,   0.00000000e+00,   1.99985504e-04,
          1.00016594e-04,   9.99927521e-05,   9.99927521e-05,
          0.00000000e+00,   1.99985504e-04,   2.00009346e-04,
          9.99927521e-05,   9.99927521e-05,   3.99994850e-04,
          0.00000000e+00,   9.99927521e-05]),
 'mean_test_score': array([ 0.95833333,  0.975     ,  0.9       ,  0.375     ,  0.975     ,
         0.95833333,  0.975     ,  0.96666667,  0.9       ,  0.375     ,
         0.96666667,  0.95833333,  0.95833333,  0.96666667,  0.9       ,
         0.95833333,  0.95833333,  0.975     ,  0.975     ,  0.95833333]),
 'mean_train_score': array([ 1.        ,  0.97874474,  0.90183299,  0.37499581,  0.97501444,
         1.        ,  0.97873561,  0.98427465,  0.90460243,  0.37499581,
         0.97966185,  0.99813917,  1.        ,  0.98057928,  0.90460243,
         0.99258314,  1.        ,  0.97778388,  0.98243129,  0.96572858]),
 'param_C': masked_array(data = [1000 1 1 1 1000 10 10 10 10 10 1000 100 1000 100 100 1000 500 500 500 500],
              mask = [False False False False False False False False False False False False
  False False False False False False False False],
        fill_value = ?),
 'param_gamma': masked_array(data = [1 0.1 0.01 0.001 0.001 1 0.1 0.01 0.001 0.0001 0.0001 0.1 0.1 0.001 0.0001
  0.01 0.1 0.01 0.001 0.0001],
              mask = [False False False False False False False False False False False False
  False False False False False False False False],
        fill_value = ?),
 'param_kernel': masked_array(data = ['rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf'
  'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf' 'rbf'],
              mask = [False False False False False False False False False False False False
  False False False False False False False False],
        fill_value = ?),
 'params': ({'C': 1000, 'gamma': 1, 'kernel': 'rbf'},
  {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'},
  {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'},
  {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'},
  {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'},
  {'C': 10, 'gamma': 1, 'kernel': 'rbf'},
  {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'},
  {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'},
  {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'},
  {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'},
  {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'},
  {'C': 100, 'gamma': 0.1, 'kernel': 'rbf'},
  {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'},
  {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'},
  {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'},
  {'C': 1000, 'gamma': 0.01, 'kernel': 'rbf'},
  {'C': 500, 'gamma': 0.1, 'kernel': 'rbf'},
  {'C': 500, 'gamma': 0.01, 'kernel': 'rbf'},
  {'C': 500, 'gamma': 0.001, 'kernel': 'rbf'},
  {'C': 500, 'gamma': 0.0001, 'kernel': 'rbf'}),
 'rank_test_score': array([ 9,  1, 16, 19,  1,  9,  1,  6, 16, 19,  6,  9,  9,  6, 16,  9,  9,
         1,  1,  9]),
 'split0_test_score': array([ 1.        ,  1.        ,  1.        ,  0.35714286,  1.        ,
         1.        ,  1.        ,  1.        ,  1.        ,  0.35714286,
         1.        ,  1.        ,  1.        ,  1.        ,  1.        ,
         1.        ,  1.        ,  1.        ,  1.        ,  1.        ]),
 'split0_train_score': array([ 1.        ,  0.99056604,  0.89622642,  0.37735849,  0.97169811,
         1.        ,  0.97169811,  0.98113208,  0.89622642,  0.37735849,
         0.98113208,  0.99056604,  1.        ,  0.98113208,  0.89622642,
         0.99056604,  1.        ,  0.97169811,  0.98113208,  0.96226415]),
 'split1_test_score': array([ 0.92307692,  1.        ,  0.92307692,  0.38461538,  1.        ,
         0.92307692,  1.        ,  1.        ,  0.92307692,  0.38461538,
         1.        ,  0.92307692,  0.92307692,  1.        ,  0.92307692,
         1.        ,  0.92307692,  1.        ,  1.        ,  0.92307692]),
 'split1_train_score': array([ 1.        ,  0.98130841,  0.88785047,  0.37383178,  0.97196262,
         1.        ,  0.99065421,  0.99065421,  0.88785047,  0.37383178,
         0.99065421,  1.        ,  1.        ,  0.99065421,  0.88785047,
         0.99065421,  1.        ,  0.97196262,  0.98130841,  0.97196262]),
 'split2_test_score': array([ 0.84615385,  0.84615385,  0.84615385,  0.38461538,  0.92307692,
         0.84615385,  0.92307692,  0.84615385,  0.84615385,  0.38461538,
         0.84615385,  0.92307692,  0.92307692,  0.84615385,  0.84615385,
         0.92307692,  0.92307692,  0.92307692,  0.92307692,  0.84615385]),
 'split2_train_score': array([ 1.        ,  0.99065421,  0.90654206,  0.37383178,  0.98130841,
         1.        ,  0.98130841,  0.98130841,  0.90654206,  0.37383178,
         0.98130841,  1.        ,  1.        ,  0.98130841,  0.90654206,
         0.98130841,  1.        ,  0.98130841,  0.99065421,  0.96261682]),
 'split3_test_score': array([ 1.        ,  1.        ,  0.76923077,  0.38461538,  0.92307692,
         1.        ,  0.92307692,  0.92307692,  0.76923077,  0.38461538,
         0.92307692,  0.92307692,  0.92307692,  0.92307692,  0.76923077,
         0.92307692,  0.92307692,  0.92307692,  0.92307692,  0.92307692]),
 'split3_train_score': array([ 1.        ,  0.97196262,  0.90654206,  0.37383178,  0.98130841,
         1.        ,  0.99065421,  0.99065421,  0.91588785,  0.37383178,
         0.98130841,  1.        ,  1.        ,  0.98130841,  0.91588785,
         1.        ,  1.        ,  0.99065421,  0.99065421,  0.96261682]),
 'split4_test_score': array([ 0.91666667,  1.        ,  0.91666667,  0.41666667,  0.91666667,
         0.91666667,  0.91666667,  1.        ,  0.91666667,  0.41666667,
         1.        ,  0.91666667,  0.91666667,  1.        ,  0.91666667,
         0.91666667,  0.91666667,  0.91666667,  0.91666667,  1.        ]),
 'split4_train_score': array([ 1.        ,  0.97222222,  0.90740741,  0.37037037,  0.98148148,
         1.        ,  0.98148148,  0.99074074,  0.90740741,  0.37037037,
         0.98148148,  1.        ,  1.        ,  0.98148148,  0.90740741,
         1.        ,  1.        ,  0.98148148,  0.98148148,  0.96296296]),
 'split5_test_score': array([ 1.        ,  1.        ,  1.        ,  0.36363636,  1.        ,
         1.        ,  1.        ,  1.        ,  1.        ,  0.36363636,
         1.        ,  1.        ,  1.        ,  1.        ,  1.        ,
         1.        ,  1.        ,  1.        ,  1.        ,  1.        ]),
 'split5_train_score': array([ 1.        ,  0.97247706,  0.89908257,  0.37614679,  0.97247706,
         1.        ,  0.97247706,  0.98165138,  0.89908257,  0.37614679,
         0.98165138,  1.        ,  1.        ,  0.98165138,  0.89908257,
         0.99082569,  1.        ,  0.97247706,  0.97247706,  0.97247706]),
 'split6_test_score': array([ 1.        ,  0.90909091,  0.81818182,  0.36363636,  1.        ,
         1.        ,  1.        ,  0.90909091,  0.81818182,  0.36363636,
         0.90909091,  1.        ,  1.        ,  0.90909091,  0.81818182,
         0.90909091,  1.        ,  1.        ,  1.        ,  0.90909091]),
 'split6_train_score': array([ 1.        ,  0.98165138,  0.91743119,  0.37614679,  0.97247706,
         1.        ,  0.98165138,  0.98165138,  0.9266055 ,  0.37614679,
         0.97247706,  1.        ,  1.        ,  0.97247706,  0.9266055 ,
         1.        ,  1.        ,  0.98165138,  0.98165138,  0.96330275]),
 'split7_test_score': array([ 1.        ,  1.        ,  0.90909091,  0.36363636,  1.        ,
         1.        ,  1.        ,  1.        ,  0.90909091,  0.36363636,
         1.        ,  1.        ,  1.        ,  1.        ,  0.90909091,
         1.        ,  1.        ,  1.        ,  1.        ,  1.        ]),
 'split7_train_score': array([ 1.        ,  0.98165138,  0.89908257,  0.37614679,  0.97247706,
         1.        ,  0.97247706,  0.98165138,  0.89908257,  0.37614679,
         0.98165138,  1.        ,  1.        ,  0.98165138,  0.89908257,
         0.99082569,  1.        ,  0.97247706,  0.98165138,  0.97247706]),
 'split8_test_score': array([ 1.        ,  1.        ,  0.81818182,  0.36363636,  1.        ,
         1.        ,  1.        ,  1.        ,  0.81818182,  0.36363636,
         1.        ,  1.        ,  1.        ,  1.        ,  0.81818182,
         1.        ,  1.        ,  1.        ,  1.        ,  1.        ]),
 'split8_train_score': array([ 1.        ,  0.97247706,  0.89908257,  0.37614679,  0.97247706,
         1.        ,  0.97247706,  0.98165138,  0.90825688,  0.37614679,
         0.97247706,  1.        ,  1.        ,  0.98165138,  0.90825688,
         0.99082569,  1.        ,  0.97247706,  0.98165138,  0.95412844]),
 'split9_test_score': array([ 0.90909091,  1.        ,  1.        ,  0.36363636,  1.        ,
         0.90909091,  1.        ,  1.        ,  1.        ,  0.36363636,
         1.        ,  0.90909091,  0.90909091,  1.        ,  1.        ,
         0.90909091,  0.90909091,  1.        ,  1.        ,  1.        ]),
 'split9_train_score': array([ 1.        ,  0.97247706,  0.89908257,  0.37614679,  0.97247706,
         1.        ,  0.97247706,  0.98165138,  0.89908257,  0.37614679,
         0.97247706,  0.99082569,  1.        ,  0.97247706,  0.89908257,
         0.99082569,  1.        ,  0.98165138,  0.98165138,  0.97247706]),
 'std_fit_time': array([ 0.00045826,  0.0004899 ,  0.0004899 ,  0.0004899 ,  0.00048989,
         0.00050001,  0.00039997,  0.00045827,  0.00040002,  0.00048988,
         0.0004583 ,  0.00045826,  0.00048989,  0.00050001,  0.00050001,
         0.00049999,  0.00040003,  0.00045826,  0.00048986,  0.00045822]),
 'std_score_time': array([ 0.00048989,  0.00039997,  0.        ,  0.00039997,  0.        ,
         0.00040007,  0.00030005,  0.        ,  0.00039997,  0.00030005,
         0.00029998,  0.00029998,  0.        ,  0.00039997,  0.00040002,
         0.00029998,  0.00029998,  0.00048989,  0.        ,  0.00029998]),
 'std_test_score': array([ 0.05404662,  0.05192955,  0.08054182,  0.01732187,  0.03676269,
         0.05404662,  0.03676269,  0.05340031,  0.08054182,  0.01732187,
         0.05340031,  0.04048439,  0.04048439,  0.05340031,  0.08054182,
         0.04189912,  0.04048439,  0.03676269,  0.03676269,  0.05355015]),
 'std_train_score': array([ 0.        ,  0.0071515 ,  0.00758902,  0.00194076,  0.00416598,
         0.        ,  0.0071622 ,  0.00419869,  0.01038592,  0.00194076,
         0.00542685,  0.00372211,  0.        ,  0.0048829 ,  0.01038592,
         0.00558832,  0.        ,  0.00614057,  0.00490513,  0.0059578 ])}

In [155]:
print(modelsvm.best_params_)


{'kernel': 'rbf', 'C': 1, 'gamma': 0.1}

In [157]:
y_pred= modelsvm.predict(X_test)
print(metrics.accuracy_score(y_pred,y_test))


0.933333333333

In [158]:
confusion_matrix=metrics.confusion_matrix(y_test,y_pred)
confusion_matrix


Out[158]:
array([[16,  0,  0],
       [ 0,  4,  1],
       [ 0,  1,  8]])

In [159]:
auc_roc=metrics.classification_report(y_test,y_pred)
auc_roc


Out[159]:
'             precision    recall  f1-score   support\n\n          0       1.00      1.00      1.00        16\n          1       0.80      0.80      0.80         5\n          2       0.89      0.89      0.89         9\n\navg / total       0.93      0.93      0.93        30\n'

In [160]:
import matplotlib.pyplot as plt
%matplotlib inline

LABEL_COLOR_MAP = {0 : 'g',
                   1 : 'y',
                   2 : 'r'
                  }

pred_color = [LABEL_COLOR_MAP[i] for i in y_pred]
test_color = [LABEL_COLOR_MAP[i] for i in y_test]

fig,axes = plt.subplots(nrows=1,ncols=2,figsize=(5,2))

axes[0].scatter(X_test[:,0],X_test[:,1], c= pred_color)
axes[0].set_title('Predicted')
axes[1].scatter(X_test[:,0],X_test[:,1], c= test_color)
axes[1].set_title('True');



In [ ]: