Gradient Descent


In [1]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

Linear Algebra with NumPy


In [2]:
a = np.array([1, 3, 2, 4])

In [3]:
a


Out[3]:
array([1, 3, 2, 4])

In [4]:
type(a)


Out[4]:
numpy.ndarray

In [5]:
A = np.array([[3, 1, 2],
              [2, 3, 4]])

B = np.array([[0, 1],
              [2, 3],
              [4, 5]])

C = np.array([[0, 1],
              [2, 3],
              [4, 5],
              [0, 1],
              [2, 3],
              [4, 5]])

print("A is a {} matrix".format(A.shape))
print("B is a {} matrix".format(B.shape))
print("C is a {} matrix".format(C.shape))


A is a (2, 3) matrix
B is a (3, 2) matrix
C is a (6, 2) matrix

In [6]:
A[0]


Out[6]:
array([3, 1, 2])

In [7]:
C[2, 0]


Out[7]:
4

In [8]:
B[:, 0]


Out[8]:
array([0, 2, 4])

Elementwise operations


In [9]:
3 * A


Out[9]:
array([[ 9,  3,  6],
       [ 6,  9, 12]])

In [10]:
A + A


Out[10]:
array([[6, 2, 4],
       [4, 6, 8]])

In [11]:
A * A


Out[11]:
array([[ 9,  1,  4],
       [ 4,  9, 16]])

In [12]:
A / A


Out[12]:
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [13]:
A - A


Out[13]:
array([[0, 0, 0],
       [0, 0, 0]])

In [14]:
# A + B will not work, because A and B need to have the same shape (or broadcast-compatible shapes)!

In [15]:
# A * B (element-wise multiplication) will not work, because A and B need to have the same shape!
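
To make the shape requirement concrete, here is a small sketch (not a cell from the original notebook) that catches the error and shows a case where NumPy broadcasting does work; the `row` array is purely illustrative.

# Element-wise operations need equal (or broadcast-compatible) shapes.
try:
    A + B                       # (2, 3) + (3, 2): shapes do not broadcast
except ValueError as err:
    print("A + B failed:", err)

row = np.array([10, 20, 30])    # shape (3,) broadcasts across the rows of A
print(A + row)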

Dot product


In [16]:
A.shape


Out[16]:
(2, 3)

In [17]:
B.shape


Out[17]:
(3, 2)

In [18]:
A.dot(B)


Out[18]:
array([[10, 16],
       [22, 31]])

In [19]:
np.dot(A, B)


Out[19]:
array([[10, 16],
       [22, 31]])

In [20]:
B.dot(A)


Out[20]:
array([[ 2,  3,  4],
       [12, 11, 16],
       [22, 19, 28]])

In [21]:
C.shape


Out[21]:
(6, 2)

In [22]:
A.shape


Out[22]:
(2, 3)

In [23]:
C.dot(A)


Out[23]:
array([[ 2,  3,  4],
       [12, 11, 16],
       [22, 19, 28],
       [ 2,  3,  4],
       [12, 11, 16],
       [22, 19, 28]])

In [24]:
# A.dot(C) will not work, because shapes (2,3) and (6,2) are not aligned: 3 (dim 1) != 6 (dim 0)
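
Before calling .dot it helps to check that the inner dimensions line up. A quick sketch follows; the can_multiply helper is hypothetical, and `@` is simply the operator form of .dot.

# M.dot(N) requires M.shape[1] == N.shape[0]
def can_multiply(M, N):
    return M.shape[1] == N.shape[0]

print(can_multiply(A, B))   # True:  (2, 3) x (3, 2)
print(can_multiply(A, C))   # False: (2, 3) x (6, 2)
print(A @ B)                # same result as A.dot(B)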

Gradient descent


In [25]:
df = pd.read_csv('./data/banknotes.csv')

In [26]:
df.head()


Out[26]:
variace skewness curtosis entropy class
0 3.62160 8.6661 -2.8073 -0.44699 0
1 4.54590 8.1674 -2.4586 -1.46210 0
2 3.86600 -2.6383 1.9242 0.10645 0
3 3.45660 9.5228 -4.0112 -3.59440 0
4 0.32924 -4.4552 4.5718 -0.98880 0

In [27]:
df['class'].value_counts()


Out[27]:
0    762
1    610
Name: class, dtype: int64

In [28]:
import notebook
import seaborn as sns


/home/arcyfelix/.local/lib/python3.5/site-packages/IPython/html.py:14: ShimWarning: The `IPython.html` package has been deprecated. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.
  "`IPython.html.widgets` has moved to `ipywidgets`.", ShimWarning)

In [29]:
sns.pairplot(df, hue = "class")


Out[29]:
<seaborn.axisgrid.PairGrid at 0x7f81f8f2c588>

Baseline model


In [30]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import scale

In [31]:
X = scale(df.drop('class', axis = 1).values)
y = df['class'].values

In [32]:
model = RandomForestClassifier()
cross_val_score(model, X, y)


Out[32]:
array([ 0.99344978,  0.98905908,  0.99343545])

Logistic Regression Model


In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size = 0.3,
                                                    random_state = 42)

In [34]:
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD


Using TensorFlow backend.

In [35]:
K.clear_session()

model = Sequential()
model.add(Dense(1, 
                input_shape = (4,), 
                activation = 'sigmoid'))

model.compile(loss = 'binary_crossentropy',
              optimizer = 'sgd',
              metrics = ['accuracy'])

history = model.fit(X_train, y_train)
result = model.evaluate(X_test, y_test)


Epoch 1/10
960/960 [==============================] - 0s - loss: 0.8023 - acc: 0.5125     
Epoch 2/10
960/960 [==============================] - 0s - loss: 0.7165 - acc: 0.5583     
Epoch 3/10
960/960 [==============================] - 0s - loss: 0.6499 - acc: 0.6187     
Epoch 4/10
960/960 [==============================] - 0s - loss: 0.5985 - acc: 0.6760     
Epoch 5/10
960/960 [==============================] - 0s - loss: 0.5585 - acc: 0.7177     
Epoch 6/10
960/960 [==============================] - 0s - loss: 0.5270 - acc: 0.7490     
Epoch 7/10
960/960 [==============================] - 0s - loss: 0.5016 - acc: 0.7760     
Epoch 8/10
960/960 [==============================] - 0s - loss: 0.4807 - acc: 0.7948     
Epoch 9/10
960/960 [==============================] - 0s - loss: 0.4630 - acc: 0.8146     
Epoch 10/10
960/960 [==============================] - 0s - loss: 0.4479 - acc: 0.8292     
 32/412 [=>............................] - ETA: 0s

In [36]:
historydf = pd.DataFrame(history.history, 
                         index = history.epoch)

In [37]:
historydf.plot(figsize = (20, 10),
               ylim = (0,1))
plt.title("Test accuracy: {:3.2f} %".format(result[1]*100), fontsize = 15)


Out[37]:
<matplotlib.text.Text at 0x7f81d981f240>

Learning Rates


In [38]:
dflist = []

learning_rates = [0.01, 0.05, 0.1, 0.5]

for lr in learning_rates:

    K.clear_session()

    model = Sequential()
    model.add(Dense(1, 
                    input_shape = (4,), 
                    activation = 'sigmoid'))
    model.compile(loss = 'binary_crossentropy',
                  optimizer = SGD(lr = lr),
                  metrics = ['accuracy'])
    h = model.fit(X_train, 
                  y_train, 
                  batch_size = 16, 
                  verbose = 0)
    
    dflist.append(pd.DataFrame(h.history, 
                               index = h.epoch))

In [39]:
historydf = pd.concat(dflist, axis = 1)

In [40]:
historydf


Out[40]:
acc loss acc loss acc loss acc loss
0 0.293750 1.118826 0.642708 0.655702 0.692708 0.587479 0.933333 0.251726
1 0.411458 0.896913 0.901042 0.394850 0.892708 0.336193 0.968750 0.123533
2 0.522917 0.735560 0.918750 0.317055 0.925000 0.256493 0.972917 0.093887
3 0.661458 0.620981 0.926042 0.273472 0.938542 0.210767 0.975000 0.079572
4 0.746875 0.539282 0.930208 0.243192 0.956250 0.180594 0.979167 0.070957
5 0.833333 0.480153 0.938542 0.220191 0.960417 0.159272 0.980208 0.065076
6 0.900000 0.436237 0.945833 0.201839 0.964583 0.143446 0.980208 0.060737
7 0.919792 0.402738 0.953125 0.186873 0.966667 0.131278 0.980208 0.057325
8 0.923958 0.376420 0.954167 0.174396 0.969792 0.121620 0.981250 0.054853
9 0.929167 0.355147 0.958333 0.163872 0.969792 0.113810 0.981250 0.052479

In [41]:
metrics_reported = dflist[0].columns
idx = pd.MultiIndex.from_product([learning_rates, metrics_reported],
                                 names = ['learning_rate', 'metric'])

historydf.columns = idx

In [42]:
historydf


Out[42]:
learning_rate 0.01 0.05 0.10 0.50
metric acc loss acc loss acc loss acc loss
0 0.293750 1.118826 0.642708 0.655702 0.692708 0.587479 0.933333 0.251726
1 0.411458 0.896913 0.901042 0.394850 0.892708 0.336193 0.968750 0.123533
2 0.522917 0.735560 0.918750 0.317055 0.925000 0.256493 0.972917 0.093887
3 0.661458 0.620981 0.926042 0.273472 0.938542 0.210767 0.975000 0.079572
4 0.746875 0.539282 0.930208 0.243192 0.956250 0.180594 0.979167 0.070957
5 0.833333 0.480153 0.938542 0.220191 0.960417 0.159272 0.980208 0.065076
6 0.900000 0.436237 0.945833 0.201839 0.964583 0.143446 0.980208 0.060737
7 0.919792 0.402738 0.953125 0.186873 0.966667 0.131278 0.980208 0.057325
8 0.923958 0.376420 0.954167 0.174396 0.969792 0.121620 0.981250 0.054853
9 0.929167 0.355147 0.958333 0.163872 0.969792 0.113810 0.981250 0.052479

In [43]:
fig, ax = plt.subplots(figsize=(15, 10))
ax = plt.subplot(211)
historydf.xs('loss', 
             axis = 1, 
             level = 'metric').plot(ylim = (0,1), 
                                    ax = ax)
plt.title("Loss")

ax = plt.subplot(212)
historydf.xs('acc', 
             axis = 1, 
             level = 'metric').plot(ylim = (0,1), 
                                    ax = ax)
plt.title("Accuracy")
plt.xlabel("Epochs")

plt.tight_layout()


Batch Sizes


In [44]:
dflist = []

batch_sizes = [16, 32, 64, 128]

for batch_size in batch_sizes:
    K.clear_session()

    model = Sequential()
    model.add(Dense(1, 
                    input_shape = (4,),
                    activation = 'sigmoid'))
    model.compile(loss = 'binary_crossentropy',
                  optimizer = 'sgd',
                  metrics = ['accuracy'])
    h = model.fit(X_train, 
                  y_train, 
                  batch_size = batch_size, 
                  verbose = 0)
    
    dflist.append(pd.DataFrame(h.history, 
                               index = h.epoch))

In [45]:
historydf = pd.concat(dflist, 
                      axis = 1)
metrics_reported = dflist[0].columns
idx = pd.MultiIndex.from_product([batch_sizes, metrics_reported],
                                 names = ['batch_size', 'metric'])
historydf.columns = idx

In [46]:
historydf


Out[46]:
batch_size 16 32 64 128
metric acc loss acc loss acc loss acc loss
0 0.867708 0.422572 0.291667 1.264257 0.451042 0.797727 0.695833 0.586326
1 0.883333 0.398793 0.321875 1.101881 0.478125 0.754848 0.710417 0.572669
2 0.891667 0.378933 0.360417 0.962482 0.513542 0.717140 0.720833 0.559699
3 0.893750 0.361967 0.401042 0.847353 0.551042 0.684072 0.732292 0.547187
4 0.896875 0.347161 0.467708 0.754965 0.587500 0.655077 0.742708 0.535217
5 0.900000 0.334085 0.553125 0.682577 0.648958 0.629655 0.746875 0.523760
6 0.907292 0.322351 0.634375 0.625867 0.682292 0.607263 0.754167 0.512624
7 0.910417 0.311752 0.746875 0.580939 0.716667 0.587467 0.758333 0.502218
8 0.914583 0.302071 0.811458 0.544851 0.744792 0.569861 0.766667 0.492151
9 0.915625 0.293171 0.843750 0.515226 0.757292 0.554109 0.769792 0.482395

In [47]:
fig, ax = plt.subplots(figsize=(15, 10))
ax = plt.subplot(211)
historydf.xs('loss', 
             axis = 1, 
             level = 'metric').plot(ylim = (0,1), 
                                    ax = ax)
plt.title("Loss")

ax = plt.subplot(212)
historydf.xs('acc', 
             axis=1, 
             level = 'metric').plot(ylim = (0,1), 
                                    ax = ax)
plt.title("Accuracy")
plt.xlabel("Epochs")

plt.tight_layout()


Optimizers


In [48]:
from keras.optimizers import SGD, Adam, Adagrad, RMSprop

In [49]:
dflist = []

optimizers = ['SGD(lr = 0.01)',
              'SGD(lr = 0.01, momentum = 0.3)',
              'SGD(lr = 0.01, momentum = 0.3, nesterov = True)',  
              'Adam(lr = 0.01)',
              'Adagrad(lr = 0.01)',
              'RMSprop(lr = 0.01)']

for opt_name in optimizers:

    K.clear_session()
    
    model = Sequential()
    model.add(Dense(1, 
                    input_shape = (4,), 
                    activation = 'sigmoid'))
    model.compile(loss = 'binary_crossentropy',
                  optimizer = eval(opt_name),
                  metrics = ['accuracy'])
    h = model.fit(X_train, y_train, 
                  batch_size = 16, 
                  epochs = 5, 
                  verbose = 0)
    
    dflist.append(pd.DataFrame(h.history, 
                               index = h.epoch))

In [50]:
historydf = pd.concat(dflist, 
                      axis = 1)
metrics_reported = dflist[0].columns
idx = pd.MultiIndex.from_product([optimizers, metrics_reported],
                                 names = ['optimizers', 'metric'])
historydf.columns = idx

In [51]:
fig, ax = plt.subplots(figsize=(15, 10))
ax = plt.subplot(211)
historydf.xs('loss', axis=1, level='metric').plot(ylim=(0,1), ax=ax)
plt.title("Loss")

ax = plt.subplot(212)
historydf.xs('acc', axis=1, level='metric').plot(ylim=(0,1), ax=ax)
plt.title("Accuracy")
plt.xlabel("Epochs")

plt.tight_layout()


Initializers


In [52]:
dflist = []

initializers = ['zeros', 
                'uniform', 
                'normal',
                'he_normal', 
                'lecun_uniform']

for init in initializers:

    K.clear_session()

    model = Sequential()
    model.add(Dense(1, 
                    input_shape = (4,),
                    kernel_initializer = init,
                    activation = 'sigmoid'))

    model.compile(loss = 'binary_crossentropy',
                  optimizer = 'rmsprop',
                  metrics = ['accuracy'])

    h = model.fit(X_train, y_train, 
                  batch_size = 16, 
                  epochs = 5, 
                  verbose = 0)
    
    dflist.append(pd.DataFrame(h.history, index=h.epoch))

In [53]:
historydf = pd.concat(dflist, 
                      axis = 1)
metrics_reported = dflist[0].columns
idx = pd.MultiIndex.from_product([initializers, metrics_reported],
                                 names = ['initializers', 'metric'])

historydf.columns = idx

In [54]:
fig, ax = plt.subplots(figsize=(15, 10))
ax = plt.subplot(211)
historydf.xs('loss', axis=1, level='metric').plot(ylim=(0,1), ax=ax)
plt.title("Loss")

ax = plt.subplot(212)
historydf.xs('acc', axis=1, level='metric').plot(ylim=(0,1), ax=ax)
plt.title("Accuracy")
plt.xlabel("Epochs")

plt.tight_layout()


Inner layer representation


In [55]:
K.clear_session()

model = Sequential()
model.add(Dense(2, 
                input_shape = (4,), 
                activation = 'relu'))
model.add(Dense(1, 
                activation = 'sigmoid'))
model.compile(loss = 'binary_crossentropy',
              optimizer = RMSprop(lr = 0.01),
              metrics = ['accuracy'])

h = model.fit(X_train, y_train, 
              batch_size = 16, 
              epochs = 20,
              verbose = 1, 
              validation_split = 0.3)
result = model.evaluate(X_test, y_test)


Train on 672 samples, validate on 288 samples
Epoch 1/20
672/672 [==============================] - 0s - loss: 0.4858 - acc: 0.7411 - val_loss: 0.4339 - val_acc: 0.8611
Epoch 2/20
672/672 [==============================] - 0s - loss: 0.3545 - acc: 0.9241 - val_loss: 0.3145 - val_acc: 0.9549
Epoch 3/20
672/672 [==============================] - 0s - loss: 0.2491 - acc: 0.9747 - val_loss: 0.2148 - val_acc: 0.9861
Epoch 4/20
672/672 [==============================] - 0s - loss: 0.1742 - acc: 0.9866 - val_loss: 0.1574 - val_acc: 0.9896
Epoch 5/20
672/672 [==============================] - 0s - loss: 0.1315 - acc: 0.9866 - val_loss: 0.1198 - val_acc: 0.9861
Epoch 6/20
672/672 [==============================] - 0s - loss: 0.1038 - acc: 0.9896 - val_loss: 0.0937 - val_acc: 0.9861
Epoch 7/20
672/672 [==============================] - 0s - loss: 0.0839 - acc: 0.9851 - val_loss: 0.0757 - val_acc: 0.9826
Epoch 8/20
672/672 [==============================] - 0s - loss: 0.0712 - acc: 0.9866 - val_loss: 0.0645 - val_acc: 0.9861
Epoch 9/20
672/672 [==============================] - 0s - loss: 0.0624 - acc: 0.9836 - val_loss: 0.0555 - val_acc: 0.9861
Epoch 10/20
672/672 [==============================] - 0s - loss: 0.0560 - acc: 0.9881 - val_loss: 0.0484 - val_acc: 0.9861
Epoch 11/20
672/672 [==============================] - 0s - loss: 0.0493 - acc: 0.9851 - val_loss: 0.0422 - val_acc: 0.9861
Epoch 12/20
672/672 [==============================] - 0s - loss: 0.0472 - acc: 0.9851 - val_loss: 0.0394 - val_acc: 0.9931
Epoch 13/20
672/672 [==============================] - 0s - loss: 0.0439 - acc: 0.9896 - val_loss: 0.0368 - val_acc: 0.9896
Epoch 14/20
672/672 [==============================] - 0s - loss: 0.0403 - acc: 0.9896 - val_loss: 0.0352 - val_acc: 0.9896
Epoch 15/20
672/672 [==============================] - 0s - loss: 0.0392 - acc: 0.9881 - val_loss: 0.0342 - val_acc: 0.9896
Epoch 16/20
672/672 [==============================] - 0s - loss: 0.0377 - acc: 0.9851 - val_loss: 0.0309 - val_acc: 0.9896
Epoch 17/20
672/672 [==============================] - 0s - loss: 0.0361 - acc: 0.9881 - val_loss: 0.0294 - val_acc: 0.9896
Epoch 18/20
672/672 [==============================] - 0s - loss: 0.0345 - acc: 0.9866 - val_loss: 0.0283 - val_acc: 0.9896
Epoch 19/20
672/672 [==============================] - 0s - loss: 0.0337 - acc: 0.9896 - val_loss: 0.0288 - val_acc: 0.9896
Epoch 20/20
672/672 [==============================] - 0s - loss: 0.0331 - acc: 0.9881 - val_loss: 0.0263 - val_acc: 0.9896
 32/412 [=>............................] - ETA: 0s

In [56]:
result


Out[56]:
[0.032583816718418625, 0.98786407766990292]

In [57]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 2)                 10        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 3         
=================================================================
Total params: 13
Trainable params: 13
Non-trainable params: 0
_________________________________________________________________
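
The parameter counts above follow from (inputs + 1 bias) × units for each Dense layer: (4 + 1) × 2 = 10 for the first layer and (2 + 1) × 1 = 3 for the second, giving the 13 total parameters reported.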

In [58]:
model.layers


Out[58]:
[<keras.layers.core.Dense at 0x7f81d8623ba8>,
 <keras.layers.core.Dense at 0x7f81d8c13fd0>]

In [59]:
inp = model.layers[0].input
out = model.layers[0].output

In [60]:
inp


Out[60]:
<tf.Tensor 'dense_1_input:0' shape=(?, 4) dtype=float32>

In [61]:
out


Out[61]:
<tf.Tensor 'dense_1/Relu:0' shape=(?, 2) dtype=float32>

In [62]:
features_function = K.function([inp], [out])

In [63]:
features_function


Out[63]:
<keras.backend.tensorflow_backend.Function at 0x7f81d8c13748>

In [64]:
features_function([X_test])[0].shape


Out[64]:
(412, 2)

In [65]:
features = features_function([X_test])[0]

In [66]:
plt.scatter(features[:, 0], features[:, 1], c = y_test, cmap='coolwarm')


Out[66]:
<matplotlib.collections.PathCollection at 0x7f81d8d32358>

In [67]:
K.clear_session()

model = Sequential()
model.add(Dense(3, 
                input_shape = (4,), 
                activation = 'relu'))
model.add(Dense(2, 
                activation = 'relu'))
model.add(Dense(1, 
                activation = 'sigmoid'))
model.compile(loss = 'binary_crossentropy',
              optimizer = RMSprop(lr=0.01),
              metrics = ['accuracy'])

In [68]:
inp = model.layers[0].input
out = model.layers[1].output
features_function = K.function([inp], [out])

plt.figure(figsize = (15,10))

for i in range(1, 26):
    plt.subplot(5, 5, i)
    h = model.fit(X_train, y_train, 
                  batch_size = 16, 
                  epochs = 1, 
                  verbose = 0)
    test_accuracy = model.evaluate(X_test, y_test)[1]
    features = features_function([X_test])[0]
    plt.scatter(features[:, 0], 
                features[:, 1], 
                c = y_test, 
                cmap = 'coolwarm')
    plt.xlim(-0.5, 3.5)
    plt.ylim(-0.5, 4.0)
    plt.title('Epoch: {}, Test Acc: {:3.1f} %'.format(i, test_accuracy * 100.0))

plt.tight_layout()


 32/412 [=>............................] - ETA: 0s

Exercise 1

You've just been hired at a wine company, and they would like you to help them build a model that predicts the quality of their wine based on several measurements. They give you a dataset of wine measurements.

  • Load the ../data/wines.csv into Pandas
  • Use the column called "Class" as the target
  • Check how many classes there are in the target and, if necessary, use dummy columns for multi-class classification
  • Use all the other columns as features; check their range and distribution (using a seaborn pairplot)
  • Rescale all the features using either MinMaxScaler or StandardScaler
  • Build a deep model with at least 1 hidden layer to classify the data
  • Choose the cost function: what will you use? Mean Squared Error? Binary Cross-Entropy? Categorical Cross-Entropy?
  • Choose an optimizer
  • Choose a value for the learning rate; you may want to try several values
  • Choose a batch size
  • Train your model on all the data using validation_split=0.2. Can you converge to 100% validation accuracy?
  • What's the minimum number of epochs to converge?
  • Repeat the training several times to verify how stable your results are (a compact sketch of the full workflow follows below)
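
Before the step-by-step solution in the cells below, here is a minimal end-to-end sketch of the workflow described above. It is only an illustration: the layer size, epochs, and batch size are placeholder choices, not the values explored in the following cells, and it relies on the imports already made above (pd, K, Sequential, Dense, Adam).

from sklearn.preprocessing import MinMaxScaler

# Hypothetical compact version of the exercise pipeline.
wines = pd.read_csv('./data/wines.csv')
y = pd.get_dummies(wines['Class']).values                   # 3 classes -> one-hot targets
X = MinMaxScaler().fit_transform(wines.drop('Class', axis=1))

K.clear_session()
net = Sequential()
net.add(Dense(8, input_shape=(13,), activation='relu'))     # at least one hidden layer
net.add(Dense(3, activation='softmax'))
net.compile(optimizer=Adam(lr=0.01),
            loss='categorical_crossentropy',                 # multi-class -> categorical cross-entropy
            metrics=['accuracy'])
net.fit(X, y, validation_split=0.2, epochs=20, batch_size=8, verbose=0)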

In [69]:
ex1 = pd.read_csv('./data/wines.csv')

In [70]:
ex1.head()


Out[70]:
Class Alcohol Malic_acid Ash Alcalinity_of_ash Magnesium Total_phenols Flavanoids Nonflavanoid_phenols Proanthocyanins Color_intensity Hue OD280-OD315_of_diluted_wines Proline
0 1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29 5.64 1.04 3.92 1065
1 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 1050
2 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 1185
3 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 1480
4 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 735

In [71]:
ex1.describe()


Out[71]:
Class Alcohol Malic_acid Ash Alcalinity_of_ash Magnesium Total_phenols Flavanoids Nonflavanoid_phenols Proanthocyanins Color_intensity Hue OD280-OD315_of_diluted_wines Proline
count 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000 178.000000
mean 1.938202 13.000618 2.336348 2.366517 19.494944 99.741573 2.295112 2.029270 0.361854 1.590899 5.058090 0.957449 2.611685 746.893258
std 0.775035 0.811827 1.117146 0.274344 3.339564 14.282484 0.625851 0.998859 0.124453 0.572359 2.318286 0.228572 0.709990 314.907474
min 1.000000 11.030000 0.740000 1.360000 10.600000 70.000000 0.980000 0.340000 0.130000 0.410000 1.280000 0.480000 1.270000 278.000000
25% 1.000000 12.362500 1.602500 2.210000 17.200000 88.000000 1.742500 1.205000 0.270000 1.250000 3.220000 0.782500 1.937500 500.500000
50% 2.000000 13.050000 1.865000 2.360000 19.500000 98.000000 2.355000 2.135000 0.340000 1.555000 4.690000 0.965000 2.780000 673.500000
75% 3.000000 13.677500 3.082500 2.557500 21.500000 107.000000 2.800000 2.875000 0.437500 1.950000 6.200000 1.120000 3.170000 985.000000
max 3.000000 14.830000 5.800000 3.230000 30.000000 162.000000 3.880000 5.080000 0.660000 3.580000 13.000000 1.710000 4.000000 1680.000000

In [72]:
ex1.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 178 entries, 0 to 177
Data columns (total 14 columns):
Class                           178 non-null int64
Alcohol                         178 non-null float64
Malic_acid                      178 non-null float64
Ash                             178 non-null float64
Alcalinity_of_ash               178 non-null float64
Magnesium                       178 non-null int64
Total_phenols                   178 non-null float64
Flavanoids                      178 non-null float64
Nonflavanoid_phenols            178 non-null float64
Proanthocyanins                 178 non-null float64
Color_intensity                 178 non-null float64
Hue                             178 non-null float64
OD280-OD315_of_diluted_wines    178 non-null float64
Proline                         178 non-null int64
dtypes: float64(11), int64(3)
memory usage: 20.9 KB

In [73]:
target = ex1['Class']

In [74]:
target.value_counts()


Out[74]:
2    71
1    59
3    48
Name: Class, dtype: int64

In [75]:
features = ex1.loc[:, ex1.columns != 'Class']  # .loc instead of the deprecated .ix

In [76]:
features.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 178 entries, 0 to 177
Data columns (total 13 columns):
Alcohol                         178 non-null float64
Malic_acid                      178 non-null float64
Ash                             178 non-null float64
Alcalinity_of_ash               178 non-null float64
Magnesium                       178 non-null int64
Total_phenols                   178 non-null float64
Flavanoids                      178 non-null float64
Nonflavanoid_phenols            178 non-null float64
Proanthocyanins                 178 non-null float64
Color_intensity                 178 non-null float64
Hue                             178 non-null float64
OD280-OD315_of_diluted_wines    178 non-null float64
Proline                         178 non-null int64
dtypes: float64(11), int64(2)
memory usage: 19.5 KB

In [77]:
import seaborn as sns

In [78]:
#sns.pairplot(ex1, hue = 'Class')

In [79]:
from sklearn.preprocessing import MinMaxScaler

In [80]:
minmax = MinMaxScaler()

In [81]:
features = minmax.fit_transform(features)

In [82]:
pd.DataFrame(features, 
             columns = ['Alcohol','Malic_acid','Ash',
                        'Alcalinity_of_ash', 'Magnesium', 'Total_phenols',
                        'Flavanoids', 'Nonflavanoid_phenols', 'Proanthocyanins',
                        'Color_intensity','Hue','OD280-OD315_of_diluted_wines','Proline']).head()


Out[82]:
Alcohol Malic_acid Ash Alcalinity_of_ash Magnesium Total_phenols Flavanoids Nonflavanoid_phenols Proanthocyanins Color_intensity Hue OD280-OD315_of_diluted_wines Proline
0 0.842105 0.191700 0.572193 0.257732 0.619565 0.627586 0.573840 0.283019 0.593060 0.372014 0.455285 0.970696 0.561341
1 0.571053 0.205534 0.417112 0.030928 0.326087 0.575862 0.510549 0.245283 0.274448 0.264505 0.463415 0.780220 0.550642
2 0.560526 0.320158 0.700535 0.412371 0.336957 0.627586 0.611814 0.320755 0.757098 0.375427 0.447154 0.695971 0.646933
3 0.878947 0.239130 0.609626 0.319588 0.467391 0.989655 0.664557 0.207547 0.558360 0.556314 0.308943 0.798535 0.857347
4 0.581579 0.365613 0.807487 0.536082 0.521739 0.627586 0.495781 0.490566 0.444795 0.259386 0.455285 0.608059 0.325963

In [83]:
from keras.utils import to_categorical

In [84]:
target = pd.get_dummies(ex1['Class']).values

In [85]:
target


Out[85]:
array([[ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.],
       [ 0.,  0.,  1.]])

In [86]:
ex1['Class'].value_counts()


Out[86]:
2    71
1    59
3    48
Name: Class, dtype: int64

In [87]:
pd.DataFrame(target).head()


Out[87]:
0 1 2
0 1 0 0
1 1 0 0
2 1 0 0
3 1 0 0
4 1 0 0

In [88]:
dflist = []

optimizers = ['SGD(lr = 0.01)',
              'SGD(lr = 0.01, momentum = 0.3)',
              'SGD(lr = 0.01, momentum = 0.3, nesterov = True)',  
              'Adam(lr = 0.01)',
              'Adam(lr = 0.005)',
              'Adagrad(lr = 0.01)',
              'RMSprop(lr = 0.01)']


from tqdm import tqdm
for opt_name in tqdm(optimizers):

    K.clear_session()
    
    model = Sequential()
    model.add(Dense(130, 
                    input_shape = (13,), 
                    activation = 'tanh'))
    model.add(Dense(3, 
                    activation = 'softmax'))
    
    model.compile(optimizer = eval(opt_name),
                  loss = 'categorical_crossentropy',
                  metrics = ['accuracy'])
    h = model.fit(features, target, 
                  batch_size = 16, 
                  epochs = 10, 
                  verbose = 0)
    
    dflist.append(pd.DataFrame(h.history, 
                               index = h.epoch))


100%|██████████| 7/7 [00:04<00:00,  1.54it/s]

In [89]:
historydf = pd.concat(dflist, 
                      axis = 1)
metrics_reported = dflist[0].columns
idx = pd.MultiIndex.from_product([optimizers, metrics_reported],
                                 names = ['optimizers', 'metric'])
historydf.columns = idx

fig, ax = plt.subplots(figsize = (15, 10))
ax = plt.subplot(211)
historydf.xs('loss', 
             axis = 1, 
             level = 'metric').plot(ylim = (0,1),
                                    ax = ax)
plt.title("Loss")

ax = plt.subplot(212)
historydf.xs('acc', 
             axis = 1, 
             level = 'metric').plot(ylim = (0,1), 
                                    ax = ax)
plt.title("Accuracy")
plt.xlabel("Epochs")

plt.tight_layout()


The Adam optimizer with a learning rate of 0.01 was chosen.
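
A quick way to confirm this choice numerically (a small sketch, assuming historydf still holds the optimizer comparison built above):

# Accuracy at the final epoch for each optimizer; idxmax picks the best column label.
final_acc = historydf.xs('acc', axis=1, level='metric').iloc[-1]
print(final_acc.sort_values(ascending=False))
print("Best optimizer:", final_acc.idxmax())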


In [90]:
dflist = []

batch_sizes = [2, 4, 6, 8, 10, 12, 24]


from tqdm import tqdm
for batch_size in tqdm(batch_sizes):

    K.clear_session()
    
    model = Sequential()
    model.add(Dense(130, 
                    input_shape = (13,), 
                    activation = 'tanh'))
    model.add(Dense(3, 
                    activation = 'softmax'))
    
    model.compile(loss = 'categorical_crossentropy',
                  optimizer = Adam(lr = 0.01),
                  metrics = ['accuracy'])
    h = model.fit(features, target, 
                  batch_size = batch_size, 
                  epochs = 10, 
                  verbose = 0)
    
    dflist.append(pd.DataFrame(h.history, 
                               index = h.epoch))


100%|██████████| 7/7 [00:07<00:00,  1.06it/s]

In [91]:
historydf = pd.concat(dflist, 
                      axis = 1)

In [92]:
historydf.head()


Out[92]:
acc loss acc loss acc loss acc loss acc loss acc loss acc loss
0 0.803371 0.528475 0.797753 0.489380 0.764045 0.570996 0.797753 0.511780 0.780899 0.605954 0.752809 0.667297 0.679775 0.816689
1 0.960674 0.144766 0.960674 0.132916 0.971910 0.138444 0.949438 0.180804 0.938202 0.205330 0.949438 0.199800 0.938202 0.350859
2 0.938202 0.139032 0.960674 0.116774 0.971910 0.087018 0.938202 0.152513 0.960674 0.117344 0.966292 0.105297 0.949438 0.204282
3 0.971910 0.076639 0.960674 0.112455 0.955056 0.130058 0.949438 0.137861 0.949438 0.121696 0.988764 0.073977 0.977528 0.122986
4 0.943820 0.107712 0.955056 0.085477 0.977528 0.073341 0.977528 0.071082 0.977528 0.074324 0.977528 0.063888 0.988764 0.094505

In [93]:
metrics_reported = dflist[0].columns
metrics_reported


Out[93]:
Index(['acc', 'loss'], dtype='object')

In [94]:
idx = pd.MultiIndex.from_product([batch_sizes, metrics_reported],
                                 names = ['batch_size', 'metric'])
historydf.columns = idx

fig, ax = plt.subplots(figsize = (15, 10))
ax = plt.subplot(211)
historydf.xs('loss', 
             axis = 1, 
             level = 'metric').plot(ylim = (0,1),
                                    ax = ax)
plt.title("Loss")

ax = plt.subplot(212)
historydf.xs('acc', 
             axis = 1, 
             level = 'metric').plot(ylim = (0,1), 
                                    ax = ax)
plt.title("Accuracy")
plt.xlabel("Epochs")

plt.tight_layout()


A batch size of 6 was chosen! With 178 samples this gives roughly 30 weight updates per epoch.


In [95]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(features, target, test_size = 0.2, random_state = 7)

In [96]:
X_train.shape


Out[96]:
(142, 13)

In [97]:
Y_train.shape


Out[97]:
(142, 3)

In [98]:
K.clear_session()

In [99]:
model = Sequential()
model.add(Dense(130, 
                input_shape = (13, ), 
                activation = 'tanh'))
model.add(Dense(3, 
                activation = 'softmax'))

model.compile(optimizer = Adam(lr = 0.01), 
              loss = 'categorical_crossentropy', 
              metrics = ['accuracy'])

In [100]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 130)               1820      
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 393       
=================================================================
Total params: 2,213
Trainable params: 2,213
Non-trainable params: 0
_________________________________________________________________

In [101]:
from keras.callbacks import History
history = History()

In [102]:
model.fit(X_train, 
          Y_train, 
          validation_split = 0.1, 
          epochs = 200, 
          verbose = 1, callbacks = [history])


Train on 127 samples, validate on 15 samples
Epoch 1/200
127/127 [==============================] - 0s - loss: 0.9770 - acc: 0.3701 - val_loss: 0.7682 - val_acc: 0.9333
Epoch 2/200
127/127 [==============================] - 0s - loss: 0.6486 - acc: 0.8504 - val_loss: 0.5317 - val_acc: 0.9333
Epoch 3/200
127/127 [==============================] - 0s - loss: 0.4315 - acc: 0.8583 - val_loss: 0.4505 - val_acc: 0.8667
Epoch 4/200
127/127 [==============================] - 0s - loss: 0.2787 - acc: 0.9370 - val_loss: 0.3306 - val_acc: 0.9333
Epoch 5/200
127/127 [==============================] - 0s - loss: 0.2176 - acc: 0.9606 - val_loss: 0.2852 - val_acc: 0.9333
Epoch 6/200
127/127 [==============================] - 0s - loss: 0.1521 - acc: 0.9685 - val_loss: 0.2382 - val_acc: 0.9333
Epoch 7/200
127/127 [==============================] - 0s - loss: 0.1242 - acc: 0.9685 - val_loss: 0.2561 - val_acc: 0.9333
Epoch 8/200
127/127 [==============================] - 0s - loss: 0.0976 - acc: 0.9764 - val_loss: 0.2619 - val_acc: 0.9333
Epoch 9/200
127/127 [==============================] - 0s - loss: 0.0804 - acc: 0.9921 - val_loss: 0.2396 - val_acc: 0.9333
Epoch 10/200
127/127 [==============================] - 0s - loss: 0.0682 - acc: 0.9921 - val_loss: 0.2303 - val_acc: 0.9333
Epoch 11/200
127/127 [==============================] - 0s - loss: 0.0612 - acc: 0.9921 - val_loss: 0.2167 - val_acc: 0.9333
Epoch 12/200
127/127 [==============================] - 0s - loss: 0.0499 - acc: 0.9921 - val_loss: 0.2592 - val_acc: 0.9333
Epoch 13/200
127/127 [==============================] - 0s - loss: 0.0469 - acc: 0.9921 - val_loss: 0.2296 - val_acc: 0.9333
Epoch 14/200
127/127 [==============================] - 0s - loss: 0.0414 - acc: 0.9921 - val_loss: 0.2017 - val_acc: 0.9333
Epoch 15/200
127/127 [==============================] - 0s - loss: 0.0365 - acc: 1.0000 - val_loss: 0.2303 - val_acc: 0.9333
Epoch 16/200
127/127 [==============================] - 0s - loss: 0.0327 - acc: 0.9921 - val_loss: 0.2180 - val_acc: 0.9333
Epoch 17/200
127/127 [==============================] - 0s - loss: 0.0298 - acc: 1.0000 - val_loss: 0.1860 - val_acc: 0.9333
Epoch 18/200
127/127 [==============================] - 0s - loss: 0.0275 - acc: 1.0000 - val_loss: 0.1995 - val_acc: 0.9333
Epoch 19/200
127/127 [==============================] - 0s - loss: 0.0240 - acc: 1.0000 - val_loss: 0.1796 - val_acc: 0.9333
Epoch 20/200
127/127 [==============================] - 0s - loss: 0.0231 - acc: 1.0000 - val_loss: 0.1852 - val_acc: 0.9333
Epoch 21/200
127/127 [==============================] - 0s - loss: 0.0201 - acc: 1.0000 - val_loss: 0.2173 - val_acc: 0.9333
Epoch 22/200
127/127 [==============================] - 0s - loss: 0.0203 - acc: 1.0000 - val_loss: 0.1877 - val_acc: 0.9333
Epoch 23/200
127/127 [==============================] - 0s - loss: 0.0187 - acc: 1.0000 - val_loss: 0.1543 - val_acc: 0.9333
Epoch 24/200
127/127 [==============================] - 0s - loss: 0.0170 - acc: 1.0000 - val_loss: 0.1818 - val_acc: 0.9333
Epoch 25/200
127/127 [==============================] - 0s - loss: 0.0157 - acc: 1.0000 - val_loss: 0.1855 - val_acc: 0.9333
Epoch 26/200
127/127 [==============================] - 0s - loss: 0.0146 - acc: 1.0000 - val_loss: 0.1484 - val_acc: 0.9333
Epoch 27/200
127/127 [==============================] - 0s - loss: 0.0140 - acc: 1.0000 - val_loss: 0.1574 - val_acc: 0.9333
Epoch 28/200
127/127 [==============================] - 0s - loss: 0.0118 - acc: 1.0000 - val_loss: 0.1886 - val_acc: 0.9333
Epoch 29/200
127/127 [==============================] - 0s - loss: 0.0127 - acc: 1.0000 - val_loss: 0.1649 - val_acc: 0.9333
Epoch 30/200
127/127 [==============================] - 0s - loss: 0.0107 - acc: 1.0000 - val_loss: 0.1540 - val_acc: 0.9333
Epoch 31/200
127/127 [==============================] - 0s - loss: 0.0103 - acc: 1.0000 - val_loss: 0.1425 - val_acc: 0.9333
Epoch 32/200
127/127 [==============================] - 0s - loss: 0.0106 - acc: 1.0000 - val_loss: 0.1546 - val_acc: 0.9333
Epoch 33/200
127/127 [==============================] - 0s - loss: 0.0094 - acc: 1.0000 - val_loss: 0.1855 - val_acc: 0.9333
Epoch 34/200
127/127 [==============================] - 0s - loss: 0.0098 - acc: 1.0000 - val_loss: 0.1547 - val_acc: 0.9333
Epoch 35/200
127/127 [==============================] - 0s - loss: 0.0089 - acc: 1.0000 - val_loss: 0.1298 - val_acc: 0.9333
Epoch 36/200
127/127 [==============================] - 0s - loss: 0.0087 - acc: 1.0000 - val_loss: 0.1400 - val_acc: 0.9333
Epoch 37/200
127/127 [==============================] - 0s - loss: 0.0071 - acc: 1.0000 - val_loss: 0.1759 - val_acc: 0.9333
Epoch 38/200
127/127 [==============================] - 0s - loss: 0.0080 - acc: 1.0000 - val_loss: 0.1730 - val_acc: 0.9333
Epoch 39/200
127/127 [==============================] - 0s - loss: 0.0066 - acc: 1.0000 - val_loss: 0.1433 - val_acc: 0.9333
Epoch 40/200
127/127 [==============================] - 0s - loss: 0.0076 - acc: 1.0000 - val_loss: 0.1349 - val_acc: 0.8667
Epoch 41/200
127/127 [==============================] - 0s - loss: 0.0065 - acc: 1.0000 - val_loss: 0.1597 - val_acc: 0.9333
Epoch 42/200
127/127 [==============================] - 0s - loss: 0.0061 - acc: 1.0000 - val_loss: 0.1583 - val_acc: 0.9333
Epoch 43/200
127/127 [==============================] - 0s - loss: 0.0061 - acc: 1.0000 - val_loss: 0.1387 - val_acc: 0.9333
Epoch 44/200
127/127 [==============================] - 0s - loss: 0.0054 - acc: 1.0000 - val_loss: 0.1426 - val_acc: 0.9333
Epoch 45/200
127/127 [==============================] - 0s - loss: 0.0051 - acc: 1.0000 - val_loss: 0.1511 - val_acc: 0.9333
Epoch 46/200
127/127 [==============================] - 0s - loss: 0.0049 - acc: 1.0000 - val_loss: 0.1485 - val_acc: 0.9333
Epoch 47/200
127/127 [==============================] - 0s - loss: 0.0047 - acc: 1.0000 - val_loss: 0.1420 - val_acc: 0.9333
Epoch 48/200
127/127 [==============================] - 0s - loss: 0.0046 - acc: 1.0000 - val_loss: 0.1472 - val_acc: 0.9333
Epoch 49/200
127/127 [==============================] - 0s - loss: 0.0045 - acc: 1.0000 - val_loss: 0.1409 - val_acc: 0.9333
Epoch 50/200
127/127 [==============================] - ETA: 0s - loss: 0.0072 - acc: 1.0000 - 0s - loss: 0.0042 - acc: 1.0000 - val_loss: 0.1429 - val_acc: 0.9333
Epoch 51/200
127/127 [==============================] - 0s - loss: 0.0040 - acc: 1.0000 - val_loss: 0.1466 - val_acc: 0.9333
Epoch 52/200
127/127 [==============================] - 0s - loss: 0.0040 - acc: 1.0000 - val_loss: 0.1463 - val_acc: 0.9333
Epoch 53/200
127/127 [==============================] - 0s - loss: 0.0038 - acc: 1.0000 - val_loss: 0.1475 - val_acc: 0.9333
Epoch 54/200
127/127 [==============================] - 0s - loss: 0.0036 - acc: 1.0000 - val_loss: 0.1488 - val_acc: 0.9333
Epoch 55/200
127/127 [==============================] - 0s - loss: 0.0039 - acc: 1.0000 - val_loss: 0.1410 - val_acc: 0.8667
Epoch 56/200
127/127 [==============================] - 0s - loss: 0.0035 - acc: 1.0000 - val_loss: 0.1447 - val_acc: 0.9333
Epoch 57/200
127/127 [==============================] - 0s - loss: 0.0034 - acc: 1.0000 - val_loss: 0.1467 - val_acc: 0.9333
Epoch 58/200
127/127 [==============================] - 0s - loss: 0.0032 - acc: 1.0000 - val_loss: 0.1430 - val_acc: 0.8667
Epoch 59/200
127/127 [==============================] - 0s - loss: 0.0031 - acc: 1.0000 - val_loss: 0.1395 - val_acc: 0.8667
Epoch 60/200
127/127 [==============================] - 0s - loss: 0.0031 - acc: 1.0000 - val_loss: 0.1395 - val_acc: 0.8667
Epoch 61/200
127/127 [==============================] - 0s - loss: 0.0029 - acc: 1.0000 - val_loss: 0.1439 - val_acc: 0.8667
Epoch 62/200
127/127 [==============================] - 0s - loss: 0.0029 - acc: 1.0000 - val_loss: 0.1471 - val_acc: 0.9333
Epoch 63/200
127/127 [==============================] - 0s - loss: 0.0028 - acc: 1.0000 - val_loss: 0.1430 - val_acc: 0.8667
Epoch 64/200
127/127 [==============================] - 0s - loss: 0.0027 - acc: 1.0000 - val_loss: 0.1409 - val_acc: 0.8667
Epoch 65/200
127/127 [==============================] - 0s - loss: 0.0027 - acc: 1.0000 - val_loss: 0.1409 - val_acc: 0.8667
Epoch 66/200
127/127 [==============================] - 0s - loss: 0.0025 - acc: 1.0000 - val_loss: 0.1488 - val_acc: 0.8667
Epoch 67/200
127/127 [==============================] - 0s - loss: 0.0026 - acc: 1.0000 - val_loss: 0.1474 - val_acc: 0.8667
Epoch 68/200
127/127 [==============================] - 0s - loss: 0.0024 - acc: 1.0000 - val_loss: 0.1445 - val_acc: 0.8667
Epoch 69/200
127/127 [==============================] - ETA: 0s - loss: 0.0010 - acc: 1.0000 - 0s - loss: 0.0023 - acc: 1.0000 - val_loss: 0.1419 - val_acc: 0.8667
Epoch 70/200
127/127 [==============================] - 0s - loss: 0.0023 - acc: 1.0000 - val_loss: 0.1418 - val_acc: 0.8667
Epoch 71/200
127/127 [==============================] - 0s - loss: 0.0022 - acc: 1.0000 - val_loss: 0.1426 - val_acc: 0.8667
Epoch 72/200
127/127 [==============================] - 0s - loss: 0.0022 - acc: 1.0000 - val_loss: 0.1449 - val_acc: 0.8667
Epoch 73/200
127/127 [==============================] - 0s - loss: 0.0021 - acc: 1.0000 - val_loss: 0.1460 - val_acc: 0.8667
Epoch 74/200
127/127 [==============================] - 0s - loss: 0.0020 - acc: 1.0000 - val_loss: 0.1444 - val_acc: 0.8667
Epoch 75/200
127/127 [==============================] - 0s - loss: 0.0021 - acc: 1.0000 - val_loss: 0.1462 - val_acc: 0.8667
Epoch 76/200
127/127 [==============================] - 0s - loss: 0.0020 - acc: 1.0000 - val_loss: 0.1441 - val_acc: 0.8667
Epoch 77/200
127/127 [==============================] - 0s - loss: 0.0019 - acc: 1.0000 - val_loss: 0.1430 - val_acc: 0.8667
Epoch 78/200
127/127 [==============================] - 0s - loss: 0.0019 - acc: 1.0000 - val_loss: 0.1447 - val_acc: 0.8667
Epoch 79/200
127/127 [==============================] - 0s - loss: 0.0018 - acc: 1.0000 - val_loss: 0.1447 - val_acc: 0.8667
Epoch 80/200
127/127 [==============================] - 0s - loss: 0.0018 - acc: 1.0000 - val_loss: 0.1447 - val_acc: 0.8667
Epoch 81/200
127/127 [==============================] - 0s - loss: 0.0017 - acc: 1.0000 - val_loss: 0.1444 - val_acc: 0.8667
Epoch 82/200
127/127 [==============================] - 0s - loss: 0.0017 - acc: 1.0000 - val_loss: 0.1449 - val_acc: 0.8667
Epoch 83/200
127/127 [==============================] - 0s - loss: 0.0017 - acc: 1.0000 - val_loss: 0.1456 - val_acc: 0.8667
Epoch 84/200
127/127 [==============================] - ETA: 0s - loss: 0.0011 - acc: 1.0000 - 0s - loss: 0.0016 - acc: 1.0000 - val_loss: 0.1459 - val_acc: 0.8667
Epoch 85/200
127/127 [==============================] - 0s - loss: 0.0016 - acc: 1.0000 - val_loss: 0.1462 - val_acc: 0.8667
Epoch 86/200
127/127 [==============================] - 0s - loss: 0.0016 - acc: 1.0000 - val_loss: 0.1447 - val_acc: 0.8667
Epoch 87/200
127/127 [==============================] - 0s - loss: 0.0015 - acc: 1.0000 - val_loss: 0.1457 - val_acc: 0.8667
Epoch 88/200
127/127 [==============================] - 0s - loss: 0.0015 - acc: 1.0000 - val_loss: 0.1456 - val_acc: 0.8667
Epoch 89/200
127/127 [==============================] - 0s - loss: 0.0015 - acc: 1.0000 - val_loss: 0.1471 - val_acc: 0.8667
Epoch 90/200
127/127 [==============================] - 0s - loss: 0.0014 - acc: 1.0000 - val_loss: 0.1466 - val_acc: 0.8667
Epoch 91/200
127/127 [==============================] - 0s - loss: 0.0014 - acc: 1.0000 - val_loss: 0.1473 - val_acc: 0.8667
Epoch 92/200
127/127 [==============================] - 0s - loss: 0.0014 - acc: 1.0000 - val_loss: 0.1475 - val_acc: 0.8667
Epoch 93/200
127/127 [==============================] - 0s - loss: 0.0013 - acc: 1.0000 - val_loss: 0.1468 - val_acc: 0.8667
Epoch 94/200
127/127 [==============================] - 0s - loss: 0.0013 - acc: 1.0000 - val_loss: 0.1477 - val_acc: 0.9333
Epoch 95/200
127/127 [==============================] - 0s - loss: 0.0013 - acc: 1.0000 - val_loss: 0.1477 - val_acc: 0.9333
Epoch 96/200
127/127 [==============================] - 0s - loss: 0.0013 - acc: 1.0000 - val_loss: 0.1481 - val_acc: 0.8667
Epoch 97/200
127/127 [==============================] - 0s - loss: 0.0012 - acc: 1.0000 - val_loss: 0.1479 - val_acc: 0.8667
Epoch 98/200
127/127 [==============================] - 0s - loss: 0.0012 - acc: 1.0000 - val_loss: 0.1480 - val_acc: 0.8667
Epoch 99/200
127/127 [==============================] - 0s - loss: 0.0012 - acc: 1.0000 - val_loss: 0.1474 - val_acc: 0.8667
Epoch 100/200
127/127 [==============================] - 0s - loss: 0.0012 - acc: 1.0000 - val_loss: 0.1480 - val_acc: 0.8667
Epoch 101/200
127/127 [==============================] - 0s - loss: 0.0012 - acc: 1.0000 - val_loss: 0.1493 - val_acc: 0.8667
Epoch 102/200
127/127 [==============================] - ETA: 0s - loss: 6.0043e-04 - acc: 1.0000 - 0s - loss: 0.0011 - acc: 1.0000 - val_loss: 0.1496 - val_acc: 0.8667
Epoch 103/200
127/127 [==============================] - 0s - loss: 0.0011 - acc: 1.0000 - val_loss: 0.1488 - val_acc: 0.8667
Epoch 104/200
127/127 [==============================] - 0s - loss: 0.0011 - acc: 1.0000 - val_loss: 0.1490 - val_acc: 0.9333
Epoch 105/200
127/127 [==============================] - 0s - loss: 0.0011 - acc: 1.0000 - val_loss: 0.1478 - val_acc: 0.9333
Epoch 106/200
127/127 [==============================] - 0s - loss: 0.0010 - acc: 1.0000 - val_loss: 0.1487 - val_acc: 0.8667
Epoch 107/200
127/127 [==============================] - 0s - loss: 0.0010 - acc: 1.0000 - val_loss: 0.1492 - val_acc: 0.8667
Epoch 108/200
127/127 [==============================] - 0s - loss: 0.0010 - acc: 1.0000 - val_loss: 0.1501 - val_acc: 0.9333
Epoch 109/200
127/127 [==============================] - 0s - loss: 0.0010 - acc: 1.0000 - val_loss: 0.1517 - val_acc: 0.8667
Epoch 110/200
127/127 [==============================] - 0s - loss: 9.9258e-04 - acc: 1.0000 - val_loss: 0.1516 - val_acc: 0.8667
Epoch 111/200
127/127 [==============================] - 0s - loss: 9.8053e-04 - acc: 1.0000 - val_loss: 0.1520 - val_acc: 0.9333
Epoch 112/200
127/127 [==============================] - 0s - loss: 9.5091e-04 - acc: 1.0000 - val_loss: 0.1519 - val_acc: 0.9333
Epoch 113/200
127/127 [==============================] - 0s - loss: 9.4561e-04 - acc: 1.0000 - val_loss: 0.1520 - val_acc: 0.9333
Epoch 114/200
127/127 [==============================] - 0s - loss: 9.1983e-04 - acc: 1.0000 - val_loss: 0.1517 - val_acc: 0.8667
Epoch 115/200
127/127 [==============================] - 0s - loss: 9.2776e-04 - acc: 1.0000 - val_loss: 0.1518 - val_acc: 0.8667
Epoch 116/200
127/127 [==============================] - 0s - loss: 8.9215e-04 - acc: 1.0000 - val_loss: 0.1522 - val_acc: 0.9333
Epoch 117/200
127/127 [==============================] - 0s - loss: 8.6928e-04 - acc: 1.0000 - val_loss: 0.1527 - val_acc: 0.9333
Epoch 118/200
127/127 [==============================] - 0s - loss: 8.6761e-04 - acc: 1.0000 - val_loss: 0.1537 - val_acc: 0.9333
Epoch 119/200
127/127 [==============================] - 0s - loss: 8.5902e-04 - acc: 1.0000 - val_loss: 0.1532 - val_acc: 0.9333
Epoch 120/200
127/127 [==============================] - 0s - loss: 8.3638e-04 - acc: 1.0000 - val_loss: 0.1539 - val_acc: 0.9333
Epoch 121/200
127/127 [==============================] - 0s - loss: 8.1894e-04 - acc: 1.0000 - val_loss: 0.1538 - val_acc: 0.9333
Epoch 122/200
127/127 [==============================] - 0s - loss: 8.0851e-04 - acc: 1.0000 - val_loss: 0.1540 - val_acc: 0.8667
Epoch 123/200
127/127 [==============================] - 0s - loss: 7.9325e-04 - acc: 1.0000 - val_loss: 0.1546 - val_acc: 0.9333
Epoch 124/200
127/127 [==============================] - 0s - loss: 7.8553e-04 - acc: 1.0000 - val_loss: 0.1553 - val_acc: 0.9333
Epoch 125/200
127/127 [==============================] - 0s - loss: 7.9132e-04 - acc: 1.0000 - val_loss: 0.1560 - val_acc: 0.9333
Epoch 126/200
127/127 [==============================] - 0s - loss: 7.8031e-04 - acc: 1.0000 - val_loss: 0.1544 - val_acc: 0.9333
Epoch 127/200
127/127 [==============================] - 0s - loss: 7.4439e-04 - acc: 1.0000 - val_loss: 0.1544 - val_acc: 0.9333
Epoch 128/200
127/127 [==============================] - 0s - loss: 7.3899e-04 - acc: 1.0000 - val_loss: 0.1543 - val_acc: 0.9333
Epoch 129/200
127/127 [==============================] - 0s - loss: 7.3668e-04 - acc: 1.0000 - val_loss: 0.1555 - val_acc: 0.9333
Epoch 130/200
127/127 [==============================] - 0s - loss: 7.1648e-04 - acc: 1.0000 - val_loss: 0.1557 - val_acc: 0.9333
Epoch 131/200
127/127 [==============================] - 0s - loss: 7.0518e-04 - acc: 1.0000 - val_loss: 0.1557 - val_acc: 0.9333
Epoch 132/200
127/127 [==============================] - 0s - loss: 6.9502e-04 - acc: 1.0000 - val_loss: 0.1564 - val_acc: 0.9333
Epoch 133/200
127/127 [==============================] - 0s - loss: 6.8423e-04 - acc: 1.0000 - val_loss: 0.1575 - val_acc: 0.9333
Epoch 134/200
127/127 [==============================] - 0s - loss: 6.7956e-04 - acc: 1.0000 - val_loss: 0.1588 - val_acc: 0.9333
Epoch 135/200
127/127 [==============================] - 0s - loss: 6.7498e-04 - acc: 1.0000 - val_loss: 0.1594 - val_acc: 0.9333
Epoch 136/200
127/127 [==============================] - 0s - loss: 6.5731e-04 - acc: 1.0000 - val_loss: 0.1578 - val_acc: 0.9333
Epoch 137/200
127/127 [==============================] - 0s - loss: 6.6113e-04 - acc: 1.0000 - val_loss: 0.1570 - val_acc: 0.9333
Epoch 138/200
127/127 [==============================] - 0s - loss: 6.5028e-04 - acc: 1.0000 - val_loss: 0.1584 - val_acc: 0.9333
Epoch 139/200
127/127 [==============================] - 0s - loss: 6.3452e-04 - acc: 1.0000 - val_loss: 0.1582 - val_acc: 0.9333
Epoch 140/200
127/127 [==============================] - 0s - loss: 6.2586e-04 - acc: 1.0000 - val_loss: 0.1588 - val_acc: 0.9333
Epoch 141/200
127/127 [==============================] - 0s - loss: 6.1524e-04 - acc: 1.0000 - val_loss: 0.1588 - val_acc: 0.9333
Epoch 142/200
127/127 [==============================] - 0s - loss: 6.0585e-04 - acc: 1.0000 - val_loss: 0.1593 - val_acc: 0.9333
Epoch 143/200
127/127 [==============================] - 0s - loss: 6.0122e-04 - acc: 1.0000 - val_loss: 0.1604 - val_acc: 0.9333
Epoch 144/200
127/127 [==============================] - 0s - loss: 5.9080e-04 - acc: 1.0000 - val_loss: 0.1601 - val_acc: 0.9333
Epoch 145/200
127/127 [==============================] - 0s - loss: 5.7986e-04 - acc: 1.0000 - val_loss: 0.1604 - val_acc: 0.9333
Epoch 146/200
127/127 [==============================] - 0s - loss: 5.7719e-04 - acc: 1.0000 - val_loss: 0.1595 - val_acc: 0.9333
Epoch 147/200
127/127 [==============================] - ETA: 0s - loss: 4.1802e-04 - acc: 1.0000 - 0s - loss: 5.7126e-04 - acc: 1.0000 - val_loss: 0.1610 - val_acc: 0.9333
Epoch 148/200
127/127 [==============================] - 0s - loss: 5.6008e-04 - acc: 1.0000 - val_loss: 0.1608 - val_acc: 0.9333
Epoch 149/200
127/127 [==============================] - 0s - loss: 5.5649e-04 - acc: 1.0000 - val_loss: 0.1620 - val_acc: 0.9333
Epoch 150/200
127/127 [==============================] - 0s - loss: 5.4667e-04 - acc: 1.0000 - val_loss: 0.1624 - val_acc: 0.9333
Epoch 151/200
127/127 [==============================] - 0s - loss: 5.4103e-04 - acc: 1.0000 - val_loss: 0.1623 - val_acc: 0.9333
Epoch 152/200
127/127 [==============================] - 0s - loss: 5.3711e-04 - acc: 1.0000 - val_loss: 0.1607 - val_acc: 0.9333
Epoch 153/200
127/127 [==============================] - 0s - loss: 5.3092e-04 - acc: 1.0000 - val_loss: 0.1607 - val_acc: 0.9333
Epoch 154/200
127/127 [==============================] - 0s - loss: 5.1959e-04 - acc: 1.0000 - val_loss: 0.1617 - val_acc: 0.9333
Epoch 155/200
127/127 [==============================] - 0s - loss: 5.1651e-04 - acc: 1.0000 - val_loss: 0.1638 - val_acc: 0.9333
Epoch 156/200
127/127 [==============================] - 0s - loss: 5.1237e-04 - acc: 1.0000 - val_loss: 0.1643 - val_acc: 0.9333
Epoch 157/200
127/127 [==============================] - 0s - loss: 4.9958e-04 - acc: 1.0000 - val_loss: 0.1640 - val_acc: 0.9333
Epoch 158/200
127/127 [==============================] - 0s - loss: 4.9831e-04 - acc: 1.0000 - val_loss: 0.1631 - val_acc: 0.9333
Epoch 159/200
127/127 [==============================] - 0s - loss: 4.9488e-04 - acc: 1.0000 - val_loss: 0.1631 - val_acc: 0.9333
Epoch 160/200
127/127 [==============================] - 0s - loss: 4.8536e-04 - acc: 1.0000 - val_loss: 0.1638 - val_acc: 0.9333
Epoch 161/200
127/127 [==============================] - 0s - loss: 4.8691e-04 - acc: 1.0000 - val_loss: 0.1634 - val_acc: 0.9333
Epoch 162/200
127/127 [==============================] - 0s - loss: 4.7134e-04 - acc: 1.0000 - val_loss: 0.1651 - val_acc: 0.9333
Epoch 163/200
127/127 [==============================] - 0s - loss: 4.7811e-04 - acc: 1.0000 - val_loss: 0.1681 - val_acc: 0.9333
Epoch 164/200
127/127 [==============================] - 0s - loss: 4.8277e-04 - acc: 1.0000 - val_loss: 0.1651 - val_acc: 0.9333
Epoch 165/200
127/127 [==============================] - 0s - loss: 4.5952e-04 - acc: 1.0000 - val_loss: 0.1646 - val_acc: 0.9333
Epoch 166/200
127/127 [==============================] - 0s - loss: 4.5102e-04 - acc: 1.0000 - val_loss: 0.1654 - val_acc: 0.9333
Epoch 167/200
127/127 [==============================] - 0s - loss: 4.4762e-04 - acc: 1.0000 - val_loss: 0.1672 - val_acc: 0.9333
Epoch 168/200
127/127 [==============================] - 0s - loss: 4.4442e-04 - acc: 1.0000 - val_loss: 0.1667 - val_acc: 0.9333
Epoch 169/200
127/127 [==============================] - 0s - loss: 4.3878e-04 - acc: 1.0000 - val_loss: 0.1676 - val_acc: 0.9333
Epoch 170/200
127/127 [==============================] - 0s - loss: 4.3383e-04 - acc: 1.0000 - val_loss: 0.1669 - val_acc: 0.9333
Epoch 171/200
127/127 [==============================] - 0s - loss: 4.2543e-04 - acc: 1.0000 - val_loss: 0.1667 - val_acc: 0.9333
Epoch 172/200
127/127 [==============================] - 0s - loss: 4.2349e-04 - acc: 1.0000 - val_loss: 0.1670 - val_acc: 0.9333
Epoch 173/200
127/127 [==============================] - 0s - loss: 4.1821e-04 - acc: 1.0000 - val_loss: 0.1672 - val_acc: 0.9333
Epoch 174/200
127/127 [==============================] - 0s - loss: 4.1294e-04 - acc: 1.0000 - val_loss: 0.1675 - val_acc: 0.9333
Epoch 175/200
127/127 [==============================] - 0s - loss: 4.0850e-04 - acc: 1.0000 - val_loss: 0.1700 - val_acc: 0.9333
Epoch 176/200
127/127 [==============================] - 0s - loss: 4.0524e-04 - acc: 1.0000 - val_loss: 0.1705 - val_acc: 0.9333
Epoch 177/200
127/127 [==============================] - 0s - loss: 4.0005e-04 - acc: 1.0000 - val_loss: 0.1705 - val_acc: 0.9333
Epoch 178/200
127/127 [==============================] - 0s - loss: 3.9459e-04 - acc: 1.0000 - val_loss: 0.1695 - val_acc: 0.9333
Epoch 179/200
127/127 [==============================] - 0s - loss: 3.9324e-04 - acc: 1.0000 - val_loss: 0.1680 - val_acc: 0.9333
Epoch 180/200
127/127 [==============================] - 0s - loss: 3.8912e-04 - acc: 1.0000 - val_loss: 0.1688 - val_acc: 0.9333
Epoch 181/200
127/127 [==============================] - 0s - loss: 3.8308e-04 - acc: 1.0000 - val_loss: 0.1685 - val_acc: 0.9333
Epoch 182/200
127/127 [==============================] - 0s - loss: 3.7827e-04 - acc: 1.0000 - val_loss: 0.1695 - val_acc: 0.9333
Epoch 183/200
127/127 [==============================] - 0s - loss: 3.7585e-04 - acc: 1.0000 - val_loss: 0.1709 - val_acc: 0.9333
Epoch 184/200
127/127 [==============================] - 0s - loss: 3.7026e-04 - acc: 1.0000 - val_loss: 0.1708 - val_acc: 0.9333
Epoch 185/200
127/127 [==============================] - 0s - loss: 3.6701e-04 - acc: 1.0000 - val_loss: 0.1707 - val_acc: 0.9333
Epoch 186/200
127/127 [==============================] - 0s - loss: 3.6377e-04 - acc: 1.0000 - val_loss: 0.1705 - val_acc: 0.9333
Epoch 187/200
127/127 [==============================] - 0s - loss: 3.6041e-04 - acc: 1.0000 - val_loss: 0.1707 - val_acc: 0.9333
Epoch 188/200
127/127 [==============================] - 0s - loss: 3.6694e-04 - acc: 1.0000 - val_loss: 0.1739 - val_acc: 0.9333
Epoch 189/200
127/127 [==============================] - 0s - loss: 3.5583e-04 - acc: 1.0000 - val_loss: 0.1724 - val_acc: 0.9333
Epoch 190/200
127/127 [==============================] - 0s - loss: 3.4899e-04 - acc: 1.0000 - val_loss: 0.1723 - val_acc: 0.9333
Epoch 191/200
127/127 [==============================] - 0s - loss: 3.4658e-04 - acc: 1.0000 - val_loss: 0.1720 - val_acc: 0.9333
Epoch 192/200
127/127 [==============================] - 0s - loss: 3.4153e-04 - acc: 1.0000 - val_loss: 0.1720 - val_acc: 0.9333
Epoch 193/200
127/127 [==============================] - 0s - loss: 3.3765e-04 - acc: 1.0000 - val_loss: 0.1723 - val_acc: 0.9333
Epoch 194/200
127/127 [==============================] - 0s - loss: 3.3587e-04 - acc: 1.0000 - val_loss: 0.1742 - val_acc: 0.9333
Epoch 195/200
127/127 [==============================] - 0s - loss: 3.3201e-04 - acc: 1.0000 - val_loss: 0.1750 - val_acc: 0.9333
Epoch 196/200
127/127 [==============================] - 0s - loss: 3.2963e-04 - acc: 1.0000 - val_loss: 0.1741 - val_acc: 0.9333
Epoch 197/200
127/127 [==============================] - 0s - loss: 3.2492e-04 - acc: 1.0000 - val_loss: 0.1744 - val_acc: 0.9333
Epoch 198/200
127/127 [==============================] - 0s - loss: 3.2730e-04 - acc: 1.0000 - val_loss: 0.1729 - val_acc: 0.9333
Epoch 199/200
127/127 [==============================] - 0s - loss: 3.2026e-04 - acc: 1.0000 - val_loss: 0.1741 - val_acc: 0.9333
Epoch 200/200
127/127 [==============================] - 0s - loss: 3.1762e-04 - acc: 1.0000 - val_loss: 0.1743 - val_acc: 0.9333
Out[102]:
<keras.callbacks.History at 0x7f81d356d390>

In [103]:
history.history.keys()


Out[103]:
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])

In [104]:
# Last few values from the history
history.history['val_acc'][-5:]


Out[104]:
[0.93333333730697632,
 0.93333333730697632,
 0.93333333730697632,
 0.93333333730697632,
 0.93333333730697632]

In [105]:
model.evaluate(X_test, Y_test)


32/36 [=========================>....] - ETA: 0s
Out[105]:
[0.042196211793149509, 0.97222222222222221]

The model reaches 97.22% accuracy on the held-out test set.
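
A quick note on how to read the two numbers above (an aside, not part of the original notebook): evaluate returns the loss followed by each metric passed to compile, so the first value is the test loss and the second the test accuracy. A minimal sketch:

# evaluate returns [loss, metric_1, ...] in the order given to compile();
# verbose=0 suppresses the progress bar shown above.
test_loss, test_acc = model.evaluate(X_test, Y_test, verbose=0)
print("test accuracy: {:.2%}".format(test_acc))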

Exercise 2

Since this dataset has 13 features, we can only visualize pairs of features at a time, as we did earlier with the pair plot. We can, however, exploit the fact that a neural network is a function and use it to extract 2 high-level features that represent our data.

  • Build a deep fully connected network with the following structure:
    • Layer 1: 8 nodes
    • Layer 2: 5 nodes
    • Layer 3: 2 nodes
    • Output : 3 nodes
  • Choose activation functions, initializations, the optimizer and the learning rate so that the model converges to 100% accuracy within 20 epochs (not easy)
  • Remember to train the model on the scaled data
  • Define a Feature Function like we did above between the input of the 1st layer and the output of the 3rd layer (an alternative way to extract the same features is sketched right after this list)
  • Calculate the features and plot them on a 2-dimensional scatter plot
  • Can we distinguish the 3 classes well?
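
The worked solution below uses K.function to build the feature extractor. As a complementary sketch (an illustration, not the notebook's code: trained_model, X_scaled and y_labels are hypothetical names), the same 2-D features could be read out by wrapping the relevant layer in a new Model and calling predict:

from keras.models import Model

# Assumes `trained_model` is a fitted network whose third layer has 2 units,
# and `X_scaled` / `y_labels` are the scaled features and integer labels (hypothetical names).
feature_model = Model(inputs=trained_model.input,
                      outputs=trained_model.layers[2].output)

two_d = feature_model.predict(X_scaled)   # shape: (n_samples, 2)
plt.scatter(two_d[:, 0], two_d[:, 1], c=y_labels)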

In [106]:
K.clear_session()
model = Sequential()
model.add(Dense(units = 8, 
                input_shape = (13, ),
                activation = 'tanh', 
                name = 'Layer_1'))
model.add(Dense(units = 5, 
                activation = 'tanh', 
                name = 'Layer_2'))
model.add(Dense(units = 2, 
                activation = 'relu', 
                name = 'Layer_3'))
model.add(Dense(units = 3, 
                activation = 'softmax', 
                name = 'Output'))

model.compile(optimizer = Adam(lr = 0.01), 
              loss = 'categorical_crossentropy', 
              metrics = ['accuracy'])

In [107]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
Layer_1 (Dense)              (None, 8)                 112       
_________________________________________________________________
Layer_2 (Dense)              (None, 5)                 45        
_________________________________________________________________
Layer_3 (Dense)              (None, 2)                 12        
_________________________________________________________________
Output (Dense)               (None, 3)                 9         
=================================================================
Total params: 178
Trainable params: 178
Non-trainable params: 0
_________________________________________________________________

In [108]:
model.fit(X_train, 
          Y_train, 
          validation_split = 0.1, 
          epochs = 20, 
          verbose = 2)


Train on 127 samples, validate on 15 samples
Epoch 1/20
0s - loss: 1.1104 - acc: 0.3150 - val_loss: 1.1024 - val_acc: 0.2000
Epoch 2/20
0s - loss: 1.0929 - acc: 0.3858 - val_loss: 1.0989 - val_acc: 0.2000
Epoch 3/20
0s - loss: 1.0899 - acc: 0.3858 - val_loss: 1.0969 - val_acc: 0.2000
Epoch 4/20
0s - loss: 1.0880 - acc: 0.3858 - val_loss: 1.0961 - val_acc: 0.2000
Epoch 5/20
0s - loss: 1.0871 - acc: 0.3858 - val_loss: 1.0955 - val_acc: 0.2000
Epoch 6/20
0s - loss: 1.0862 - acc: 0.3858 - val_loss: 1.0963 - val_acc: 0.2000
Epoch 7/20
0s - loss: 1.0850 - acc: 0.3858 - val_loss: 1.0960 - val_acc: 0.2000
Epoch 8/20
0s - loss: 1.0842 - acc: 0.3858 - val_loss: 1.0956 - val_acc: 0.2000
Epoch 9/20
0s - loss: 1.0833 - acc: 0.3858 - val_loss: 1.0957 - val_acc: 0.2000
Epoch 10/20
0s - loss: 1.0832 - acc: 0.3858 - val_loss: 1.0961 - val_acc: 0.2000
Epoch 11/20
0s - loss: 1.0833 - acc: 0.3858 - val_loss: 1.0962 - val_acc: 0.2000
Epoch 12/20
0s - loss: 1.0829 - acc: 0.3858 - val_loss: 1.0969 - val_acc: 0.2000
Epoch 13/20
0s - loss: 1.0829 - acc: 0.3858 - val_loss: 1.0970 - val_acc: 0.2000
Epoch 14/20
0s - loss: 1.0828 - acc: 0.3858 - val_loss: 1.0981 - val_acc: 0.2000
Epoch 15/20
0s - loss: 1.0828 - acc: 0.3858 - val_loss: 1.0985 - val_acc: 0.2000
Epoch 16/20
0s - loss: 1.0828 - acc: 0.3858 - val_loss: 1.0996 - val_acc: 0.2000
Epoch 17/20
0s - loss: 1.0830 - acc: 0.3858 - val_loss: 1.0978 - val_acc: 0.2000
Epoch 18/20
0s - loss: 1.0828 - acc: 0.3858 - val_loss: 1.0987 - val_acc: 0.2000
Epoch 19/20
0s - loss: 1.0827 - acc: 0.3858 - val_loss: 1.0986 - val_acc: 0.2000
Epoch 20/20
0s - loss: 1.0830 - acc: 0.3858 - val_loss: 1.1001 - val_acc: 0.2000
Out[108]:
<keras.callbacks.History at 0x7f81d8053390>

In [109]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model).create(prog='dot', format='svg'))


Out[109]:
[Model graph (SVG): Layer_1_input (InputLayer) -> Layer_1 (Dense) -> Layer_2 (Dense) -> Layer_3 (Dense) -> Output (Dense)]

In [110]:
from keras.utils import plot_model
plot_model(model, to_file='5_model_1.png', show_shapes = True)

In [111]:
from IPython.display import Image
Image('5_model_1.png')


Out[111]:
[Displayed image: 5_model_1.png]

In [112]:
# Input of the 1st layer
inp = model.layers[0].input
# Output of the 3rd layer
output = model.layers[2].output

In [113]:
feature_func = K.function([inp], [output])

In [114]:
my_features = feature_func([X_train])[0]

In [115]:
plt.scatter(my_features[:,0], my_features[:,1], c = Y_train)


Out[115]:
<matplotlib.collections.PathCollection at 0x7f81d8b020b8>

In [116]:
model.evaluate(X_test, Y_test)


32/36 [=========================>....] - ETA: 0s
Out[116]:
[1.125011960665385, 0.19444444444444445]

Exercise 3

Keras Functional API. So far we've always used the Sequential model API in Keras. However, Keras also offers a Functional API, which is much more powerful: layers are called on tensors, and a Model ties an input tensor to an output tensor. You can find its documentation on the Keras website. Let's see how we can leverage it (a minimal sketch of the pattern follows the task list).

  • define an input layer called inputs
  • define two hidden layers as before, one with 8 nodes, one with 5 nodes
  • define a second_to_last layer with 2 nodes
  • define an output layer with 3 nodes
  • create a model that connects input and output
  • train it and make sure that it converges
  • define a function between inputs and second_to_last layer
  • recalculate the features and plot them
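
Before the worked solution, here is a minimal sketch of the Functional API pattern (hypothetical variable names, same layer sizes as the exercise). Because two models can share the same tensors, a second model ending at the 2-node bottleneck returns the features directly, without K.function:

from keras.layers import Input, Dense
from keras.models import Model

# Layers are callables applied to tensors.
inp = Input(shape=(13,))
h = Dense(8, activation='tanh')(inp)
h = Dense(5, activation='tanh')(h)
bottleneck = Dense(2, activation='tanh')(h)           # the 2-node second-to-last layer
out = Dense(3, activation='softmax')(bottleneck)

clf = Model(inputs=inp, outputs=out)                   # model that gets trained
extractor = Model(inputs=inp, outputs=bottleneck)      # shares the same weights

# After clf.compile(...) and clf.fit(...), extractor.predict(X) returns the 2-D features.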

In [117]:
K.clear_session()

In [118]:
from keras.models import Model
from keras.layers import Input

In [119]:
inputs = Input(shape = (13, ))

In [120]:
x = Dense(units = 8, 
          kernel_initializer= 'he_normal',
          activation = 'tanh')(inputs)
x = Dense(units = 5, 
          kernel_initializer= 'he_normal', 
          activation = 'tanh')(x)
second_to_last = Dense(units = 2, 
                       kernel_initializer = 'he_normal', 
                       activation = 'tanh')(x)
prediction = Dense(units = 3, 
                   activation = 'softmax')(second_to_last)

In [121]:
model = Model(inputs = inputs, outputs = prediction)
model.compile(optimizer = Adam(lr = 0.005), 
              loss = 'categorical_crossentropy', 
              metrics = ['accuracy'])

In [122]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 13)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 112       
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 45        
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 12        
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 9         
=================================================================
Total params: 178
Trainable params: 178
Non-trainable params: 0
_________________________________________________________________

In [123]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model).create(prog = 'dot', 
                               format = 'svg'))


Out[123]:
[Model graph (SVG): input_1 (InputLayer) -> dense_1 (Dense) -> dense_2 (Dense) -> dense_3 (Dense) -> dense_4 (Dense)]

In [124]:
model.fit(X_train, 
          Y_train,
          batch_size = 6,
          validation_split = 0.1, 
          epochs = 20, 
          verbose = 2)


Train on 127 samples, validate on 15 samples
Epoch 1/20
0s - loss: 1.0675 - acc: 0.3858 - val_loss: 1.0669 - val_acc: 0.2000
Epoch 2/20
0s - loss: 0.9324 - acc: 0.5354 - val_loss: 0.8977 - val_acc: 0.7333
Epoch 3/20
0s - loss: 0.7992 - acc: 0.7244 - val_loss: 0.7851 - val_acc: 0.6667
Epoch 4/20
0s - loss: 0.6869 - acc: 0.7638 - val_loss: 0.6945 - val_acc: 0.8000
Epoch 5/20
0s - loss: 0.6060 - acc: 0.8504 - val_loss: 0.5930 - val_acc: 0.8667
Epoch 6/20
0s - loss: 0.5103 - acc: 0.8976 - val_loss: 0.5259 - val_acc: 0.9333
Epoch 7/20
0s - loss: 0.4412 - acc: 0.9370 - val_loss: 0.4555 - val_acc: 0.9333
Epoch 8/20
0s - loss: 0.3723 - acc: 0.9291 - val_loss: 0.4163 - val_acc: 0.9333
Epoch 9/20
0s - loss: 0.3341 - acc: 0.9213 - val_loss: 0.4166 - val_acc: 0.9333
Epoch 10/20
0s - loss: 0.2766 - acc: 0.9449 - val_loss: 0.3221 - val_acc: 0.9333
Epoch 11/20
0s - loss: 0.2314 - acc: 0.9843 - val_loss: 0.2995 - val_acc: 0.9333
Epoch 12/20
0s - loss: 0.2238 - acc: 0.9685 - val_loss: 0.3199 - val_acc: 0.9333
Epoch 13/20
0s - loss: 0.2105 - acc: 0.9685 - val_loss: 0.3287 - val_acc: 0.9333
Epoch 14/20
0s - loss: 0.1658 - acc: 0.9843 - val_loss: 0.2170 - val_acc: 0.9333
Epoch 15/20
0s - loss: 0.1603 - acc: 0.9764 - val_loss: 0.2956 - val_acc: 0.9333
Epoch 16/20
0s - loss: 0.1449 - acc: 0.9764 - val_loss: 0.2449 - val_acc: 0.9333
Epoch 17/20
0s - loss: 0.1593 - acc: 0.9685 - val_loss: 0.2330 - val_acc: 0.9333
Epoch 18/20
0s - loss: 0.1093 - acc: 0.9921 - val_loss: 0.1898 - val_acc: 0.9333
Epoch 19/20
0s - loss: 0.1006 - acc: 1.0000 - val_loss: 0.1950 - val_acc: 0.9333
Epoch 20/20
0s - loss: 0.0936 - acc: 0.9921 - val_loss: 0.2117 - val_acc: 0.9333
Out[124]:
<keras.callbacks.History at 0x7f81d84920f0>

In [125]:
features_function = K.function([inputs], [second_to_last])

In [126]:
features = features_function([X_train])[0]

In [127]:
plt.scatter(features[:, 0], features[:, 1], c = Y_train)


Out[127]:
<matplotlib.collections.PathCollection at 0x7f81d874ef98>

Exercise 4

Keras offers the possibility to call a function at the end of each epoch (and at other points during training). These are Callbacks, documented on the Keras website. Callbacks allow us to add some neat functionality; in this exercise we'll explore a few of them (a minimal custom-callback sketch follows the task list).

  • Split the data into train and test sets with a test_size = 0.3 and random_state = 42
  • Reset and recompile your model
  • train the model on the train data using validation_data = (X_test, y_test)
  • Use the EarlyStopping callback to stop your training if the val_loss doesn't improve
  • Use the ModelCheckpoint callback to save the best model (lowest val_loss) to disk during training
  • Use the TensorBoard callback to output your training information to a /tmp/ subdirectory
  • Watch the next video for an overview of tensorboard
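
As a minimal sketch of the "call a function at each epoch" idea (an illustration, not part of the original solution), a LambdaCallback can wrap any Python function and be passed to fit alongside the built-in callbacks used below:

from keras.callbacks import LambdaCallback

# Prints the validation loss at the end of every epoch.
print_val_loss = LambdaCallback(
    on_epoch_end=lambda epoch, logs: print("epoch", epoch, "val_loss", logs.get('val_loss')))

# It would simply be added to the callbacks list, e.g.
# model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
#           callbacks=[early_stop, model_check, tensorboard, print_val_loss])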

In [128]:
# .loc replaces the deprecated .ix indexer
X = ex1.loc[:, ex1.columns != 'Class']
X.shape


Out[128]:
(178, 13)

In [129]:
Y = ex1['Class']
Y.shape


Out[129]:
(178,)

In [130]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [131]:
X.head()


Out[131]:
Alcohol Malic_acid Ash Alcalinity_of_ash Magnesium Total_phenols Flavanoids Nonflavanoid_phenols Proanthocyanins Color_intensity Hue OD280-OD315_of_diluted_wines Proline
0 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29 5.64 1.04 3.92 1065
1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 1050
2 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 1185
3 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 1480
4 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 735

In [132]:
minmax = MinMaxScaler()
X = minmax.fit_transform(X)

In [133]:
Y.head()


Out[133]:
0    1
1    1
2    1
3    1
4    1
Name: Class, dtype: int64

In [134]:
Y = pd.get_dummies(Y)

In [135]:
Y.head()


Out[135]:
1 2 3
0 1 0 0
1 1 0 0
2 1 0 0
3 1 0 0
4 1 0 0

In [136]:
Y = Y.values

In [137]:
X_train, X_test, Y_train, Y_test = train_test_split(X, 
                                                    Y, 
                                                    test_size = 0.3, 
                                                    random_state = 42)

In [138]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape


Out[138]:
((124, 13), (54, 13), (124, 3), (54, 3))

In [139]:
K.clear_session()

In [140]:
from keras.models import Model
from keras.layers import Input, Dense

In [141]:
inputs = Input(shape = (13, ))

In [142]:
net = Dense(units = 10, 
            activation = 'tanh', 
            kernel_initializer = 'he_normal')(inputs)
net = Dense(units = 10, 
            activation = 'tanh', 
            kernel_initializer = 'he_normal')(net)
prediction = Dense(units = 3, 
                   activation = 'softmax')(net)

In [143]:
model = Model(inputs = inputs, outputs = prediction)

In [144]:
model.compile(optimizer = Adam(lr = 0.01), 
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

In [145]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 13)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                140       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                110       
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 33        
=================================================================
Total params: 283
Trainable params: 283
Non-trainable params: 0
_________________________________________________________________

In [146]:
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
early_stop = EarlyStopping(monitor = 'val_loss', patience = 5)
model_check = ModelCheckpoint(filepath = './tmp/Notebook_5.hdf5', 
                              monitor = 'val_loss',
                              save_best_only = True)
tensorboard = TensorBoard(log_dir = './logs')

In [147]:
model.fit(X_train, 
          Y_train, 
          batch_size = 6, 
          epochs = 200, 
          validation_data = (X_test, Y_test), 
          verbose = 1, 
          callbacks = [early_stop, model_check, tensorboard])


Train on 124 samples, validate on 54 samples
Epoch 1/200
124/124 [==============================] - 0s - loss: 0.8327 - acc: 0.6452 - val_loss: 0.5552 - val_acc: 0.8889
Epoch 2/200
124/124 [==============================] - 0s - loss: 0.3953 - acc: 0.9274 - val_loss: 0.3273 - val_acc: 0.9259
Epoch 3/200
124/124 [==============================] - 0s - loss: 0.2290 - acc: 0.9435 - val_loss: 0.1792 - val_acc: 0.9259
Epoch 4/200
124/124 [==============================] - 0s - loss: 0.1551 - acc: 0.9597 - val_loss: 0.1484 - val_acc: 0.9815
Epoch 5/200
124/124 [==============================] - 0s - loss: 0.1254 - acc: 0.9677 - val_loss: 0.1119 - val_acc: 0.9444
Epoch 6/200
124/124 [==============================] - 0s - loss: 0.1084 - acc: 0.9677 - val_loss: 0.0916 - val_acc: 0.9630
Epoch 7/200
124/124 [==============================] - 0s - loss: 0.0818 - acc: 0.9839 - val_loss: 0.0722 - val_acc: 0.9815
Epoch 8/200
124/124 [==============================] - 0s - loss: 0.1004 - acc: 0.9677 - val_loss: 0.0647 - val_acc: 0.9815
Epoch 9/200
124/124 [==============================] - 0s - loss: 0.0872 - acc: 0.9597 - val_loss: 0.1631 - val_acc: 0.9444
Epoch 10/200
124/124 [==============================] - 0s - loss: 0.0927 - acc: 0.9597 - val_loss: 0.1530 - val_acc: 0.9444
Epoch 11/200
124/124 [==============================] - 0s - loss: 0.0911 - acc: 0.9597 - val_loss: 0.0947 - val_acc: 0.9630
Epoch 12/200
124/124 [==============================] - 0s - loss: 0.0419 - acc: 0.9919 - val_loss: 0.0726 - val_acc: 0.9815
Epoch 13/200
124/124 [==============================] - 0s - loss: 0.0286 - acc: 0.9919 - val_loss: 0.0632 - val_acc: 0.9815
Epoch 14/200
124/124 [==============================] - 0s - loss: 0.0466 - acc: 0.9919 - val_loss: 0.0509 - val_acc: 0.9815
Epoch 15/200
124/124 [==============================] - 0s - loss: 0.0232 - acc: 1.0000 - val_loss: 0.1016 - val_acc: 0.9630
Epoch 16/200
124/124 [==============================] - 0s - loss: 0.0201 - acc: 1.0000 - val_loss: 0.0903 - val_acc: 0.9815
Epoch 17/200
124/124 [==============================] - 0s - loss: 0.0138 - acc: 1.0000 - val_loss: 0.0703 - val_acc: 0.9815
Epoch 18/200
124/124 [==============================] - 0s - loss: 0.0110 - acc: 1.0000 - val_loss: 0.0770 - val_acc: 0.9815
Epoch 19/200
124/124 [==============================] - 0s - loss: 0.0103 - acc: 1.0000 - val_loss: 0.0750 - val_acc: 0.9815
Epoch 20/200
124/124 [==============================] - 0s - loss: 0.0075 - acc: 1.0000 - val_loss: 0.0731 - val_acc: 0.9815
Out[147]:
<keras.callbacks.History at 0x7f81f4603a58>