Fashion MNIST with Keras and Resnet

Adapted from

https://github.com/margaretmz/deep-learning/blob/master/fashion_mnist_keras.ipynb



In [0]:

    
!pip install -q tf-nightly-gpu-2.0-preview



In [2]:

    
import tensorflow as tf
print(tf.__version__)









    



2.0.0-dev20190819



In [0]:

    
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()



In [4]:

    
x_train.shape









    Out[4]:





(60000, 28, 28)



In [0]:

    
import numpy as np

# add empty color dimension
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)



In [6]:

    
x_train.shape









    Out[6]:





(60000, 28, 28, 1)



In [0]:

    
# recude memory and compute time
NUMBER_OF_SAMPLES = 50000



In [0]:

    
x_train_samples = x_train[:NUMBER_OF_SAMPLES]



In [0]:

    
y_train_samples = y_train[:NUMBER_OF_SAMPLES]



In [0]:

    
import skimage.data
import skimage.transform

x_train_224 = np.array([skimage.transform.resize(image, (32, 32)) for image in x_train_samples])



In [11]:

    
x_train_224.shape









    Out[11]:





(50000, 32, 32, 1)

Alternative: ResNet

basic ideas
- depth does matter
- 8x deeper than VGG
- possible by using shortcuts and skipping final fc layer
- prevents vanishing gradient problem
https://keras.io/applications/#resnet50
https://medium.com/towards-data-science/neural-network-architectures-156e5bad51ba

http://arxiv.org/abs/1512.03385



In [12]:

    
from tensorflow.keras.applications.resnet50 import ResNet50

# https://keras.io/applications/#mobilenet
# https://arxiv.org/pdf/1704.04861.pdf
from tensorflow.keras.applications.mobilenet import MobileNet

# model = ResNet50(classes=10, weights=None, input_shape=(32, 32, 1))
model = MobileNet(classes=10, weights=None, input_shape=(32, 32, 1))

model.summary()









    



Model: "mobilenet_1.00_32"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 32, 32, 1)]       0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 33, 33, 1)         0         
_________________________________________________________________
conv1 (Conv2D)               (None, 16, 16, 32)        288       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 16, 16, 32)        128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 16, 16, 32)        0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 16, 16, 32)        288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 16, 16, 32)        128       
_________________________________________________________________
conv_dw_1_relu (ReLU)        (None, 16, 16, 32)        0         
_________________________________________________________________
conv_pw_1 (Conv2D)           (None, 16, 16, 64)        2048      
_________________________________________________________________
conv_pw_1_bn (BatchNormaliza (None, 16, 16, 64)        256       
_________________________________________________________________
conv_pw_1_relu (ReLU)        (None, 16, 16, 64)        0         
_________________________________________________________________
conv_pad_2 (ZeroPadding2D)   (None, 17, 17, 64)        0         
_________________________________________________________________
conv_dw_2 (DepthwiseConv2D)  (None, 8, 8, 64)          576       
_________________________________________________________________
conv_dw_2_bn (BatchNormaliza (None, 8, 8, 64)          256       
_________________________________________________________________
conv_dw_2_relu (ReLU)        (None, 8, 8, 64)          0         
_________________________________________________________________
conv_pw_2 (Conv2D)           (None, 8, 8, 128)         8192      
_________________________________________________________________
conv_pw_2_bn (BatchNormaliza (None, 8, 8, 128)         512       
_________________________________________________________________
conv_pw_2_relu (ReLU)        (None, 8, 8, 128)         0         
_________________________________________________________________
conv_dw_3 (DepthwiseConv2D)  (None, 8, 8, 128)         1152      
_________________________________________________________________
conv_dw_3_bn (BatchNormaliza (None, 8, 8, 128)         512       
_________________________________________________________________
conv_dw_3_relu (ReLU)        (None, 8, 8, 128)         0         
_________________________________________________________________
conv_pw_3 (Conv2D)           (None, 8, 8, 128)         16384     
_________________________________________________________________
conv_pw_3_bn (BatchNormaliza (None, 8, 8, 128)         512       
_________________________________________________________________
conv_pw_3_relu (ReLU)        (None, 8, 8, 128)         0         
_________________________________________________________________
conv_pad_4 (ZeroPadding2D)   (None, 9, 9, 128)         0         
_________________________________________________________________
conv_dw_4 (DepthwiseConv2D)  (None, 4, 4, 128)         1152      
_________________________________________________________________
conv_dw_4_bn (BatchNormaliza (None, 4, 4, 128)         512       
_________________________________________________________________
conv_dw_4_relu (ReLU)        (None, 4, 4, 128)         0         
_________________________________________________________________
conv_pw_4 (Conv2D)           (None, 4, 4, 256)         32768     
_________________________________________________________________
conv_pw_4_bn (BatchNormaliza (None, 4, 4, 256)         1024      
_________________________________________________________________
conv_pw_4_relu (ReLU)        (None, 4, 4, 256)         0         
_________________________________________________________________
conv_dw_5 (DepthwiseConv2D)  (None, 4, 4, 256)         2304      
_________________________________________________________________
conv_dw_5_bn (BatchNormaliza (None, 4, 4, 256)         1024      
_________________________________________________________________
conv_dw_5_relu (ReLU)        (None, 4, 4, 256)         0         
_________________________________________________________________
conv_pw_5 (Conv2D)           (None, 4, 4, 256)         65536     
_________________________________________________________________
conv_pw_5_bn (BatchNormaliza (None, 4, 4, 256)         1024      
_________________________________________________________________
conv_pw_5_relu (ReLU)        (None, 4, 4, 256)         0         
_________________________________________________________________
conv_pad_6 (ZeroPadding2D)   (None, 5, 5, 256)         0         
_________________________________________________________________
conv_dw_6 (DepthwiseConv2D)  (None, 2, 2, 256)         2304      
_________________________________________________________________
conv_dw_6_bn (BatchNormaliza (None, 2, 2, 256)         1024      
_________________________________________________________________
conv_dw_6_relu (ReLU)        (None, 2, 2, 256)         0         
_________________________________________________________________
conv_pw_6 (Conv2D)           (None, 2, 2, 512)         131072    
_________________________________________________________________
conv_pw_6_bn (BatchNormaliza (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_pw_6_relu (ReLU)        (None, 2, 2, 512)         0         
_________________________________________________________________
conv_dw_7 (DepthwiseConv2D)  (None, 2, 2, 512)         4608      
_________________________________________________________________
conv_dw_7_bn (BatchNormaliza (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_dw_7_relu (ReLU)        (None, 2, 2, 512)         0         
_________________________________________________________________
conv_pw_7 (Conv2D)           (None, 2, 2, 512)         262144    
_________________________________________________________________
conv_pw_7_bn (BatchNormaliza (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_pw_7_relu (ReLU)        (None, 2, 2, 512)         0         
_________________________________________________________________
conv_dw_8 (DepthwiseConv2D)  (None, 2, 2, 512)         4608      
_________________________________________________________________
conv_dw_8_bn (BatchNormaliza (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_dw_8_relu (ReLU)        (None, 2, 2, 512)         0         
_________________________________________________________________
conv_pw_8 (Conv2D)           (None, 2, 2, 512)         262144    
_________________________________________________________________
conv_pw_8_bn (BatchNormaliza (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_pw_8_relu (ReLU)        (None, 2, 2, 512)         0         
_________________________________________________________________
conv_dw_9 (DepthwiseConv2D)  (None, 2, 2, 512)         4608      
_________________________________________________________________
conv_dw_9_bn (BatchNormaliza (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_dw_9_relu (ReLU)        (None, 2, 2, 512)         0         
_________________________________________________________________
conv_pw_9 (Conv2D)           (None, 2, 2, 512)         262144    
_________________________________________________________________
conv_pw_9_bn (BatchNormaliza (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_pw_9_relu (ReLU)        (None, 2, 2, 512)         0         
_________________________________________________________________
conv_dw_10 (DepthwiseConv2D) (None, 2, 2, 512)         4608      
_________________________________________________________________
conv_dw_10_bn (BatchNormaliz (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_dw_10_relu (ReLU)       (None, 2, 2, 512)         0         
_________________________________________________________________
conv_pw_10 (Conv2D)          (None, 2, 2, 512)         262144    
_________________________________________________________________
conv_pw_10_bn (BatchNormaliz (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_pw_10_relu (ReLU)       (None, 2, 2, 512)         0         
_________________________________________________________________
conv_dw_11 (DepthwiseConv2D) (None, 2, 2, 512)         4608      
_________________________________________________________________
conv_dw_11_bn (BatchNormaliz (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_dw_11_relu (ReLU)       (None, 2, 2, 512)         0         
_________________________________________________________________
conv_pw_11 (Conv2D)          (None, 2, 2, 512)         262144    
_________________________________________________________________
conv_pw_11_bn (BatchNormaliz (None, 2, 2, 512)         2048      
_________________________________________________________________
conv_pw_11_relu (ReLU)       (None, 2, 2, 512)         0         
_________________________________________________________________
conv_pad_12 (ZeroPadding2D)  (None, 3, 3, 512)         0         
_________________________________________________________________
conv_dw_12 (DepthwiseConv2D) (None, 1, 1, 512)         4608      
_________________________________________________________________
conv_dw_12_bn (BatchNormaliz (None, 1, 1, 512)         2048      
_________________________________________________________________
conv_dw_12_relu (ReLU)       (None, 1, 1, 512)         0         
_________________________________________________________________
conv_pw_12 (Conv2D)          (None, 1, 1, 1024)        524288    
_________________________________________________________________
conv_pw_12_bn (BatchNormaliz (None, 1, 1, 1024)        4096      
_________________________________________________________________
conv_pw_12_relu (ReLU)       (None, 1, 1, 1024)        0         
_________________________________________________________________
conv_dw_13 (DepthwiseConv2D) (None, 1, 1, 1024)        9216      
_________________________________________________________________
conv_dw_13_bn (BatchNormaliz (None, 1, 1, 1024)        4096      
_________________________________________________________________
conv_dw_13_relu (ReLU)       (None, 1, 1, 1024)        0         
_________________________________________________________________
conv_pw_13 (Conv2D)          (None, 1, 1, 1024)        1048576   
_________________________________________________________________
conv_pw_13_bn (BatchNormaliz (None, 1, 1, 1024)        4096      
_________________________________________________________________
conv_pw_13_relu (ReLU)       (None, 1, 1, 1024)        0         
_________________________________________________________________
global_average_pooling2d (Gl (None, 1024)              0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 1, 1, 1024)        0         
_________________________________________________________________
dropout (Dropout)            (None, 1, 1, 1024)        0         
_________________________________________________________________
conv_preds (Conv2D)          (None, 1, 1, 10)          10250     
_________________________________________________________________
reshape_2 (Reshape)          (None, 10)                0         
_________________________________________________________________
act_softmax (Activation)     (None, 10)                0         
=================================================================
Total params: 3,238,538
Trainable params: 3,216,650
Non-trainable params: 21,888
_________________________________________________________________



In [13]:

    
%%time

BATCH_SIZE=10
EPOCHS = 10

model.compile(loss='sparse_categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

history = model.fit(x_train_224, y_train_samples, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, verbose=1)









    



Train on 40000 samples, validate on 10000 samples
Epoch 1/10
50000/40000 [=====================================] - 58s 1ms/sample - loss: 0.8670 - accuracy: 0.7041 - val_loss: 0.6607 - val_accuracy: 0.7319
Epoch 2/10
50000/40000 [=====================================] - 52s 1ms/sample - loss: 0.5105 - accuracy: 0.8125 - val_loss: 0.4264 - val_accuracy: 0.8509
Epoch 3/10
50000/40000 [=====================================] - 52s 1ms/sample - loss: 0.4074 - accuracy: 0.8528 - val_loss: 0.3954 - val_accuracy: 0.8757
Epoch 4/10
50000/40000 [=====================================] - 52s 1ms/sample - loss: 0.3718 - accuracy: 0.8736 - val_loss: 0.3468 - val_accuracy: 0.8754
Epoch 5/10
50000/40000 [=====================================] - 52s 1ms/sample - loss: 0.2917 - accuracy: 0.8883 - val_loss: 0.3616 - val_accuracy: 0.8866
Epoch 6/10
50000/40000 [=====================================] - 52s 1ms/sample - loss: 0.2873 - accuracy: 0.8964 - val_loss: 0.3145 - val_accuracy: 0.8890
Epoch 7/10
50000/40000 [=====================================] - 52s 1ms/sample - loss: 0.2755 - accuracy: 0.9036 - val_loss: 0.3005 - val_accuracy: 0.8980
Epoch 8/10
50000/40000 [=====================================] - 52s 1ms/sample - loss: 0.2390 - accuracy: 0.9130 - val_loss: 0.2683 - val_accuracy: 0.9056






    



WARNING: Logging before flag parsing goes to stderr.
W0819 14:20:06.290217 140445368510336 training_v2.py:146] Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 50000 batches). You may need to use the repeat() function when building your dataset.






    



Epoch 9/10
    0/40000 [..............................] - ETA: 0s





    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-b1f3f8e4d39e> in <module>()
----> 1 get_ipython().run_cell_magic('time', '', "\nBATCH_SIZE=10\nEPOCHS = 10\n\nmodel.compile(loss='sparse_categorical_crossentropy',\n             optimizer='adam',\n             metrics=['accuracy'])\n\nhistory = model.fit(x_train_224, y_train_samples, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.2, verbose=1)")

/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
   2115             magic_arg_s = self.var_expand(line, stack_depth)
   2116             with self.builtin_trap:
-> 2117                 result = fn(magic_arg_s, cell)
   2118             return result
   2119 

</usr/local/lib/python3.6/dist-packages/decorator.py:decorator-gen-60> in time(self, line, cell, local_ns)

/usr/local/lib/python3.6/dist-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
    186     # but it's overkill for just that one bit of state.
    187     def magic_deco(arg):
--> 188         call = lambda f, *a, **k: f(*a, **k)
    189 
    190         if callable(arg):

/usr/local/lib/python3.6/dist-packages/IPython/core/magics/execution.py in time(self, line, cell, local_ns)
   1191         else:
   1192             st = clock2()
-> 1193             exec(code, glob, local_ns)
   1194             end = clock2()
   1195             out = None

<timed exec> in <module>()

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    729         max_queue_size=max_queue_size,
    730         workers=workers,
--> 731         use_multiprocessing=use_multiprocessing)
    732 
    733   def evaluate(self,

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    329                 mode=ModeKeys.TRAIN,
    330                 training_context=training_context,
--> 331                 total_epochs=epochs)
    332             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    333 

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    178 
    179   # End of an epoch.
--> 180   aggregator.finalize()
    181   return aggregator.results
    182 

/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_utils.py in finalize(self)
    136   def finalize(self):
    137     if not self.results:
--> 138       raise ValueError('Empty training data.')
    139     self.results[0] /= (self.num_samples or self.steps)
    140 

ValueError: Empty training data.



In [15]:

    
import matplotlib.pyplot as plt

plt.xlabel('epochs')
plt.ylabel('loss')

plt.yscale('log')

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['Loss', 'Validation Loss'])









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-f0885def40f0> in <module>()
      6 plt.yscale('log')
      7 
----> 8 plt.plot(history.history['loss'])
      9 plt.plot(history.history['val_loss'])
     10 plt.legend(['Loss', 'Validation Loss'])

NameError: name 'history' is not defined



In [0]:

    
plt.xlabel('epochs')
plt.ylabel('accuracy')

plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.legend(['Accuracy', 'Validation Accuracy'])

Checking our results (inference)



In [0]:

    
x_test_224 = np.array([skimage.transform.resize(image, (32, 32)) for image in x_test])



In [0]:

    
LABEL_NAMES = ['t_shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle_boots']


def plot_predictions(images, predictions):
  n = images.shape[0]
  nc = int(np.ceil(n / 4))
  f, axes = plt.subplots(nc, 4)
  for i in range(nc * 4):
    y = i // 4
    x = i % 4
    axes[x, y].axis('off')
    
    label = LABEL_NAMES[np.argmax(predictions[i])]
    confidence = np.max(predictions[i])
    if i > n:
      continue
    axes[x, y].imshow(images[i])
    axes[x, y].text(0.5, 0.5, label + '\n%.3f' % confidence, fontsize=14)

  plt.gcf().set_size_inches(8, 8)  

plot_predictions(np.squeeze(x_test_224[:16]), 
                 model.predict(x_test_224[:16]))



In [0]:

    
train_loss, train_accuracy = model.evaluate(x_train_224, y_train_samples, batch_size=BATCH_SIZE)
train_accuracy



In [0]:

    
test_loss, test_accuracy = model.evaluate(x_test_224, y_test, batch_size=BATCH_SIZE)
test_accuracy



In [0]: