In [ ]:
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
In this lab, you will attempt to find an improvement on a mini-VGG for CIFAR-10.
If one tried training a full VGG16 or VGG19 on CIFAR-10, the model would not learn. This is because the input size of CIFAR-10 is very small (32x32), so by the time you get to the bottleneck layer (last convolution before flattening), the feature maps will be only 1x1 pixels -- and thus have no spatial information. Conventional practice is that 4x4 is ideal, but 3x3 generally works.
Below is a composable "class" based version for building VGG networks. Spend a few moments looking at the structure and get familiar.
In [ ]:
# VGG (16 and 19 & Composable) (2014)
# Paper: https://arxiv.org/pdf/1409.1556.pdf
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
class VGG(object):
    """ VGG (composable)

        Builds a VGG16 or VGG19 Keras model out of three parts:
          stem       : a single 3x3 convolution with 64 filters
          learner    : a sequence of convolutional groups, each ending in max pooling
          classifier : flatten, two 4096-unit dense layers and a softmax output layer

        The group() static method can also be used on its own to compose
        custom (e.g. smaller) VGG-style networks.
    """
    # Meta-parameter: list of groups: (number of conv layers, number of filters).
    # The stem supplies the first 64-filter convolution, so the first group only
    # adds one more. Both depths use the same filter widths (64-128-256-512-512);
    # VGG19 differs only in having 4 conv layers in each of the last three groups.
    groups = {
        16: [(1, 64), (2, 128), (3, 256), (3, 512), (3, 512)],  # VGG16
        19: [(1, 64), (2, 128), (4, 256), (4, 512), (4, 512)],  # VGG19
    }

    # Default kernel initializer for every convolutional and dense layer
    init_weights = 'glorot_uniform'
    _model = None

    def __init__(self, n_layers, input_shape=(224, 224, 3), n_classes=1000):
        """ Construct a VGG model
            n_layers    : number of layers (16 or 19)
            input_shape : input shape to the model
            n_classes   : number of output classes

            Raises ValueError if n_layers has no entry in the groups table.
        """
        # Validate against the table itself so the check can never drift from it
        if n_layers not in self.groups:
            raise ValueError("VGG: Invalid value for n_layers")

        # The input vector
        inputs = Input(input_shape)

        # The stem group
        x = self.stem(inputs)

        # The learner
        x = self.learner(x, self.groups[n_layers])

        # The classifier
        outputs = self.classifier(x, n_classes)

        # Instantiate the Model
        self._model = Model(inputs, outputs)

    @property
    def model(self):
        """ The underlying Keras model """
        return self._model

    @model.setter
    def model(self, _model):
        self._model = _model

    def stem(self, inputs):
        """ Construct the Stem Convolutional Group
            inputs : the input vector

            Returns the output tensor of the stem convolution.
        """
        x = Conv2D(64, (3, 3), strides=(1, 1), padding="same", activation="relu",
                   kernel_initializer=self.init_weights)(inputs)
        return x

    def learner(self, x, blocks):
        """ Construct the (Feature) Learner
            x      : input to the learner
            blocks : list of groups: (number of conv layers, number of filters)

            Returns the output tensor of the last group.
        """
        # The convolutional groups
        for n_layers, n_filters in blocks:
            # Forward this instance's initializer so that an instance-level
            # override of init_weights also applies to the groups.
            x = self.group(x, n_layers, n_filters, init_weights=self.init_weights)
        return x

    @staticmethod
    def group(x, n_layers, n_filters, init_weights=None):
        """ Construct a Convolutional Group
            x            : input to the group
            n_layers     : number of convolutional layers
            n_filters    : number of filters
            init_weights : kernel initializer (defaults to VGG.init_weights)

            Returns the output tensor of the group's max pooling layer.
        """
        if init_weights is None:
            init_weights = VGG.init_weights

        # Block of convolutional layers
        for _ in range(n_layers):
            x = Conv2D(n_filters, (3, 3), strides=(1, 1), padding="same", activation="relu",
                       kernel_initializer=init_weights)(x)

        # Max pooling at the end of the block
        x = MaxPooling2D(2, strides=(2, 2))(x)
        return x

    def classifier(self, x, n_classes):
        """ Construct the Classifier
            x         : input to the classifier
            n_classes : number of output classes

            Returns the output tensor of the softmax layer.
        """
        # Flatten the feature maps
        x = Flatten()(x)

        # Two fully connected dense layers
        x = Dense(4096, activation='relu', kernel_initializer=self.init_weights)(x)
        x = Dense(4096, activation='relu', kernel_initializer=self.init_weights)(x)

        # Output layer for classification
        x = Dense(n_classes, activation='softmax', kernel_initializer=self.init_weights)(x)
        return x
# Example of constructing a VGG 16
# vgg = VGG(16)
# model = vgg.model
Below is a mini-VGG I wrote for CIFAR-10. Notice how at the bottleneck layer the feature maps are 7 x 7 (after the final max pooling), so spatial information is preserved.
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) [(None, 32, 32, 3)] 0
_________________________________________________________________
conv2d_26 (Conv2D) (None, 28, 28, 32) 2432
_________________________________________________________________
conv2d_27 (Conv2D) (None, 28, 28, 64) 18496
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 14, 14, 64) 0
_________________________________________________________________
conv2d_28 (Conv2D) (None, 14, 14, 128) 73856
_________________________________________________________________
conv2d_29 (Conv2D) (None, 14, 14, 128) 147584
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 7, 7, 128) 0
_________________________________________________________________
flatten_3 (Flatten) (None, 6272) 0
_________________________________________________________________
dense_5 (Dense) (None, 10) 62730
=================================================================
Total params: 305,098
Trainable params: 305,098
Non-trainable params: 0
Below are the results of training for 10 epochs. Notice how the validation accuracy plateaus at around 74-75%, while the training accuracy keeps climbing and the validation loss starts rising after epoch 6. Perhaps it is overfitting to the data.
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
45000/45000 [==============================] - 73s 2ms/sample - loss: 1.4581 - acc: 0.4728 - val_loss: 1.1139 - val_acc: 0.6030
Epoch 2/10
45000/45000 [==============================] - 77s 2ms/sample - loss: 0.9879 - acc: 0.6559 - val_loss: 0.8826 - val_acc: 0.6948
Epoch 3/10
45000/45000 [==============================] - 81s 2ms/sample - loss: 0.7916 - acc: 0.7264 - val_loss: 0.8561 - val_acc: 0.7152
Epoch 4/10
45000/45000 [==============================] - 81s 2ms/sample - loss: 0.6645 - acc: 0.7689 - val_loss: 0.7758 - val_acc: 0.7362
Epoch 5/10
45000/45000 [==============================] - 82s 2ms/sample - loss: 0.5571 - acc: 0.8058 - val_loss: 0.7687 - val_acc: 0.7568
Epoch 6/10
45000/45000 [==============================] - 82s 2ms/sample - loss: 0.4691 - acc: 0.8349 - val_loss: 0.7511 - val_acc: 0.7558
Epoch 7/10
45000/45000 [==============================] - 82s 2ms/sample - loss: 0.3811 - acc: 0.8669 - val_loss: 0.8617 - val_acc: 0.7520
Epoch 8/10
45000/45000 [==============================] - 82s 2ms/sample - loss: 0.3132 - acc: 0.8897 - val_loss: 0.9241 - val_acc: 0.7468
Epoch 9/10
45000/45000 [==============================] - 81s 2ms/sample - loss: 0.2583 - acc: 0.9076 - val_loss: 1.0457 - val_acc: 0.7438
Epoch 10/10
45000/45000 [==============================] - 83s 2ms/sample - loss: 0.2174 - acc: 0.9221 - val_loss: 1.1191 - val_acc: 0.7428
How could we improve this?
Perhaps adding regularization (dropout)?
Perhaps adding batch normalization between the two VGG groups?
Perhaps adding a squeeze (dimensionality reduction) group -- the model has fewer parameters to train?
Perhaps adding another VGG group with a doubling of filters (dimensionality expansion) -- the model has more parameters to train?
If this is a classroom, we will split into 4 teams and each team will use a different approach.
In [ ]:
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, Flatten, Dense, GlobalAveragePooling2D, Reshape
# Stem
# A 5x5 'valid' convolution shrinks the 32x32 input to 28x28. This matches the
# reference summary for this model: 28x28x32 output, (5*5*3 + 1) * 32 = 2,432
# parameters. A 3x3 kernel would give 30x30 and only 896 parameters.
inputs = Input((32, 32, 3))
x = Conv2D(32, (5, 5), strides=1, padding='valid', activation='relu')(inputs)

# Learner
# Each VGG group ends in a 2x2 max pooling, so the feature maps go 28 -> 14 -> 7.
# VGG group: 1 conv layer, 64 filters
x = VGG.group(x, 1, 64)
# VGG group: 2 conv layers, 128 filters
x = VGG.group(x, 2, 128)

# Classifier
# Flatten the 7x7x128 feature maps and classify into the 10 CIFAR-10 classes
x = Flatten()(x)
outputs = Dense(10, activation='softmax')(x)

model = Model(inputs, outputs)
# The labels are integer class ids, hence the sparse categorical cross-entropy loss
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()
In [ ]:
# Fit the mini-VGG on the CIFAR-10 dataset
from tensorflow.keras.datasets import cifar10
import numpy as np

# Fetch the training and test splits (images and their labels)
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Rescale the pixel values by 1/255 and store the images as 32-bit floats
x_train = np.asarray(x_train / 255.0, dtype=np.float32)
x_test = np.asarray(x_test / 255.0, dtype=np.float32)

# 10 epochs with mini-batches of 32; 10% of the training data is held out for validation
model.fit(x=x_train, y=y_train, batch_size=32, epochs=10, verbose=1, validation_split=0.1)
In [ ]:
In [ ]: