In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [3]:
import matplotlib.pyplot as plt
import numpy as np

In [4]:
from distutils.version import StrictVersion

In [5]:
import sklearn
print(sklearn.__version__)

assert StrictVersion(sklearn.__version__) >= StrictVersion('0.18.1')


0.18.1

In [6]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)

assert StrictVersion(tf.__version__) >= StrictVersion('1.1.0')


1.2.1

In [7]:
import keras
print(keras.__version__)

assert StrictVersion(keras.__version__) >= StrictVersion('2.0.0')


Using TensorFlow backend.
2.0.6

This script accompanies the blog post "Building powerful image classification models using very little data" from blog.keras.io. It uses data that can be downloaded at https://www.kaggle.com/c/dogs-vs-cats/data. In our setup, we:

  • created a data/ folder
  • created train/ and validation/ subfolders inside data/
  • created cats/ and dogs/ subfolders inside train/ and validation/
  • put the cat pictures with indices 0-999 in data/train/cats
  • put the cat pictures with indices 1000-1399 in data/validation/cats
  • put the dog pictures with indices 12500-13499 in data/train/dogs
  • put the dog pictures with indices 13500-13899 in data/validation/dogs

This gives us 1,000 training examples and 400 validation examples per class. In summary, this is our directory structure (a sketch of how to create it follows the listing):
    data/
      train/
          dogs/
              dog001.jpg
              dog002.jpg
              ...
          cats/
              cat001.jpg
              cat002.jpg
              ...
      validation/
          dogs/
              dog001.jpg
              dog002.jpg
              ...
          cats/
              cat001.jpg
              cat002.jpg
              ...
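
A minimal sketch of how this layout could be created with the standard library, assuming the Kaggle archive was unpacked to a folder named kaggle_train/ with files named like cat.0.jpg and dog.12500.jpg (both the folder name and the naming scheme are assumptions; adjust them to your download):

In [ ]:
import os
import shutil

def copy_range(prefix, start, stop, dst):
    """Copy prefix.<i>.jpg for i in [start, stop) into dst."""
    os.makedirs(dst, exist_ok=True)
    for i in range(start, stop):
        name = '%s.%d.jpg' % (prefix, i)  # assumed flat Kaggle-style naming
        shutil.copy(os.path.join('kaggle_train', name), os.path.join(dst, name))

copy_range('cat', 0, 1000, 'data/train/cats')
copy_range('cat', 1000, 1400, 'data/validation/cats')
copy_range('dog', 12500, 13500, 'data/train/dogs')
copy_range('dog', 13500, 13900, 'data/validation/dogs')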

In [8]:
!ls -lh data


total 8.0K
drwxrwxr-x 4 ubuntu ubuntu 4.0K Aug 31 18:52 train
drwxrwxr-x 4 ubuntu ubuntu 4.0K Aug 31 18:52 validation

In [11]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications

# dimensions of our images.
img_width, img_height = 150, 150

train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16

In [12]:
# build the VGG16 network; include_top=False keeps only the convolutional
# base and drops the ImageNet fully connected classifier
model = applications.VGG16(include_top=False, weights='imagenet')
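
The bottleneck features loaded below were precomputed, and the generating cell is not shown in this transcript. Following the blog post, they would be produced by running each image once through the frozen VGG16 base and saving the activations; a sketch, assuming the data/ layout above:

In [ ]:
datagen = ImageDataGenerator(rescale=1. / 255)

# images only (class_mode=None); shuffle=False keeps the alphabetical
# cats-then-dogs order, which the label arrays below rely on
generator = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)
bottleneck_features_train = model.predict_generator(
    generator, nb_train_samples // batch_size)
np.save('bottleneck_features_train.npy', bottleneck_features_train)

generator = datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)
bottleneck_features_validation = model.predict_generator(
    generator, nb_validation_samples // batch_size)
np.save('bottleneck_features_validation.npy', bottleneck_features_validation)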

The next step is to use those saved bottleneck feature activations to train our own very simple fully connected classifier on top.


In [15]:
train_data = np.load('bottleneck_features_train.npy')

In [24]:
train_data.shape[1:]


Out[24]:
(4, 4, 512)
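
Each 150×150 input shrinks through VGG16's five max-pooling stages (150 → 75 → 37 → 18 → 9 → 4), leaving 4×4 feature maps with 512 channels, i.e. 8,192 values per image once flattened.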

In [17]:
# flow_from_directory traverses class folders alphabetically, so the first
# half of the saved features is cats (label 0) and the second half dogs (1)
train_labels = np.array(
    [0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))

In [20]:
# same for validation
validation_data = np.load('bottleneck_features_validation.npy')
validation_labels = np.array(
    [0] * (nb_validation_samples // 2) + [1] * (nb_validation_samples // 2))

In [21]:
model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))  # (4, 4, 512) -> 8192
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # single probability for the binary task

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy', metrics=['accuracy'])

In [22]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
flatten_2 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 257       
=================================================================
Total params: 2,097,665
Trainable params: 2,097,665
Non-trainable params: 0
_________________________________________________________________
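
The parameter counts check out: the first Dense layer has 8,192 × 256 weights plus 256 biases (2,097,408), and the output layer 256 weights plus 1 bias (257).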

In [25]:
model.fit(train_data, train_labels,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(validation_data, validation_labels))


Train on 2000 samples, validate on 800 samples
Epoch 1/50
2000/2000 [==============================] - 1s - loss: 0.8996 - acc: 0.7615 - val_loss: 0.3003 - val_acc: 0.8675
Epoch 2/50
2000/2000 [==============================] - 0s - loss: 0.3514 - acc: 0.8585 - val_loss: 0.3761 - val_acc: 0.8375
Epoch 3/50
2000/2000 [==============================] - 0s - loss: 0.2859 - acc: 0.8860 - val_loss: 0.2506 - val_acc: 0.8988
Epoch 4/50
2000/2000 [==============================] - 0s - loss: 0.2573 - acc: 0.9115 - val_loss: 0.3732 - val_acc: 0.8575
Epoch 5/50
2000/2000 [==============================] - 0s - loss: 0.2186 - acc: 0.9150 - val_loss: 0.2909 - val_acc: 0.9087
Epoch 6/50
2000/2000 [==============================] - 0s - loss: 0.2290 - acc: 0.9190 - val_loss: 0.3718 - val_acc: 0.8812
Epoch 7/50
2000/2000 [==============================] - 0s - loss: 0.1811 - acc: 0.9305 - val_loss: 0.3277 - val_acc: 0.9038
Epoch 8/50
2000/2000 [==============================] - 0s - loss: 0.1483 - acc: 0.9495 - val_loss: 0.4777 - val_acc: 0.8650
Epoch 9/50
2000/2000 [==============================] - 0s - loss: 0.1726 - acc: 0.9440 - val_loss: 0.3357 - val_acc: 0.9062
Epoch 10/50
2000/2000 [==============================] - 0s - loss: 0.1407 - acc: 0.9515 - val_loss: 0.7321 - val_acc: 0.8113
Epoch 11/50
2000/2000 [==============================] - 0s - loss: 0.1062 - acc: 0.9560 - val_loss: 0.4178 - val_acc: 0.9012
Epoch 12/50
2000/2000 [==============================] - 0s - loss: 0.1119 - acc: 0.9610 - val_loss: 0.4422 - val_acc: 0.8988
Epoch 13/50
2000/2000 [==============================] - 0s - loss: 0.0906 - acc: 0.9710 - val_loss: 0.4602 - val_acc: 0.9038
Epoch 14/50
2000/2000 [==============================] - 0s - loss: 0.0909 - acc: 0.9660 - val_loss: 0.4741 - val_acc: 0.9062
Epoch 15/50
2000/2000 [==============================] - 0s - loss: 0.0817 - acc: 0.9715 - val_loss: 0.5284 - val_acc: 0.8962
Epoch 16/50
2000/2000 [==============================] - 0s - loss: 0.0591 - acc: 0.9795 - val_loss: 0.5634 - val_acc: 0.8912
Epoch 17/50
2000/2000 [==============================] - 0s - loss: 0.0738 - acc: 0.9745 - val_loss: 0.6012 - val_acc: 0.9012
Epoch 18/50
2000/2000 [==============================] - 0s - loss: 0.0647 - acc: 0.9785 - val_loss: 0.9832 - val_acc: 0.8475
Epoch 19/50
2000/2000 [==============================] - 0s - loss: 0.0597 - acc: 0.9810 - val_loss: 0.6146 - val_acc: 0.9025
Epoch 20/50
2000/2000 [==============================] - 0s - loss: 0.0493 - acc: 0.9825 - val_loss: 0.5983 - val_acc: 0.9012
Epoch 21/50
2000/2000 [==============================] - 0s - loss: 0.0513 - acc: 0.9890 - val_loss: 0.7402 - val_acc: 0.8862
Epoch 22/50
2000/2000 [==============================] - 0s - loss: 0.0460 - acc: 0.9805 - val_loss: 0.6586 - val_acc: 0.9062
Epoch 23/50
2000/2000 [==============================] - 0s - loss: 0.0468 - acc: 0.9840 - val_loss: 0.6205 - val_acc: 0.9000
Epoch 24/50
2000/2000 [==============================] - 0s - loss: 0.0260 - acc: 0.9920 - val_loss: 0.7162 - val_acc: 0.8962
Epoch 25/50
2000/2000 [==============================] - 0s - loss: 0.0400 - acc: 0.9855 - val_loss: 0.7215 - val_acc: 0.9012
Epoch 26/50
2000/2000 [==============================] - 0s - loss: 0.0290 - acc: 0.9880 - val_loss: 0.7875 - val_acc: 0.8925
Epoch 27/50
2000/2000 [==============================] - 0s - loss: 0.0337 - acc: 0.9900 - val_loss: 0.7253 - val_acc: 0.8975
Epoch 28/50
2000/2000 [==============================] - 0s - loss: 0.0248 - acc: 0.9930 - val_loss: 0.7992 - val_acc: 0.9075
Epoch 29/50
2000/2000 [==============================] - 0s - loss: 0.0309 - acc: 0.9905 - val_loss: 0.7616 - val_acc: 0.8988
Epoch 30/50
2000/2000 [==============================] - 0s - loss: 0.0216 - acc: 0.9920 - val_loss: 0.8326 - val_acc: 0.9038
Epoch 31/50
2000/2000 [==============================] - 0s - loss: 0.0304 - acc: 0.9915 - val_loss: 0.8008 - val_acc: 0.9062
Epoch 32/50
2000/2000 [==============================] - 0s - loss: 0.0405 - acc: 0.9895 - val_loss: 0.8107 - val_acc: 0.9050
Epoch 33/50
2000/2000 [==============================] - 0s - loss: 0.0297 - acc: 0.9915 - val_loss: 1.0684 - val_acc: 0.8825
Epoch 34/50
2000/2000 [==============================] - 0s - loss: 0.0258 - acc: 0.9880 - val_loss: 0.8165 - val_acc: 0.8962
Epoch 35/50
2000/2000 [==============================] - 0s - loss: 0.0185 - acc: 0.9950 - val_loss: 0.8440 - val_acc: 0.8975
Epoch 36/50
2000/2000 [==============================] - 0s - loss: 0.0229 - acc: 0.9920 - val_loss: 0.9580 - val_acc: 0.8925
Epoch 37/50
2000/2000 [==============================] - 0s - loss: 0.0137 - acc: 0.9950 - val_loss: 0.8580 - val_acc: 0.8988
Epoch 38/50
2000/2000 [==============================] - 0s - loss: 0.0219 - acc: 0.9935 - val_loss: 1.2534 - val_acc: 0.8638
Epoch 39/50
2000/2000 [==============================] - 0s - loss: 0.0103 - acc: 0.9950 - val_loss: 0.8610 - val_acc: 0.8962
Epoch 40/50
2000/2000 [==============================] - 0s - loss: 0.0077 - acc: 0.9965 - val_loss: 1.0522 - val_acc: 0.8875
Epoch 41/50
2000/2000 [==============================] - 0s - loss: 0.0152 - acc: 0.9960 - val_loss: 0.9590 - val_acc: 0.9038
Epoch 42/50
2000/2000 [==============================] - 0s - loss: 0.0072 - acc: 0.9980 - val_loss: 0.9743 - val_acc: 0.8975
Epoch 43/50
2000/2000 [==============================] - 0s - loss: 0.0223 - acc: 0.9935 - val_loss: 0.9335 - val_acc: 0.9050
Epoch 44/50
2000/2000 [==============================] - 0s - loss: 0.0173 - acc: 0.9950 - val_loss: 0.9496 - val_acc: 0.9050
Epoch 45/50
2000/2000 [==============================] - 0s - loss: 0.0089 - acc: 0.9965 - val_loss: 0.9533 - val_acc: 0.8975
Epoch 46/50
2000/2000 [==============================] - 0s - loss: 0.0076 - acc: 0.9975 - val_loss: 1.0718 - val_acc: 0.8900
Epoch 47/50
2000/2000 [==============================] - 0s - loss: 0.0226 - acc: 0.9950 - val_loss: 1.0592 - val_acc: 0.8962
Epoch 48/50
2000/2000 [==============================] - 0s - loss: 0.0078 - acc: 0.9980 - val_loss: 0.9537 - val_acc: 0.8950
Epoch 49/50
2000/2000 [==============================] - 0s - loss: 0.0169 - acc: 0.9945 - val_loss: 1.4499 - val_acc: 0.8588
Epoch 50/50
2000/2000 [==============================] - 0s - loss: 0.0159 - acc: 0.9970 - val_loss: 0.9510 - val_acc: 0.9025
Out[25]:
<keras.callbacks.History at 0x7f16f7ff8eb8>
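
Validation accuracy peaks near 90% within the first few epochs, after which validation loss climbs steadily while training accuracy approaches 1.0, a classic sign of overfitting. One option, not part of the original run, is to stop automatically once validation loss stops improving; a sketch with Keras's EarlyStopping callback:

In [ ]:
from keras.callbacks import EarlyStopping

# stop once val_loss has not improved for 5 consecutive epochs (assumed patience)
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
model.fit(train_data, train_labels,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(validation_data, validation_labels),
          callbacks=[early_stopping])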

In [26]:
top_model_weights_path = 'bottleneck_fc_model.h5'
model.save_weights(top_model_weights_path)
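
To sanity-check the trained classifier, a single image can be run through the VGG16 base and then the top model. A sketch; the file path is hypothetical, and the 1/255 rescaling matches the feature-extraction step:

In [ ]:
from keras.preprocessing import image

# hypothetical example file; substitute any image from the validation set
img = image.load_img('data/validation/cats/cat001.jpg',
                     target_size=(img_width, img_height))
x = image.img_to_array(img) / 255.   # same rescaling as the generators
x = np.expand_dims(x, axis=0)

base = applications.VGG16(include_top=False, weights='imagenet')
features = base.predict(x)           # (1, 4, 4, 512) bottleneck activations
print(model.predict(features))       # sigmoid output: near 0 = cat, near 1 = dog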

In [ ]: