In [1]:
import warnings

# Install a blanket "ignore" filter so no warning of any category is shown
# in the cell outputs from here on.
warnings.filterwarnings(action='ignore')
In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): %pylab star-imports numpy and matplotlib names into the global
# namespace (shadowing builtins such as sum/all/any) — confirm whether any
# cell still depends on it before keeping it.
%pylab inline
In [3]:
import matplotlib.pylab as plt
import numpy as np
In [4]:
from distutils.version import StrictVersion
In [5]:
import sklearn

# Show the installed scikit-learn version, then stop right here if it is
# older than 0.18.1.
print(sklearn.__version__)
assert StrictVersion('0.18.1') <= StrictVersion(sklearn.__version__)
In [6]:
import tensorflow as tf

# Restrict TensorFlow's logger to messages of ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)

# Show the installed TensorFlow version, then stop right here if it is
# older than 1.1.0.
print(tf.__version__)
assert StrictVersion('1.1.0') <= StrictVersion(tf.__version__)
In [7]:
import keras

# Show the installed Keras version, then stop right here if it is older
# than 2.0.0.
print(keras.__version__)
assert StrictVersion('2.0.0') <= StrictVersion(keras.__version__)
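This script accompanies the blog post "Building powerful image classification models using very little data" from blog.keras.io. It uses data that can be downloaded at: https://www.kaggle.com/c/dogs-vs-cats/data. In our setup, we arrange the images in the following directory structure, with one subfolder per class under both train/ and validation/: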
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
In [8]:
# Shell escape: long listing of the data/ folder with human-readable sizes.
!ls -lh data
In [9]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
# Resolution (in pixels) that every image is resized to when it is loaded.
img_width = 150
img_height = 150

# Root folders of the training and validation image sets.
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# Number of images taken from each set.
nb_train_samples = 2000
nb_validation_samples = 800

# Run settings: epoch count and the batch size used by the generators.
epochs = 50
batch_size = 16
In [10]:
# Instantiate VGG16 with its ImageNet weights; include_top=False leaves out
# the fully connected classifier layers at the top of the network.
model = applications.VGG16(
    weights='imagenet',
    include_top=False,
)
In [17]:
# Print the layer-by-layer summary of the VGG16 model built above.
model.summary()
In [11]:
# No augmentation is configured: the generator only multiplies every pixel
# value by 1/255.
datagen = ImageDataGenerator(rescale=1.0 / 255)
In [12]:
# Stream the training images from disk.
# - target_size is (height, width) in Keras, so the height goes first.
# - class_mode=None makes the generator yield image batches without labels.
# - shuffle=False keeps a fixed file order, so the extracted features stay
#   aligned with the order in which the generator lists the files.
train_data_generator = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)
In [13]:
# Run the training images through the VGG16 model and collect its outputs
# (the "bottleneck features"). The step count uses floor division, so only
# whole batches are requested from the generator.
bottleneck_features_train = model.predict_generator(
    train_data_generator,
    steps=nb_train_samples // batch_size)
In [15]:
# Expected contents: one entry per image (2000), each a 4*4 spatial grid of
# 512 bottleneck feature channels. The exact axis order depends on the
# backend's image data format — TODO confirm against the displayed shape.
bottleneck_features_train.shape
Out[15]:
In [18]:
# Write the training features to disk. Opening the file in a `with` block
# guarantees the handle is flushed and closed as soon as np.save returns,
# instead of being left open for the lifetime of the kernel.
with open('bottleneck_features_train.npy', 'wb') as features_file:
    np.save(features_file, bottleneck_features_train)
In [19]:
# Repeat the feature extraction for the validation images.
# target_size is (height, width) in Keras, so the height goes first;
# class_mode=None yields image batches without labels; shuffle=False keeps
# a fixed file order.
validation_data_generator = datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

# Floor division: only whole batches are requested from the generator.
bottleneck_features_validation = model.predict_generator(
    validation_data_generator, nb_validation_samples // batch_size)

# Write the features to disk; the `with` block closes the file handle once
# np.save has finished.
with open('bottleneck_features_validation.npy', 'wb') as features_file:
    np.save(features_file, bottleneck_features_validation)
In [ ]: