from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
import utils; reload(utils)
from utils import plots
from keras.optimizers import RMSprop

# Dataset root. The original trailing comment hints that a "data/sample/"
# root was used at some point for quicker experiments.
path = "data/"
model_path = path + 'models/'

# Re-import the local vgg16 module so that edits made to it since the last
# import are picked up, then instantiate the model wrapper.
import vgg16; reload(vgg16)
from vgg16 import Vgg16

vgg = Vgg16()

# Train / validation / test directories live directly under the dataset root.
train_path, valid_path, test_path = (
    os.path.join(path, subdir) for subdir in ('train', 'valid', 'test')
)
print(train_path, valid_path)
from keras.preprocessing.image import ImageDataGenerator

data/train data/valid

# Number of images per training batch; validation uses twice this value.
# NOTE(review): `batch_size` is used further down but is never assigned in
# the visible source -- 64 is a placeholder, confirm the intended value.
batch_size = 64

# NOTE(review): the constructor arguments were truncated in the source.
# A default generator (no augmentation) is used here; restore any
# augmentation settings that were originally passed.
datagen = ImageDataGenerator()

# Previously: batches = vgg.get_batches(train_path, batch_size=batch_size)
batches = datagen.flow_from_directory(
    train_path,
    target_size=(224, 224),
    class_mode='categorical',
    shuffle=True,
    batch_size=batch_size,  # reconstructed: the original call was cut off after `shuffle=True,`
)

# Previously: val_batches = vgg.get_batches(valid_path, batch_size=batch_size*2)
val_batches = datagen.flow_from_directory(
    valid_path,
    target_size=(224, 224),
    class_mode='categorical',
    shuffle=True,
    batch_size=batch_size * 2,  # reconstructed from the commented-out call above
)
#from keras.models import Sequential
#from keras.layers import Dropout, Flatten, Dense
#vgg.model.add(Dense(256, activation='relu'))
#vgg.model.add(Dense(1, activation='sigmoid'))
#for layer in model.layers: layer.trainable = False
#K.set_value(, 0.01)

Found 21000 images belonging to 2 classes.
Found 4000 images belonging to 2 classes.

Without ImageDataGenerator:

200/200 [==============================] - 7s - loss: 1.2116 - acc: 0.6400 - val_loss: 0.1929 - val_acc: 0.9400


200/200 [==============================] - 7s - loss: 0.6619 - acc: 0.7250 - val_loss: 0.2783 - val_acc: 0.8800

Last attempt, with every layer from the first dense layer onward set to trainable:

The code was:

# Get the index of the first dense layer...
#first_dense_idx = [index for index,layer in enumerate(layers) if type(layer) is Dense][0]
# ...and set this and all subsequent layers to trainable
#for layer in layers[first_dense_idx:]: layer.trainable=True
    Epoch 1/3
    21000/21000 [==============================] - 601s - loss: 0.1698 - acc: 0.9534 - val_loss: 0.0847 - val_acc: 0.9745
    Epoch 2/3
    21000/21000 [==============================] - 600s - loss: 0.1379 - acc: 0.9643 - val_loss: 0.0816 - val_acc: 0.9760
    Epoch 3/3
    21000/21000 [==============================] - 600s - loss: 0.1367 - acc: 0.9645 - val_loss: 0.0823 - val_acc: 0.9780

from keras import backend as K

# Mark every layer from index 12 onward as trainable.  This is done BEFORE
# compile(): Keras only guarantees that changes to `trainable` take effect
# once the model is (re)compiled.
# NOTE(review): 12 is a hard-coded layer index -- confirm it is the intended
# cut-off for this model.
layers = vgg.model.layers
for layer in layers[12:]:
    layer.trainable = True

# The optimizer is created with lr=0.1 and then immediately overridden, so the
# learning rate actually used for training is 0.001.
# NOTE(review): the first argument of K.set_value was missing in the source;
# `opt.lr` is the reconstructed target -- confirm.
opt = RMSprop(lr=0.1)
K.set_value(opt.lr, 0.001)
vgg.model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): the start of this call was lost in the source (only
# ", val_batches, nb_epoch=5)" survived, fused onto the loop line above).
# Reconstructed as the Vgg16 wrapper's fit(batches, val_batches, nb_epoch)
# -- confirm against vgg16.py.
vgg.fit(batches, val_batches, nb_epoch=5)

Epoch 1/5
21000/21000 [==============================] - 602s - loss: 0.1771 - acc: 0.9524 - val_loss: 0.0957 - val_acc: 0.9740
Epoch 2/5
21000/21000 [==============================] - 603s - loss: 0.1377 - acc: 0.9668 - val_loss: 0.1161 - val_acc: 0.9685
Epoch 3/5
21000/21000 [==============================] - 602s - loss: 0.1437 - acc: 0.9660 - val_loss: 0.1222 - val_acc: 0.9685
Epoch 4/5
21000/21000 [==============================] - 603s - loss: 0.1433 - acc: 0.9691 - val_loss: 0.0772 - val_acc: 0.9770
Epoch 5/5
21000/21000 [==============================] - 603s - loss: 0.1480 - acc: 0.9667 - val_loss: 0.1050 - val_acc: 0.9735

# Run the model over the test directory with a doubled batch size.
# Note: this rebinds `batches` to whatever vgg.test returns as its first
# element; the statements that follow read `batches.filenames` from it.
batches, preds = vgg.test(
    test_path,
    batch_size=2 * batch_size,
)

Found 12500 images belonging to 1 classes.

# Column 1 of the predictions is taken as the "is dog" probability.
# Cast to float64 before clipping: in float32 the upper bound 0.99999999
# rounds to exactly 1.0, which would silently turn the clip into a no-op.
isdog = preds[:, 1].astype(np.float64)
isdog = isdog.clip(min=0.00000001, max=0.99999999)

# The numeric id is the part of the file name before its first dot.  Using
# basename() instead of a fixed 8-character offset keeps this correct
# whatever the length of the directory prefix.
filenames = batches.filenames
ids = np.array([int(os.path.basename(f).split('.', 1)[0]) for f in filenames])
subm = np.stack([ids, isdog], axis=1)

submission_file_name = 'submission1.csv'
# Eight decimals are needed so the clipped bounds survive formatting; with
# '%.5f' they were rounded back to 0.00000 / 1.00000, undoing the clip.
np.savetxt(submission_file_name, subm, fmt='%d,%.8f', header='id,label', comments='')

