In [1]:
from __future__ import division


import numpy as np
from keras.models import Model, load_model, Sequential
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import *
from keras.callbacks import *
from keras.optimizers import Adam
from keras.objectives import categorical_crossentropy

import matplotlib.pyplot as plt
import glob
import os
from sklearn.externals import joblib

%matplotlib inline

img_size = 384


Using TensorFlow backend.

In [2]:
train_dir = '../data/train'
val_dir = '../data/validation'
test_dir = '../data/test'
labels = ['army', 'arabic_sign', 'burning_flag', 'desert', 'dutch_flag', 'islam', 'islamic_state', 'middle_east', 'muslim', 'neonazi', 'rifles', 'normal']

In [3]:
# for label in labels:
#     os.mkdir(os.path.join(val_dir, label))

In [4]:
nb_train = len(glob.glob(os.path.join(train_dir, '*/*.*')))
nb_val = len(glob.glob(os.path.join(val_dir, '*/*.*')))

In [5]:
# train_files = glob.glob(os.path.join(train_dir, '*/*'))
# train_files = np.random.permutation(train_files)
# val_files = train_files[:int(0.25 * nb_train)]  # hold out ~25% of the training images
# for file in val_files:
#     os.rename(file, file.replace('train', 'validation'))

In [6]:
print 'training samples', nb_train
print 'val samples', nb_val

def preprocess_gen(gen):
    # apply VGG16 preprocessing to every batch and rescale the pixel values
    for X, y in gen:
        yield preprocess_input(X)/255., y
        
datagen_train = ImageDataGenerator(
    rotation_range=15.,
    width_shift_range=0.05,
    height_shift_range=0.05,
    zoom_range=[0.8, 1.2],
    horizontal_flip=True)


training samples 5463
val samples 1821
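
Before training, it can help to eyeball a few augmented batches to confirm the ImageDataGenerator settings look sensible. A minimal sketch (unexecuted; assumes the TensorFlow backend's channel-last image layout, so the raw 0-255 batches can be shown directly):

In [ ]:
# hypothetical sanity check: preview a handful of augmented training images
preview_flow = datagen_train.flow_from_directory(
    train_dir, target_size=(img_size, img_size), batch_size=4, shuffle=True)
X_batch, y_batch = next(preview_flow)
fig, axes = plt.subplots(1, 4, figsize=(16, 4))
for ax, img in zip(axes, X_batch):
    ax.imshow(img.astype('uint8'))  # raw pixels, before preprocess_gen is applied
    ax.axis('off')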

In [7]:
terror_class = 'rifles'

print "Training {} versus normal photos...".format(terror_class)
base_model = VGG16(include_top=False, input_shape=(img_size, img_size, 3))
for layer in base_model.layers:
    layer.trainable = False  # freeze the convolutional base; only the new head is trained
layer = Flatten()(base_model.output)
layer = BatchNormalization()(layer)
layer = Dense(512, activation='relu')(layer)
layer = BatchNormalization()(layer)

out_layer = Dense(2, activation='softmax')(layer)

model = Model(input=base_model.input, output=out_layer)
model.compile(Adam(lr=1e-4), 'categorical_crossentropy', metrics=['accuracy'])

csv_logger = CSVLogger('../log.csv')
lr_plateau = ReduceLROnPlateau(monitor='val_loss', patience=0, verbose=1, factor=0.1)
checkpoint = ModelCheckpoint(filepath='../models/model.' + terror_class + '.{epoch:02d}-{val_loss}.hdf5', verbose=1, save_best_only=True)

train_gen = datagen_train.flow_from_directory(train_dir, target_size=(img_size, img_size), batch_size=16, shuffle=True, classes=[terror_class, 'normal'])
val_gen = ImageDataGenerator().flow_from_directory(val_dir, target_size=(img_size, img_size), batch_size=16, shuffle=False, classes=[terror_class, 'normal'])

train_gen = preprocess_gen(train_gen)
val_gen = preprocess_gen(val_gen)

model.fit_generator(train_gen, samples_per_epoch=nb_train, nb_epoch=5, verbose=1,
                    callbacks=[csv_logger, lr_plateau, checkpoint],
                    validation_data=val_gen, nb_val_samples=nb_val,
                    nb_worker=4, pickle_safe=True)


Training rifles versus normal photos...
Found 1297 images belonging to 2 classes.
Found 468 images belonging to 2 classes.
Epoch 1/5
5460/5463 [============================>.] - ETA: 0s - loss: 0.8699 - acc: 0.7214   
/home/joris/anaconda2/envs/venv/lib/python2.7/site-packages/keras/engine/training.py:1573: UserWarning: Epoch comprised more than `samples_per_epoch` samples, which might affect learning results. Set `samples_per_epoch` correctly to avoid this warning.
  warnings.warn('Epoch comprised more than '
Epoch 00000: val_loss improved from inf to 0.44692, saving model to ../models/model.rifles.00-0.446915915569.hdf5
5476/5463 [==============================] - 236s - loss: 0.8676 - acc: 0.7222 - val_loss: 0.4469 - val_acc: 0.8488
Epoch 2/5
5460/5463 [============================>.] - ETA: 0s - loss: 0.3047 - acc: 0.8908  Epoch 00001: val_loss improved from 0.44692 to 0.36060, saving model to ../models/model.rifles.01-0.360604326774.hdf5
5476/5463 [==============================] - 232s - loss: 0.3040 - acc: 0.8912 - val_loss: 0.3606 - val_acc: 0.8922
Epoch 3/5
5460/5463 [============================>.] - ETA: 0s - loss: 0.1567 - acc: 0.9548  Epoch 00002: val_loss improved from 0.36060 to 0.27033, saving model to ../models/model.rifles.02-0.270332549564.hdf5
5476/5463 [==============================] - 232s - loss: 0.1565 - acc: 0.9549 - val_loss: 0.2703 - val_acc: 0.9154
Epoch 4/5
5460/5463 [============================>.] - ETA: 0s - loss: 0.0955 - acc: 0.9786  Epoch 00003: val_loss improved from 0.27033 to 0.24220, saving model to ../models/model.rifles.03-0.242201724313.hdf5
5476/5463 [==============================] - 232s - loss: 0.0954 - acc: 0.9786 - val_loss: 0.2422 - val_acc: 0.9383
Epoch 5/5
5460/5463 [============================>.] - ETA: 0s - loss: 0.0623 - acc: 0.9885  Epoch 00004: val_loss improved from 0.24220 to 0.21028, saving model to ../models/model.rifles.04-0.210282801045.hdf5
5476/5463 [==============================] - 232s - loss: 0.0623 - acc: 0.9885 - val_loss: 0.2103 - val_acc: 0.9349
Out[7]:
<keras.callbacks.History at 0x7f7ae73c0c90>
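
The test directory defined earlier is not used above; a quick way to check the model on it would be something along these lines (a sketch, assuming ../data/test has the same per-label subdirectories and the Keras 1.x generator API used throughout):

In [ ]:
# hypothetical held-out evaluation of the rifles-vs-normal model
test_flow = ImageDataGenerator().flow_from_directory(
    test_dir, target_size=(img_size, img_size), batch_size=16,
    shuffle=False, classes=[terror_class, 'normal'])
test_gen = preprocess_gen(test_flow)
# nb_sample is the Keras 1.x attribute holding the number of images found
print model.evaluate_generator(test_gen, val_samples=test_flow.nb_sample)  # [loss, accuracy]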

In [6]:
K.set_value(model.optimizer.lr, 1e-5)  # drop the learning rate before continuing training
model.fit_generator(train_gen, samples_per_epoch=nb_train, nb_epoch=5, verbose=1,
                    callbacks=[csv_logger, lr_plateau, checkpoint],
                    validation_data=val_gen, nb_val_samples=nb_val,
                    nb_worker=4, pickle_safe=True)


Epoch 1/5
5456/5463 [============================>.] - ETA: 0s - loss: 0.0853 - acc: 0.9683  Epoch 00000: val_loss improved from 0.31719 to 0.30845, saving model to ../models/model.desert.00-0.308449587409.hdf5
5472/5463 [==============================] - 231s - loss: 0.0851 - acc: 0.9684 - val_loss: 0.3084 - val_acc: 0.9002
Epoch 2/5
5456/5463 [============================>.] - ETA: 0s - loss: 0.0876 - acc: 0.9674  Epoch 00001: val_loss improved from 0.30845 to 0.29201, saving model to ../models/model.desert.01-0.29200973414.hdf5
5472/5463 [==============================] - 231s - loss: 0.0874 - acc: 0.9675 - val_loss: 0.2920 - val_acc: 0.8958
Epoch 3/5
5456/5463 [============================>.] - ETA: 0s - loss: 0.0759 - acc: 0.9718  Epoch 00002: val_loss did not improve
5472/5463 [==============================] - 231s - loss: 0.0758 - acc: 0.9719 - val_loss: 0.2976 - val_acc: 0.9030
Epoch 4/5
5456/5463 [============================>.] - ETA: 0s - loss: 0.0750 - acc: 0.9734  Epoch 00003: val_loss improved from 0.29201 to 0.28262, saving model to ../models/model.desert.03-0.282624156051.hdf5
5472/5463 [==============================] - 231s - loss: 0.0749 - acc: 0.9735 - val_loss: 0.2826 - val_acc: 0.9041
Epoch 5/5
5456/5463 [============================>.] - ETA: 0s - loss: 0.0701 - acc: 0.9709  Epoch 00004: val_loss did not improve
5472/5463 [==============================] - 231s - loss: 0.0707 - acc: 0.9706 - val_loss: 0.2860 - val_acc: 0.8980
Out[6]:
<keras.callbacks.History at 0x7f1ea6590150>
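
The CSVLogger callback used above writes per-epoch metrics to ../log.csv; a small sketch for plotting them afterwards (assuming the file holds the columns Keras logged for the latest run, typically loss/val_loss and acc/val_acc):

In [ ]:
import csv

with open('../log.csv') as f:
    rows = list(csv.DictReader(f))
plt.plot([float(r['loss']) for r in rows], label='train loss')
plt.plot([float(r['val_loss']) for r in rows], label='val loss')
plt.xlabel('epoch')
plt.legend()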

In [8]:
best_model = load_model('../models/model.rifles.04-0.210282801045.hdf5')
i = 0
# locate the first BatchNormalization layer, i.e. where the dense head begins
while i < len(best_model.layers) and not isinstance(best_model.layers[i], BatchNormalization):
    i += 1
print i


20

In [9]:
input = Input((73728,))  # flattened block5_pool features: 12 * 12 * 512
layer = best_model.layers[i](input)
layer = best_model.layers[i+1](layer)
layer = best_model.layers[i+2](layer)
layer = best_model.layers[i+3](layer)

dense_model = Model(input=input, output=layer)
dense_model.summary()
dense_model.save('rifles.hdf5')


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
input_2 (InputLayer)             (None, 73728)         0                                            
____________________________________________________________________________________________________
batchnormalization_1 (BatchNorma (None, 73728)         294912      input_2[0][0]                    
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 512)           37749248    batchnormalization_1[1][0]       
____________________________________________________________________________________________________
batchnormalization_2 (BatchNorma (None, 512)           2048        dense_1[1][0]                    
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 2)             1026        batchnormalization_2[1][0]       
====================================================================================================
Total params: 38,047,234
Trainable params: 37,898,754
Non-trainable params: 148,480
____________________________________________________________________________________________________
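
The exported head expects the flattened block5_pool activations of the frozen VGG base (12 x 12 x 512 = 73,728 values per image at this input size). A minimal usage sketch, with a hypothetical image path, the same preprocessing as preprocess_gen, and base_model from the training cell still in memory:

In [ ]:
from keras.preprocessing.image import load_img, img_to_array

img = img_to_array(load_img('some_image.jpg', target_size=(img_size, img_size)))  # placeholder path
x = preprocess_input(np.expand_dims(img, axis=0)) / 255.
features = base_model.predict(x)                       # block5_pool activations, (1, 12, 12, 512)
print dense_model.predict(features.reshape((1, -1)))   # (1, 2): [P(rifles), P(normal)]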

In [9]:
dense_model.input_shape


Out[9]:
(None, 131072)

In [16]:
model.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
input_2 (InputLayer)             (None, 3, 384, 384)   0                                            
____________________________________________________________________________________________________
block1_conv1 (Convolution2D)     (None, 64, 384, 384)  1792        input_2[0][0]                    
____________________________________________________________________________________________________
block1_conv2 (Convolution2D)     (None, 64, 384, 384)  36928       block1_conv1[0][0]               
____________________________________________________________________________________________________
block1_pool (MaxPooling2D)       (None, 64, 192, 192)  0           block1_conv2[0][0]               
____________________________________________________________________________________________________
block2_conv1 (Convolution2D)     (None, 128, 192, 192) 73856       block1_pool[0][0]                
____________________________________________________________________________________________________
block2_conv2 (Convolution2D)     (None, 128, 192, 192) 147584      block2_conv1[0][0]               
____________________________________________________________________________________________________
block2_pool (MaxPooling2D)       (None, 128, 96, 96)   0           block2_conv2[0][0]               
____________________________________________________________________________________________________
block3_conv1 (Convolution2D)     (None, 256, 96, 96)   295168      block2_pool[0][0]                
____________________________________________________________________________________________________
block3_conv2 (Convolution2D)     (None, 256, 96, 96)   590080      block3_conv1[0][0]               
____________________________________________________________________________________________________
block3_conv3 (Convolution2D)     (None, 256, 96, 96)   590080      block3_conv2[0][0]               
____________________________________________________________________________________________________
block3_pool (MaxPooling2D)       (None, 256, 48, 48)   0           block3_conv3[0][0]               
____________________________________________________________________________________________________
block4_conv1 (Convolution2D)     (None, 512, 48, 48)   1180160     block3_pool[0][0]                
____________________________________________________________________________________________________
block4_conv2 (Convolution2D)     (None, 512, 48, 48)   2359808     block4_conv1[0][0]               
____________________________________________________________________________________________________
block4_conv3 (Convolution2D)     (None, 512, 48, 48)   2359808     block4_conv2[0][0]               
____________________________________________________________________________________________________
block4_pool (MaxPooling2D)       (None, 512, 24, 24)   0           block4_conv3[0][0]               
____________________________________________________________________________________________________
block5_conv1 (Convolution2D)     (None, 512, 24, 24)   2359808     block4_pool[0][0]                
____________________________________________________________________________________________________
block5_conv2 (Convolution2D)     (None, 512, 24, 24)   2359808     block5_conv1[0][0]               
____________________________________________________________________________________________________
block5_conv3 (Convolution2D)     (None, 512, 24, 24)   2359808     block5_conv2[0][0]               
____________________________________________________________________________________________________
block5_pool (MaxPooling2D)       (None, 512, 12, 12)   0           block5_conv3[0][0]               
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 73728)         0           block5_pool[0][0]                
____________________________________________________________________________________________________
batchnormalization_1 (BatchNorma (None, 73728)         294912      flatten_1[0][0]                  
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 1024)          75498496    batchnormalization_1[0][0]       
____________________________________________________________________________________________________
batchnormalization_2 (BatchNorma (None, 1024)          4096        dense_1[0][0]                    
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 2)             2050        batchnormalization_2[0][0]       
====================================================================================================
Total params: 90,514,242
Trainable params: 75,650,050
Non-trainable params: 14,864,192
____________________________________________________________________________________________________

In [ ]: