In [10]:
from __future__ import division
from __future__ import print_function

import os, random, glob, pickle, collections, math, json
import time
import numpy as np
import pandas as pd
# import ujson as json
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
sess = tf.Session()

from keras.models import Sequential, Model, load_model, model_from_json
from keras.layers import GlobalAveragePooling2D, Flatten, Dropout, Dense, LeakyReLU
from keras.layers import Conv2D
from keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.preprocessing import image
from keras import backend as K

In [4]:
from keras.losses import categorical_crossentropy
from keras.metrics import categorical_accuracy

In [1]:
# Input locations, relative to the notebook directory.
TRAIN_DIR = '../data/train/'
TEST_DIR = '../RFCN/JPEGImages/'
TRAIN_CROP_DIR = '../data/train_crop/'
TEST_CROP_DIR = '../data/test_stg1_crop/'
RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'
CROP_MODEL = 'resnet50_FT38_Hybrid_woNoF'

# Per-experiment working directories, all rooted at ./<CROP_MODEL>/.
CHECKPOINT_DIR = './' + CROP_MODEL + '/checkpoint/'
LOG_DIR = './' + CROP_MODEL + '/log/'
OUTPUT_DIR = './' + CROP_MODEL + '/output/'

# Create the experiment root first, then its sub-directories, on first run.
if not os.path.exists('./' + CROP_MODEL):
    os.mkdir('./' + CROP_MODEL)
if not os.path.exists(CHECKPOINT_DIR):
    os.mkdir(CHECKPOINT_DIR)
if not os.path.exists(LOG_DIR):
    os.mkdir(LOG_DIR)
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

# Height and width (in pixels) every crop is resized to.
ROWS = 224
COLS = 224
def featurewise_center(x):
    """Return `x` minus its mean over the first three axes.

    The mean is reduced over axis 0 first and then over axes 1 and 2, keeping
    the reduced dimensions so the result broadcasts against `x`. For a 4-D
    array this leaves one mean value per entry of the last axis. `x` itself
    is not modified.
    """
    mean_over_first_axis = np.mean(x, axis=0, keepdims=True)
    mean_per_last_axis = np.mean(mean_over_first_axis, axis=(1, 2), keepdims=True)
    return x - mean_per_last_axis

def mean(x):
    """Return the mean of `x` over its first three axes.

    The reduction is done in two steps (axis 0, then the first two remaining
    axes), so a 4-D input yields a 1-D array indexed by the last axis.
    """
    averaged_over_first_axis = np.mean(x, axis=0)
    return np.mean(averaged_over_first_axis, axis=(0, 1))

def load_img(path, bbox, target_size=None):
    """Open an image, crop it to `bbox` and optionally resize the crop.

    Args:
        path: location of the image file, opened with PIL.
        bbox: sequence whose first four items are passed to `Image.crop` as
            (left, upper, right, lower).
        target_size: optional (rows, cols) pair; when given, the crop is
            resized to `cols` x `rows` pixels with bilinear interpolation.

    Returns:
        The cropped (and possibly rotated and resized) PIL image. Crops that
        are taller than wide are rotated by 90 degrees first, so every result
        is in landscape orientation before resizing.
    """
    img = Image.open(path)
    # NOTE(review): no conversion to RGB is done here (it was disabled), so
    # callers that need three channels presumably rely on RGB inputs -- confirm.
    cropped = img.crop((bbox[0], bbox[1], bbox[2], bbox[3]))
    width_cropped, height_cropped = cropped.size
    if height_cropped > width_cropped:
        # Image.ROTATE_90 is the named constant for the former literal 2.
        cropped = cropped.transpose(method=Image.ROTATE_90)
    if target_size:
        # PIL expects (width, height); target_size is (rows, cols).
        cropped = cropped.resize((target_size[1], target_size[0]), Image.BILINEAR)
    return cropped

def preprocess_input(x, mean):
    """Subtract `mean[c]` from slice `x[:, :, c]` for c = 0, 1, 2.

    The subtraction happens in place and the same array object is returned.
    No channel reordering or rescaling is applied here.
    """
    for channel in range(3):
        x[:, :, channel] -= mean[channel]
    return x

def get_best_model(checkpoint_dir = CHECKPOINT_DIR):
    """Load the checkpoint with the lowest validation loss in `checkpoint_dir`.

    Checkpoint files are expected to be named like
    'weights.<epoch>-<val_loss>.hdf5' (the pattern used by the ModelCheckpoint
    callback in this notebook); the validation loss is parsed from the file name.

    Args:
        checkpoint_dir: directory prefix (including the trailing separator)
            that is searched for '*.hdf5' files.

    Returns:
        (model, model_name): the loaded Keras model and the checkpoint's
        base file name.

    Raises:
        ValueError: if no checkpoint file is found, or a file name does not
            end in '-<float>.hdf5'.
    """
    files = glob.glob(checkpoint_dir + '*.hdf5')
    if not files:
        raise ValueError('No checkpoint files found in ' + checkpoint_dir)

    def _val_loss(path):
        # 'weights.012-0.3456.hdf5' -> 0.3456
        stem = os.path.splitext(os.path.basename(path))[0]
        return float(stem.split('-')[-1])

    # min() returns the first minimum, matching list.index(min(...)).
    best_file = min(files, key=_val_loss)
    print('Loading model from checkpoint file ' + best_file)
    model = load_model(best_file)
    model_name = os.path.basename(best_file)
    print('Loading model Done!')
    return (model, model_name)

NameError                                 Traceback (most recent call last)
<ipython-input-1-5f389aba4b3c> in <module>()
      5 RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'
      6 CROP_MODEL = 'resnet50_FT38_Hybrid_woNoF'
----> 7 if not os.path.exists('./' + CROP_MODEL):
      8     os.mkdir('./' + CROP_MODEL)
      9 CHECKPOINT_DIR = './' + CROP_MODEL + '/checkpoint/'

NameError: name 'os' is not defined

In [3]:
# Build GTbbox_df, the table of annotated bounding boxes, or load a cached
# copy of it from OUTPUT_DIR.
# GTbbox_df columns: ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax']
# NOTE(review): CROP_CLASSES and BATCHSIZE are not defined in the visible part
# of the notebook -- confirm where they come from.

file_name = 'GTbbox_df.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    GTbbox_df = pd.read_pickle(OUTPUT_DIR+file_name)
    # NOTE(review): an `else:` appears to be missing here. As written, the
    # generation code below runs inside the "cache exists" branch and
    # overwrites the frame that was just loaded.
    print ('Generating file '+file_name)       
    GTbbox_df = pd.DataFrame(columns=['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax'])  
    # One annotation JSON file per class name.
    for c in CROP_CLASSES:
        j = json.load(open('../data/BBannotations/{}.json'.format(c), 'r'))
        for l in j: 
            filename = l["filename"]
            head, image_file = os.path.split(filename)
            basename, file_extension = os.path.splitext(image_file) 
            # NOTE(review): the right-hand side of this assignment is missing;
            # the next line reads `.size`, so presumably the image was opened
            # with PIL here -- confirm the path that was used.
            image =
            width_image, height_image = image.size
            for i in range(len(l["annotations"])):
                a = l["annotations"][i]
                # Annotations store the top-left corner plus width/height.
                xmin = (a["x"])
                ymin = (a["y"])
                width = (a["width"])
                height = (a["height"])
                xmax = xmin + width
                ymax = ymin + height
                # The box must overlap the image by a non-empty area.
                assert max(xmin,0)<min(xmax,width_image)
                assert max(ymin,0)<min(ymax,height_image)
                # NOTE(review): no visible statement adds a row to GTbbox_df,
                # so the frame stays empty as written -- an append seems to be
                # missing before this check.
                if a["class"] != c: print(GTbbox_df.tail(1))  
    # Size the validation split so that the training split (about 80%) is an
    # exact multiple of BATCHSIZE.
    test_size = GTbbox_df.shape[0]-int(math.ceil(GTbbox_df.shape[0]*0.8/BATCHSIZE)*BATCHSIZE)
    train_ind, valid_ind = train_test_split(range(GTbbox_df.shape[0]), test_size=test_size, random_state=1986, stratify=GTbbox_df['crop_class'])
    GTbbox_df['split'] = ['train' if i in train_ind else 'valid' for i in range(GTbbox_df.shape[0])]
    # NOTE(review): nothing visible writes GTbbox_df back to
    # OUTPUT_DIR+file_name, so the cache is never created by this cell.

Loading from file GTbbox_df.pickle

In [4]:
#Load data

def data_from_df(df):
    """Turn a bounding-box table into image and one-hot label arrays.

    Each row's crop is loaded from TEST_DIR + row['image_file'], cut to the
    row's (xmin, ymin, xmax, ymax) box and resized to ROWS x COLS.

    Returns:
        (X, y): X is a uint8 array of shape (len(df), ROWS, COLS, 3); y is a
        one-hot array of shape (len(df), len(CROP_CLASSES)) built from
        row['crop_class'].
    """
    n_rows = df.shape[0]
    X = np.ndarray((n_rows, ROWS, COLS, 3), dtype=np.uint8)
    y = np.zeros((n_rows, len(CROP_CLASSES)), dtype=K.floatx())
    # Rows are stored by position, not by the DataFrame's index labels.
    for position, (_, record) in enumerate(df.iterrows()):
        image_file = record['image_file']
        fish = record['crop_class']
        bbox = [record[column] for column in ('xmin', 'ymin', 'xmax', 'ymax')]
        cropped = load_img(TEST_DIR + image_file, bbox, target_size=(ROWS, COLS))
        X[position] = np.asarray(cropped)
        y[position, CROP_CLASSES.index(fish)] = 1
    return (X, y)

def data_load(name):
    """Return the (X, y) arrays for one data split, using a pickle cache.

    Args:
        name: 'train' or 'valid' to select the rows of GTbbox_df with that
            value in the 'split' column, or 'all' for every row.

    Returns:
        (X, y) as produced by data_from_df. The arrays are read from
        OUTPUT_DIR/data_<name>_<ROWS>_<COLS>.pickle when that file exists;
        otherwise they are generated and written to that file.

    Raises:
        ValueError: if no cache exists and `name` is not one of the values above.
    """
    file_name = 'data_'+name+'_{}_{}.pickle'.format(ROWS, COLS)
    cache_path = OUTPUT_DIR + file_name

    if os.path.exists(cache_path):
        print ('Loading from file '+file_name)
        # NOTE: pickle.load can execute arbitrary code; only load caches this
        # notebook wrote itself.
        with open(cache_path, 'rb') as f:
            data = pickle.load(f)
        return (data['X'], data['y'])

    print ('Generating file '+file_name)
    if name == 'train' or name == 'valid':
        df = GTbbox_df[GTbbox_df['split'] == name]
    elif name == 'all':
        df = GTbbox_df
    else:
        raise ValueError('Invalid name '+name)
    X, y = data_from_df(df)

    with open(cache_path, 'wb') as f:
        pickle.dump({'X': X, 'y': y}, f)
    return (X, y)
# Load (or generate and cache) the training and validation arrays.
X_train, y_train = data_load('train')
X_valid, y_valid = data_load('valid')
print('Loading data done.')
print('train sample ', X_train.shape[0])
print('valid sample ', X_valid.shape[0])
# astype returns float32 copies, so the in-place division below is allowed.
X_train = X_train.astype(np.float32)
X_valid = X_valid.astype(np.float32)
print('Convert to float32 done.')
X_train /= 255.
X_valid /= 255.
print('Rescale by 255 done.')
# Each split is centered with its own mean; X_train and X_valid stay uncentered.
# NOTE(review): the results are stored as X_train_centerd / X_valid_centerd
# (sic). The training cells pass X_train and X_valid, and the log-loss cell
# uses X_*_centered from another cell, so these two names look unused -- confirm.
X_train_centerd = featurewise_center(X_train)
print('mean of X_train is ', mean(X_train))
X_valid_centerd = featurewise_center(X_valid)
print('mean of X_valid is ', mean(X_valid))
print('Featurewise centered done.')

Loading from file data_train_224_224.pickle
Loading from file data_valid_224_224.pickle
Loading data done.
train sample  3584
valid sample  787
Convert to float32 done.
Rescale by 255 done.
mean of X_train is  [ 0.40704539  0.43806663  0.39486334]
mean of X_valid is  [ 0.4065561   0.43584293  0.39404479]
Featurewise centered done.

In [5]:
# Per-class loss weights, keyed by the class's position in CROP_CLASSES:
#     weight = (row count of the largest class) / (row count of this class)
# so the largest class gets 1.0 and smaller classes get proportionally more.
# (A "balanced" variant, n_samples / (n_classes * class_count), is disabled.)
class_weight_fish = dict(GTbbox_df.groupby('crop_class').size())
ref = max(class_weight_fish.values())
class_weight = dict(
    (CROP_CLASSES.index(fish), ref/count)
    for fish, count in class_weight_fish.items()
)

{0: 1.0,
 1: 8.212418300653594,
 2: 19.944444444444443,
 3: 23.933333333333334,
 4: 7.5465465465465469,
 5: 13.296296296296296,
 6: 3.1451814768460578}

In [10]:
#data preprocessing

# NOTE(review): the argument list and closing parenthesis of this
# ImageDataGenerator(...) call are missing, so the cell does not parse as
# written. The augmentation settings cannot be recovered from here -- restore
# them from the original notebook.
train_datagen = ImageDataGenerator(
# Shuffled batches of BATCHSIZE samples drawn from the in-memory training arrays.
train_generator = train_datagen.flow(X_train, y_train, batch_size=BATCHSIZE, shuffle=True, seed=None)
# The training set must split into whole batches for steps_per_epoch to cover
# it exactly once.
assert X_train.shape[0]%BATCHSIZE==0
steps_per_epoch = int(X_train.shape[0]/BATCHSIZE)

In [7]:

# Callbacks shared by the training cells below; the first three all monitor
# the validation loss.

# Write training logs to LOG_DIR for TensorBoard.
tensorboard = TensorBoard(
    log_dir=LOG_DIR,
    histogram_freq=0,
    write_graph=False,
    write_images=True,
)

# Multiply the learning rate by 0.1 after 5 epochs without improvement.
learningrate_schedule = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=5,
    verbose=1,
    mode='auto',
    epsilon=0.001,
    cooldown=0,
    min_lr=0,
)

# Save the full model whenever val_loss improves; get_best_model parses the
# loss back out of this file name.
model_checkpoint = ModelCheckpoint(
    filepath=CHECKPOINT_DIR+'weights.{epoch:03d}-{val_loss:.4f}.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
)

# Stop after 10 epochs without improvement.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    verbose=1,
    mode='auto',
)

In [16]:
def create_model_VGG16():
    """Build a VGG16-shaped, all-convolutional classifier for ROWS x COLS RGB input.

    The network stacks thirteen 3x3 'same' convolutions named after the VGG16
    layers. The last convolution of each block uses stride 2 in place of a
    pooling layer. The final feature map is global-average-pooled so the
    softmax layer produces one probability vector of length
    len(CROP_CLASSES) per image.

    Returns:
        An uncompiled keras Sequential model.
    """
    # (filters, stride, layer name) for every convolution, in order.
    conv_specs = [
        (64, 1, 'block1_conv1'), (64, 2, 'block1_conv2'),
        (128, 1, 'block2_conv1'), (128, 2, 'block2_conv2'),
        (256, 1, 'block3_conv1'), (256, 1, 'block3_conv2'), (256, 2, 'block3_conv3'),
        (512, 1, 'block4_conv1'), (512, 1, 'block4_conv2'), (512, 2, 'block4_conv3'),
        (512, 1, 'block5_conv1'), (512, 1, 'block5_conv2'), (512, 2, 'block5_conv3'),
    ]

    model = Sequential()
    for position, (filters, stride, layer_name) in enumerate(conv_specs):
        layer_kwargs = dict(strides=(stride, stride), padding='same', name=layer_name)
        if position == 0:
            # A Sequential model needs the input shape on its first layer.
            layer_kwargs['input_shape'] = (ROWS, COLS, 3)
        # NOTE(review): the convolutions have no activation, as in the
        # original; confirm whether a non-linearity was intended.
        model.add(Conv2D(filters, (3, 3), **layer_kwargs))

    # Collapse the spatial dimensions so Dense sees one vector per image.
    model.add(GlobalAveragePooling2D())
    model.add(Dense(len(CROP_CLASSES), activation='softmax'))
    return model

In [2]:
def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
      tower_grads: List with one entry per tower. Each entry is that tower's
        list of (gradient, variable) tuples, with the variables in the same
        order in every tower.

    Returns:
      List of pairs of (gradient, variable) where the gradient has been
      averaged across all towers. Variables for which no tower produced a
      gradient are left out.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        # Add a leading 'tower' dimension to every gradient so they can be
        # stacked and averaged; skip towers that reported no gradient (None).
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        if not grads:
            continue

        # Average over the 'tower' dimension. Keyword arguments avoid relying
        # on the positional order of tf.concat.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    return average_grads

In [ ]:
# Experimental tower-style training of create_model_VGG16 with raw TensorFlow
# ops. Only one tower ("replica 0") is active; the second set of placeholders
# is kept for a future second replica.
# NOTE(review): `lr`, `FLAGS` and `mnist_data` are not defined in the visible
# part of the notebook, so this cell cannot run until they are supplied. The
# batches fed below must match the placeholder shapes.
with tf.device('/cpu:0'):
    x_0 = tf.placeholder(tf.float32, shape=(None,ROWS,COLS,3))
    # The label width must match the model's softmax output.
    y_0 = tf.placeholder(tf.float32, shape=(None, len(CROP_CLASSES)))
    x_1 = tf.placeholder(tf.float32, shape=(None,ROWS,COLS,3))
    y_1 = tf.placeholder(tf.float32, shape=(None, len(CROP_CLASSES)))
    # shared model living on CPU:0
    # it won't actually be run during training; it acts as an op template
    # and as a repository for shared variables
    model = create_model_VGG16()
    opt = tf.train.GradientDescentOptimizer(lr)

# replica 0
with tf.device('/gpu:0'):
    preds_0 = model(x_0)  # all ops in the replica will live on GPU:0
    loss_0 = tf.reduce_mean(categorical_crossentropy(y_0, preds_0))
    # Gradients of this tower's own loss.
    grads_0 = opt.compute_gradients(loss_0)

# merge outputs on CPU
with tf.device('/cpu:0'):
    loss = loss_0
    grads = average_gradients([grads_0])
    apply_gradient_op = opt.apply_gradients(grads)
    update_ops = []
    for old_value, new_value in model.updates:
        update_ops.append(tf.assign(old_value, new_value))
    # One op that applies the gradients and runs the model's own updates.
    train_op = tf.group(apply_gradient_op, *update_ops)

with sess.as_default():
    # The variables were created in the graph but never initialised in `sess`.
    sess.run(tf.global_variables_initializer())
    for step in range(FLAGS.max_steps):
        start_time = time.time()
        batch = mnist_data.train.next_batch(50)
        # learning_phase 1 selects training behaviour in Keras layers.
        _, loss_value = sess.run([train_op, loss],
                                 feed_dict={x_0: batch[0], y_0: batch[1], K.learning_phase(): 1})
        duration = time.time() - start_time

In [ ]:
#train from scratch

from keras.applications.vgg16 import VGG16

# VGG16 convolutional base with randomly initialised weights (weights=None),
# followed by a single softmax layer with one unit per crop class.
base_model = VGG16(
    include_top=False,
    weights=None,
    input_shape=(224,224,3),
    pooling='avg',
)
x = base_model.output
predictions = Dense(len(CROP_CLASSES), activation="softmax")(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

# every layer is trainable here, so the model is compiled straight away
optimizer = Adam(lr=1e-5)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# train on the augmented generator, validating on the in-memory arrays
model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
    validation_data=(X_valid,y_valid),
    class_weight=class_weight,
    workers=3,
    pickle_safe=True,
)

Epoch 1/30
55/56 [============================>.] - ETA: 3s - loss: 7.9107 - acc: 0.1372  Epoch 00000: val_loss did not improve
56/56 [==============================] - 200s - loss: 7.8731 - acc: 0.1381 - val_loss: 1.9485 - val_acc: 0.1487
Epoch 2/30
55/56 [============================>.] - ETA: 3s - loss: 7.4868 - acc: 0.0952  Epoch 00001: val_loss improved from 1.93584 to 1.93313, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.001-1.9331.hdf5
56/56 [==============================] - 201s - loss: 7.5243 - acc: 0.0946 - val_loss: 1.9331 - val_acc: 0.0712
Epoch 3/30
55/56 [============================>.] - ETA: 3s - loss: 7.2188 - acc: 0.0628  Epoch 00002: val_loss improved from 1.93313 to 1.88218, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.002-1.8822.hdf5
56/56 [==============================] - 201s - loss: 7.2241 - acc: 0.0619 - val_loss: 1.8822 - val_acc: 0.0724
Epoch 4/30
55/56 [============================>.] - ETA: 3s - loss: 6.9229 - acc: 0.0730  Epoch 00003: val_loss did not improve
56/56 [==============================] - 201s - loss: 6.9774 - acc: 0.0725 - val_loss: 1.9073 - val_acc: 0.0775
Epoch 5/30
44/56 [======================>.......] - ETA: 40s - loss: 6.5007 - acc: 0.0778 

In [ ]:
# Stand-alone example: one small dense model evaluated on two GPUs, with the
# two outputs averaged on the CPU.
# NOTE(review): `data` is not defined in the visible part of the notebook. It
# must be an array with 784 columns to match the placeholder `x`.
with tf.device('/cpu:0'):
    x = tf.placeholder(tf.float32, shape=(None, 784))

    # shared model living on CPU:0
    # it won't actually be run during training; it acts as an op template
    # and as a repository for shared variables
    model = Sequential()
    model.add(Dense(32, activation='relu', input_dim=784))
    model.add(Dense(10, activation='softmax'))

# replica 0
with tf.device('/gpu:0'):
    output_0 = model(x)  # all ops in the replica will live on GPU:0

# replica 1
with tf.device('/gpu:1'):
    output_1 = model(x)  # all ops in the replica will live on GPU:1

# merge outputs on CPU
with tf.device('/cpu:0'):
    preds = 0.5 * (output_0 + output_1)

# we only run the `preds` tensor, so that only the two
# replicas on GPU get run (plus the merge op on CPU)
output_value = sess.run([preds], feed_dict={x: data})

In [18]:
#top layer training

from keras.applications.vgg16 import VGG16

# ImageNet-initialised VGG16 convolutional base, global average pooling, then
# a freshly initialised softmax layer with one unit per crop class.
base_model = VGG16(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(
    len(CROP_CLASSES),
    activation="softmax",
    kernel_initializer="glorot_normal",
)(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
optimizer = Adam(lr=1e-3)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# train the model on the new data for a few epochs
model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
    validation_data=(X_valid,y_valid),
    class_weight=class_weight,
    workers=3,
    pickle_safe=True,
)

/usr/lib/python2.7/dist-packages/ipykernel/ UserWarning: Update your `Model` call to the Keras 2 API: `Model(outputs=Tensor("de..., inputs=Tensor("in...)`
Epoch 1/30
27/28 [===========================>..] - ETA: 2s - loss: 7.7168 - acc: 0.1522 Epoch 00000: val_loss improved from inf to 1.90791, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.000-1.9079.hdf5
28/28 [==============================] - 73s - loss: 7.6927 - acc: 0.1540 - val_loss: 1.9079 - val_acc: 0.0978
Epoch 2/30
27/28 [===========================>..] - ETA: 2s - loss: 7.4846 - acc: 0.1748 Epoch 00001: val_loss improved from 1.90791 to 1.81988, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.001-1.8199.hdf5
28/28 [==============================] - 70s - loss: 7.4654 - acc: 0.1797 - val_loss: 1.8199 - val_acc: 0.2694
Epoch 3/30
27/28 [===========================>..] - ETA: 2s - loss: 7.2855 - acc: 0.2263 Epoch 00002: val_loss improved from 1.81988 to 1.74448, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.002-1.7445.hdf5
28/28 [==============================] - 70s - loss: 7.2616 - acc: 0.2324 - val_loss: 1.7445 - val_acc: 0.4422
Epoch 4/30
27/28 [===========================>..] - ETA: 2s - loss: 7.1436 - acc: 0.2729 Epoch 00003: val_loss did not improve
28/28 [==============================] - 70s - loss: 7.0712 - acc: 0.2732 - val_loss: 1.7748 - val_acc: 0.2579
Epoch 5/30
27/28 [===========================>..] - ETA: 2s - loss: 6.7608 - acc: 0.2977 Epoch 00004: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.7718 - acc: 0.2974 - val_loss: 1.7636 - val_acc: 0.3126
Epoch 6/30
27/28 [===========================>..] - ETA: 2s - loss: 6.7911 - acc: 0.3432 Epoch 00005: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.8124 - acc: 0.3384 - val_loss: 1.8264 - val_acc: 0.2058
Epoch 7/30
27/28 [===========================>..] - ETA: 2s - loss: 6.6596 - acc: 0.2497 Epoch 00006: val_loss improved from 1.74448 to 1.62738, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.006-1.6274.hdf5
28/28 [==============================] - 70s - loss: 6.6208 - acc: 0.2567 - val_loss: 1.6274 - val_acc: 0.4816
Epoch 8/30
27/28 [===========================>..] - ETA: 2s - loss: 6.5136 - acc: 0.3284 Epoch 00007: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.5326 - acc: 0.3256 - val_loss: 1.6966 - val_acc: 0.3151
Epoch 9/30
27/28 [===========================>..] - ETA: 2s - loss: 6.3156 - acc: 0.3050 Epoch 00008: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.3584 - acc: 0.3050 - val_loss: 1.6584 - val_acc: 0.3723
Epoch 10/30
27/28 [===========================>..] - ETA: 2s - loss: 6.1714 - acc: 0.3093 Epoch 00009: val_loss improved from 1.62738 to 1.58501, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.009-1.5850.hdf5
28/28 [==============================] - 70s - loss: 6.1751 - acc: 0.3133 - val_loss: 1.5850 - val_acc: 0.4816
Epoch 11/30
27/28 [===========================>..] - ETA: 2s - loss: 6.1054 - acc: 0.3492 Epoch 00010: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.1497 - acc: 0.3479 - val_loss: 1.6277 - val_acc: 0.3126
Epoch 12/30
27/28 [===========================>..] - ETA: 2s - loss: 6.2473 - acc: 0.3084 Epoch 00011: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.2675 - acc: 0.3128 - val_loss: 1.6294 - val_acc: 0.3596
Epoch 13/30
27/28 [===========================>..] - ETA: 2s - loss: 6.1853 - acc: 0.3012 Epoch 00012: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.1724 - acc: 0.3027 - val_loss: 1.6156 - val_acc: 0.3456
Epoch 14/30
27/28 [===========================>..] - ETA: 2s - loss: 6.0290 - acc: 0.3345 Epoch 00013: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.0486 - acc: 0.3340 - val_loss: 1.6120 - val_acc: 0.3380
Epoch 15/30
27/28 [===========================>..] - ETA: 2s - loss: 5.6712 - acc: 0.3600 Epoch 00014: val_loss improved from 1.58501 to 1.57953, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.014-1.5795.hdf5
28/28 [==============================] - 70s - loss: 5.6849 - acc: 0.3583 - val_loss: 1.5795 - val_acc: 0.3443
Epoch 16/30
27/28 [===========================>..] - ETA: 2s - loss: 5.7416 - acc: 0.3339 Epoch 00015: val_loss improved from 1.57953 to 1.53152, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.015-1.5315.hdf5
28/28 [==============================] - 70s - loss: 5.7510 - acc: 0.3357 - val_loss: 1.5315 - val_acc: 0.4168
Epoch 17/30
27/28 [===========================>..] - ETA: 2s - loss: 5.6953 - acc: 0.3637 Epoch 00016: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.7544 - acc: 0.3664 - val_loss: 1.5724 - val_acc: 0.3520
Epoch 18/30
27/28 [===========================>..] - ETA: 2s - loss: 5.8233 - acc: 0.3345 Epoch 00017: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.8066 - acc: 0.3318 - val_loss: 1.5735 - val_acc: 0.3596
Epoch 19/30
27/28 [===========================>..] - ETA: 2s - loss: 5.7534 - acc: 0.3429 Epoch 00018: val_loss improved from 1.53152 to 1.49215, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.018-1.4922.hdf5
28/28 [==============================] - 71s - loss: 5.7191 - acc: 0.3471 - val_loss: 1.4922 - val_acc: 0.4358
Epoch 20/30
27/28 [===========================>..] - ETA: 2s - loss: 5.6824 - acc: 0.3776 Epoch 00019: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.6767 - acc: 0.3767 - val_loss: 1.5388 - val_acc: 0.4066
Epoch 21/30
27/28 [===========================>..] - ETA: 2s - loss: 5.3459 - acc: 0.3579 Epoch 00020: val_loss improved from 1.49215 to 1.45708, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.020-1.4571.hdf5
28/28 [==============================] - 70s - loss: 5.3862 - acc: 0.3591 - val_loss: 1.4571 - val_acc: 0.4435
Epoch 22/30
27/28 [===========================>..] - ETA: 2s - loss: 5.2556 - acc: 0.4207 Epoch 00021: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.2532 - acc: 0.4182 - val_loss: 1.4993 - val_acc: 0.4028
Epoch 23/30
27/28 [===========================>..] - ETA: 2s - loss: 5.4009 - acc: 0.3918 Epoch 00022: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.4241 - acc: 0.3887 - val_loss: 1.5027 - val_acc: 0.4091
Epoch 24/30
27/28 [===========================>..] - ETA: 2s - loss: 5.7004 - acc: 0.3484 Epoch 00023: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.7178 - acc: 0.3502 - val_loss: 1.4978 - val_acc: 0.4053
Epoch 25/30
27/28 [===========================>..] - ETA: 2s - loss: 5.5212 - acc: 0.3828 Epoch 00024: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.5192 - acc: 0.3853 - val_loss: 1.5472 - val_acc: 0.3469
Epoch 26/30
27/28 [===========================>..] - ETA: 2s - loss: 5.2935 - acc: 0.3788 Epoch 00025: val_loss improved from 1.45708 to 1.44253, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.025-1.4425.hdf5
28/28 [==============================] - 70s - loss: 5.2945 - acc: 0.3795 - val_loss: 1.4425 - val_acc: 0.4676
Epoch 27/30
27/28 [===========================>..] - ETA: 2s - loss: 5.2318 - acc: 0.3837 Epoch 00026: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.2567 - acc: 0.3839 - val_loss: 1.4473 - val_acc: 0.4435
Epoch 28/30
27/28 [===========================>..] - ETA: 2s - loss: 5.3121 - acc: 0.4135 Epoch 00027: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.3075 - acc: 0.4099 - val_loss: 1.5131 - val_acc: 0.4003
Epoch 29/30
27/28 [===========================>..] - ETA: 2s - loss: 5.3136 - acc: 0.3953 Epoch 00028: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.3020 - acc: 0.3951 - val_loss: 1.4609 - val_acc: 0.4104
Epoch 30/30
27/28 [===========================>..] - ETA: 2s - loss: 5.1836 - acc: 0.3964 Epoch 00029: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.1668 - acc: 0.3954 - val_loss: 1.4685 - val_acc: 0.3926
<keras.callbacks.History at 0x7f8712143a10>

In [ ]:
# VGG16
# fine tuning
start_layer = 1

# Continue from the best checkpoint written so far.
model, model_name = get_best_model()

# Freeze every layer before start_layer and train every layer from it onwards.
for position, layer in enumerate(model.layers):
    layer.trainable = position >= start_layer

# we need to recompile the model for these modifications to take effect
optimizer = Adam(lr=1e-3)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=300,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
    validation_data=(X_valid,y_valid),
    class_weight=class_weight,
    workers=3,
    pickle_safe=True,
)
#Resnet50 #top layer training from keras.applications.resnet50 import ResNet50 base_model = ResNet50(weights='imagenet', include_top=False) x = base_model.output x = GlobalAveragePooling2D()(x) # x = Flatten()(x) # x = Dense(256, init='glorot_normal', activation='relu')(x) # x = LeakyReLU(alpha=0.33)(x) # x = Dropout(0.5)(x) # x = Dense(256, init='glorot_normal', activation='relu')(x) # x = Dense(256, init='glorot_normal')(x) # x = LeakyReLU(alpha=0.33)(x) x = Dropout(0.8)(x) predictions = Dense(len(CROP_CLASSES), init='glorot_normal', activation='softmax')(x) model = Model(input=base_model.input, output=predictions) # first: train only the top layers (which were randomly initialized) for layer in base_model.layers: layer.trainable = False # compile the model (should be done *after* setting layers to non-trainable) optimizer = Adam(lr=LEARNINGRATE) model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) # train the model on the new data for a few epochs model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=30, verbose=1, callbacks=[model_checkpoint, tensorboard], validation_data=valid_generator, nb_val_samples=len(X_valid), class_weight=class_weight, nb_worker=3, pickle_safe=True)
# Resnet50 # fine tuning # 164 conv5c+top # 142 conv5+top # 80 conv4+conv5+top # 38 conv3+conv4+conv5+top start_layer = 38 model, model_name = get_best_model() # print('Loading model from weights.004-0.0565.hdf5') # model = load_model('./checkpoints/checkpoint3/weights.004-0.0565.hdf5') for layer in model.layers[:start_layer]: layer.trainable = False for layer in model.layers[start_layer:]: layer.trainable = True # we need to recompile the model for these modifications to take effect # we use SGD with a low learning rate optimizer = Adam(lr=1e-5) model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=300, verbose=1, callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], validation_data=valid_generator, nb_val_samples=len(X_valid), class_weight=class_weight, nb_worker=3, pickle_safe=True)

In [ ]:
#resume training

# Reload the best checkpoint (architecture, weights and optimizer state) and
# keep training it with the same generator, callbacks and class weights as
# the other training cells.
model, model_name = get_best_model()

# Keras 2 argument names and the in-memory validation arrays, as in the other
# training cells; no validation generator is defined in the visible notebook.
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=100, verbose=1,
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
                    validation_data=(X_valid,y_valid), class_weight=class_weight, workers=3, pickle_safe=True)

In [4]:
#test prepare

# Load the checkpoint whose file name records the lowest validation loss.
# NOTE(review): this reads from the 'resnet50_FT38_CW_STGTrain' experiment
# directory, not from CHECKPOINT_DIR of the current CROP_MODEL -- confirm this
# is the intended model.
test_model, test_model_name = get_best_model(checkpoint_dir='./resnet50_FT38_CW_STGTrain/checkpoint/')
# print('Loading model from weights.004-0.0565.hdf5')
# test_model = load_model('./checkpoints/checkpoint2/weights.004-0.0565.hdf5')

def test_generator(df, mean, datagen = None, batch_size = BATCHSIZE):
    """Yield preprocessed image batches for the rows of `df`, cycling forever.

    Args:
        df: DataFrame with 'image_file', 'xmin', 'ymin', 'xmax' and 'ymax'
            columns; rows are consumed in positional order.
        mean: per-channel values subtracted by preprocess_input.
        datagen: optional object whose `random_transform(x)` is applied to
            each rescaled crop (test-time augmentation).
        batch_size: maximum number of rows per batch; the last batch of a
            pass may be smaller.

    Yields:
        Float arrays of shape (rows_in_batch, ROWS, COLS, 3). After the last
        row the generator starts again from the first one.

    Raises:
        ValueError: on the first `next()` if `df` has no rows.
    """
    n = df.shape[0]
    if n == 0:
        raise ValueError('test_generator needs a non-empty DataFrame')
    batch_index = 0
    while 1:
        current_index = batch_index * batch_size
        if n > current_index + batch_size:
            current_batch_size = batch_size
            batch_index += 1
        else:
            # Last (possibly short) batch of this pass: wrap around afterwards.
            # Using `>` above keeps an exact multiple of batch_size from
            # producing an empty trailing batch.
            current_batch_size = n - current_index
            batch_index = 0
        batch_df = df.iloc[current_index:current_index+current_batch_size]
        batch_x = np.zeros((batch_df.shape[0], ROWS, COLS, 3), dtype=K.floatx())
        for i, (index, row) in enumerate(batch_df.iterrows()):
            image_file = row['image_file']
            bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
            cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
            # Same preprocessing order as training: rescale, augment, center.
            x = np.asarray(cropped, dtype=K.floatx())
            x /= 255.
            if datagen is not None: x = datagen.random_transform(x)
            x = preprocess_input(x, mean)
            batch_x[i] = x
        if batch_index%50 == 0: print('batch_index', batch_index)
        yield batch_x
# NOTE(review): the argument list and closing parenthesis of this call are
# missing, so the line does not parse as written. The test-time augmentation
# settings cannot be recovered from here -- restore them from the original
# notebook.
test_aug_datagen = ImageDataGenerator(

Loading model from checkpoint file ./resnet50_FT38_CW_STGTrain/checkpoint/weights.000-0.0327.hdf5
Loading model Done!

In [5]:
# Per-channel mean passed as `mean` to test_generator in the prediction cell.
# NOTE(review): hard-coded, and different from the X_train mean printed by the
# data-loading cell -- presumably computed on another training set; confirm.
train_mean = [0.37698776,  0.41491762,  0.38681713]

In [ ]:
# NOTE(review): this overwrites the hard-coded train_mean from the previous
# cell. valid_datagen is not defined in the visible part of the notebook, and
# valid_mean is only referenced from commented-out code -- confirm.
train_mean = train_datagen.mean
valid_mean = valid_datagen.mean
# Centered copies (each split uses its own mean) that the log-loss cell feeds
# to predict().
X_train_centered = featurewise_center(X_train)
X_valid_centered = featurewise_center(X_valid)

In [ ]:
#validation data fish logloss
def _per_sample_logloss(y_true, y_pred, class_names):
    """Return the log loss and true class label of every sample.

    Args:
        y_true: 2-D array; the row-wise argmax selects the true class index.
        y_pred: 2-D array indexed as ``y_pred[sample, class_index]``.
        class_names: sequence mapping a class index to its label.

    Returns:
        DataFrame with the columns ``logloss`` and ``class``, one row per
        sample, in the order of ``y_true``.

    Raises:
        ValueError: raised by ``math.log`` when the predicted probability of
            the true class is exactly 0.
    """
    records = []
    for i in range(y_true.shape[0]):
        index = np.argmax(y_true[i,:])
        records.append((-math.log(y_pred[i,index]), class_names[index]))
    # Build the frame once instead of leaving it empty: the per-sample values
    # were previously computed inside the loop and then discarded.
    return pd.DataFrame(records, columns=['logloss','class'])

valid_pred = test_model.predict(X_valid_centered, batch_size=BATCHSIZE, verbose=1)
# valid_pred = test_model.predict_generator(test_generator(df=valid_df, mean=valid_mean),
#                                           val_samples=valid_df.shape[0], nb_worker=1, pickle_safe=False)
valid_logloss_df = _per_sample_logloss(y_valid, valid_pred, FISH_CLASSES)

train_pred = test_model.predict(X_train_centered, batch_size=BATCHSIZE, verbose=1)
# train_pred = test_model.predict_generator(test_generator(df=train_df, ),
#                                           val_samples=train_df.shape[0], nb_worker=1, pickle_safe=False)
train_logloss_df = _per_sample_logloss(y_train, train_pred, FISH_CLASSES)

In [8]:
#GTbbox_CROPpred_df = ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax',
#                      'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'logloss']

# Crop-classifier predictions for the ground-truth boxes, cached per model
# checkpoint. The cache is loaded when present and (re)built otherwise.
file_name = 'GTbbox_CROPpred_df_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    GTbbox_CROPpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    nb_augmentation = 1
    if nb_augmentation ==1:
        # Single deterministic pass, no test-time augmentation.
        test_preds = test_model.predict_generator(test_generator(df=GTbbox_df, mean=train_mean),
                                                  val_samples=GTbbox_df.shape[0], nb_worker=1, pickle_safe=False)
    else:
        # Test-time augmentation: average the predictions of several randomly
        # transformed passes over the same rows.
        test_preds = np.zeros((GTbbox_df.shape[0], len(FISH_CLASSES)), dtype=K.floatx())
        for idx in range(nb_augmentation):
            print('{}th augmentation for testing ...'.format(idx+1))
            test_preds += test_model.predict_generator(test_generator(df=GTbbox_df, mean=train_mean, datagen=test_aug_datagen),
                                                       val_samples=GTbbox_df.shape[0], nb_worker=1, pickle_safe=False)
        test_preds /= nb_augmentation

    CROPpred_df = pd.DataFrame(test_preds, columns=['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT'])
    # concat(axis=1) aligns on the index, so GTbbox_df must carry the default
    # 0..n-1 index for rows and predictions to line up.
    GTbbox_CROPpred_df = pd.concat([GTbbox_df,CROPpred_df], axis=1)
    # Per-box log loss: minus the log of the probability given to the true class.
    GTbbox_CROPpred_df['logloss'] = GTbbox_CROPpred_df.apply(lambda row: -math.log(row[row['crop_class']]), axis=1)
    # Persist the result so the load branch above can be taken on the next run.
    GTbbox_CROPpred_df.to_pickle(OUTPUT_DIR+file_name)

#logloss of every fish class

Loading from file GTbbox_CROPpred_df_weights.000-0.0327.hdf5_.pickle
ALB      0.076577
BET      0.139025
DOL      0.126520
LAG      0.000761
NoF      0.051943
OTHER    0.133949
SHARK    0.018328
YFT      0.090739
Name: logloss, dtype: float64

In [9]:
# RFCNbbox_RFCNpred_df = ['image_class','image_file','crop_index','xmin','ymin','xmax','ymax',
#                          'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                          'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']
# select fish_conf >= CONF_THRESH

# Table of RFCN detections kept at CONF_THRESH, cached under a file name that
# encodes the threshold.
file_name = 'RFCNbbox_RFCNpred_df_conf{:.2f}.pickle'.format(CONF_THRESH)
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    RFCNbbox_RFCNpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
    # NOTE(review): no `else:` is visible between the cached load above and the
    # rebuild below, so as shown the rebuild also runs after a successful load
    # and never runs when the file is missing — confirm whether an `else:` was
    # lost in the export.
    print ('Generating file '+file_name)        
    RFCNbbox_RFCNpred_df = pd.DataFrame(columns=['image_class','image_file','crop_index','xmin','ymin','xmax','ymax',
                                                  'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
                                                  'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']) 

    # NOTE(review): encoding='latin1' presumably because the detections were
    # pickled under Python 2 — confirm. Only load pickles from trusted sources.
    with open('../data/RFCN_detections/detections_full_AGNOSTICnms_'+RFCN_MODEL+'.pkl','rb') as f:
        detections_full_AGNOSTICnms = pickle.load(f, encoding='latin1') 
    with open("../RFCN/ImageSets/Main/test.txt","r") as f:
        test_files = f.readlines()
    with open("../RFCN/ImageSets/Main/train_test.txt","r") as f:
        train_file_labels = f.readlines()
    # One detection entry is expected per line of test.txt, in the same order.
    assert len(detections_full_AGNOSTICnms) == len(test_files)
    # count[im] records how many boxes were kept for image im.
    count = np.zeros(len(detections_full_AGNOSTICnms))
    for im in range(len(detections_full_AGNOSTICnms)):
        if im%1000 == 0: print(im)
        # The first 9 characters of the list entry are used as the image base name.
        basename = test_files[im][:9]
        # NOTE(review): as shown, both the '--' placeholder and the label lookup
        # sit under `im<1000`, leaving image_class unset for later images;
        # presumably an `else:` separated them (first 1000 entries unlabelled,
        # the rest looked up in train_test.txt) — confirm.
        if im<1000:
            image_class = '--'
            for i in range(len(train_file_labels)):
                if train_file_labels[i][:9] == basename:
                    image_class = train_file_labels[i][10:-1]
        # NOTE(review): the next statement is incomplete in this view (unbalanced
        # parenthesis). The following line reads `.size`, so presumably an image
        # is opened here — confirm against the original notebook.
        image ='/'+basename+'.jpg')
        width_image, height_image = image.size
        bboxes = []
        detects_im = detections_full_AGNOSTICnms[im]
        for i in range(len(detects_im)):
#             if np.sum(detects_im[i,5:]) >= CONF_THRESH:
            # Test whether the largest value in columns 5 onward reaches
            # CONF_THRESH. NOTE(review): the body of this `if` is not visible here.
            if np.max(detects_im[i,5:]) >= CONF_THRESH:
        count[im] = len(bboxes)
        if len(bboxes) == 0:
            # No box passed the threshold: take the index of the detection with
            # the largest summed values in columns 5 onward.
            # NOTE(review): what is done with `ind` is not visible here.
            ind = np.argmax(np.sum(detects_im[:,5:], axis=1))
        bboxes = np.asarray(bboxes)

        for j in range(len(bboxes)):    
            bbox = bboxes[j]
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            # After clipping to the image, the box must keep a positive width and height.
            assert max(xmin,0)<min(xmax,width_image)
            assert max(ymin,0)<min(ymax,height_image)

Loading from file RFCNbbox_RFCNpred_df_conf0.80.pickle

In [31]:

image_file crop_index crop_class xmin ymin xmax ymax ALB BET DOL LAG NoF OTHER SHARK YFT logloss
0 img_00003.jpg 0.0 ALB 377.000000 66.000000 730.000000 173.000000 9.999806e-01 1.153381e-06 6.197113e-10 1.895012e-10 3.613221e-06 1.272149e-05 5.510684e-09 2.063266e-06 1.943130e-05
1 img_00003.jpg 1.0 ALB 670.000000 95.000000 1008.000000 219.000000 9.999139e-01 3.154530e-08 1.504183e-09 1.004528e-09 1.135657e-06 8.434706e-05 8.872343e-12 5.066360e-07 8.607281e-05
2 img_00003.jpg 2.0 ALB 820.000000 328.000000 1123.000000 485.000000 9.999986e-01 3.259925e-07 2.411850e-12 2.287089e-07 1.287768e-07 6.981027e-07 5.469245e-12 6.689053e-09 1.430512e-06
3 img_00003.jpg 3.0 ALB 291.000000 122.000000 643.000000 407.000000 9.998715e-01 7.074304e-07 1.498078e-13 5.582359e-11 1.212325e-04 6.655146e-08 2.109827e-15 6.572765e-06 1.285159e-04
4 img_00010.jpg 0.0 ALB 651.000000 422.000000 746.000000 612.000000 9.999821e-01 1.612292e-05 1.606051e-09 8.399101e-09 5.414718e-07 1.055654e-06 8.472713e-09 1.611917e-07 1.788155e-05
5 img_00010.jpg 1.0 ALB 831.000000 305.000000 943.000000 418.000000 9.967868e-01 5.258065e-05 2.600038e-07 1.262084e-08 1.224622e-03 1.223431e-03 1.717334e-07 7.120565e-04 3.218340e-03
6 img_00012.jpg 0.0 ALB 471.000000 513.000000 627.000000 703.000000 9.999943e-01 3.739872e-08 1.850646e-06 1.132046e-08 1.897254e-06 6.289488e-09 9.704899e-11 1.929350e-06 5.722062e-06
7 img_00015.jpg 0.0 ALB 233.000000 341.000000 444.000000 435.000000 9.743453e-01 1.293397e-03 1.068411e-06 3.617434e-07 2.392042e-02 2.577601e-04 8.125739e-07 1.809537e-04 2.598955e-02
8 img_00019.jpg 0.0 ALB 155.000000 393.000000 293.000000 468.000000 9.727021e-01 9.626866e-10 6.829089e-13 5.678039e-09 2.318743e-02 6.181457e-08 3.705326e-10 4.110374e-03 2.767742e-02
9 img_00020.jpg 0.0 ALB 586.000000 537.000000 710.000000 719.000000 9.955834e-01 3.461484e-03 5.535413e-09 3.351136e-06 3.796670e-07 8.154724e-05 1.728447e-06 8.681710e-04 4.426427e-03
10 img_00020.jpg 1.0 ALB 690.000000 454.000000 847.000000 687.000000 9.998964e-01 2.346682e-05 2.276121e-12 6.263109e-10 5.679273e-08 7.904898e-05 3.889341e-11 1.027095e-06 1.035982e-04
11 img_00020.jpg 2.0 ALB 614.000000 377.000000 756.000000 484.000000 9.999816e-01 6.440972e-07 7.907461e-08 1.131726e-11 9.268691e-06 1.255067e-06 5.675534e-06 1.460077e-06 1.835840e-05
12 img_00020.jpg 3.0 ALB 724.000000 360.000000 816.000000 537.000000 9.999635e-01 7.285932e-06 1.114230e-08 3.857879e-09 1.466452e-05 1.255418e-05 8.424390e-09 2.041774e-06 3.647871e-05
13 img_00020.jpg 4.0 ALB 630.000000 327.000000 756.000000 438.000000 9.998150e-01 2.325187e-06 5.186432e-07 1.736279e-11 4.357657e-05 5.044311e-08 1.080510e-06 1.373883e-04 1.849703e-04
14 img_00029.jpg 0.0 ALB 607.000000 343.000000 706.000000 524.000000 9.999987e-01 3.594790e-07 1.605573e-08 6.174470e-09 2.296128e-07 5.097974e-07 3.114929e-11 2.182999e-07 1.311303e-06
15 img_00029.jpg 1.0 ALB 693.000000 331.000000 773.000000 485.000000 9.990757e-01 1.419920e-05 8.545166e-06 5.261857e-05 7.995616e-04 4.140537e-05 3.945559e-09 7.982659e-06 9.247763e-04
16 img_00032.jpg 0.0 ALB 520.000000 513.000000 627.000000 568.000000 9.148430e-01 1.440050e-04 6.734094e-06 2.674154e-07 8.218089e-02 1.380336e-06 6.068142e-08 2.823662e-03 8.900279e-02
17 img_00037.jpg 0.0 ALB 291.000000 278.000000 383.000000 436.000000 9.988926e-01 7.055594e-05 3.687090e-05 2.988373e-07 7.559397e-05 6.331778e-06 1.440459e-08 9.177037e-04 1.108008e-03
18 img_00038.jpg 0.0 ALB 745.000000 254.000000 831.000000 401.000000 9.678084e-01 3.162137e-02 7.209297e-08 6.797074e-07 4.583938e-04 2.350453e-05 1.698614e-05 7.062314e-05 3.272112e-02
19 img_00039.jpg 0.0 ALB 393.000000 115.000000 653.000000 220.000000 9.999824e-01 8.426422e-07 2.922295e-09 5.091283e-09 8.092207e-08 4.532214e-08 1.761962e-07 1.647602e-05 1.764313e-05
20 img_00041.jpg 0.0 ALB 774.000000 188.000000 1074.000000 372.000000 9.973723e-01 1.446992e-04 1.484089e-06 1.172362e-03 1.302605e-03 9.352129e-08 1.967828e-08 6.406744e-06 2.631189e-03
21 img_00041.jpg 1.0 ALB 562.000000 84.000000 890.000000 193.000000 9.963587e-01 1.568967e-03 1.909340e-08 9.761413e-08 2.041460e-03 2.864800e-06 8.028692e-08 2.767326e-05 3.647953e-03
22 img_00043.jpg 0.0 ALB 556.000000 373.000000 612.000000 520.000000 9.999943e-01 1.869347e-08 6.470538e-09 3.601910e-10 3.813006e-06 1.243006e-06 3.678304e-08 5.485905e-07 5.722062e-06
23 img_00045.jpg 0.0 ALB 725.000000 432.000000 838.000000 525.000000 9.997253e-01 1.044408e-06 4.624509e-11 1.380859e-12 2.997921e-08 9.887623e-07 3.264254e-11 2.726966e-04 2.747555e-04
24 img_00055.jpg 0.0 ALB 189.000000 382.000000 375.000000 469.000000 9.991695e-01 2.037858e-04 3.110706e-05 3.521669e-07 2.683227e-06 4.896912e-07 3.195880e-07 5.917859e-04 8.308762e-04
25 img_00057.jpg 0.0 ALB 456.000000 509.000000 550.000000 666.000000 9.803254e-01 5.703667e-05 8.106243e-05 6.375744e-08 4.867250e-03 1.483713e-04 7.475342e-06 1.451331e-02 1.987072e-02
26 img_00074.jpg 0.0 ALB 415.000000 142.000000 594.000000 225.000000 9.999074e-01 8.572769e-06 5.425484e-05 2.907593e-09 8.140572e-06 3.617875e-07 8.698826e-07 2.055169e-05 9.262991e-05
27 img_00085.jpg 0.0 ALB 528.000000 145.000000 904.000000 277.000000 9.999944e-01 4.824967e-06 2.861454e-13 1.500103e-10 5.636679e-09 1.107719e-07 8.330427e-12 7.240401e-07 5.602852e-06
28 img_00090.jpg 0.0 ALB 373.000000 69.000000 736.000000 176.000000 9.999552e-01 1.398818e-07 3.503078e-11 5.925299e-11 1.950427e-07 4.438670e-05 8.072463e-10 4.472725e-08 4.482370e-05
29 img_00090.jpg 1.0 ALB 664.000000 90.000000 1011.000000 217.000000 9.990360e-01 3.220962e-08 1.046357e-09 4.747873e-10 9.323478e-06 9.547356e-04 2.068044e-11 1.235434e-08 9.645105e-04
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4341 img_07624.jpg 0.0 YFT 40.425532 313.475177 427.659574 521.276596 2.164462e-07 8.217400e-08 1.176050e-10 9.168304e-13 3.708680e-08 7.436453e-13 8.649925e-11 9.999996e-01 3.576279e-07
4342 img_07633.jpg 0.0 YFT 449.645390 404.255319 776.595745 734.042553 1.944061e-07 1.345823e-10 2.600791e-09 1.223723e-13 5.715361e-04 1.499527e-11 7.718879e-11 9.994282e-01 5.719509e-04
4343 img_07644.jpg 0.0 YFT 31.205674 284.397163 441.134752 561.702128 4.146482e-08 7.857637e-14 2.391757e-12 4.072475e-16 1.115367e-05 6.872426e-15 5.370510e-13 9.999888e-01 1.120574e-05
4344 img_07648.jpg 0.0 YFT 496.453901 458.865248 863.829787 715.602837 1.056789e-06 8.189900e-12 2.191024e-08 3.227101e-14 2.480658e-01 4.621349e-10 2.824466e-11 7.519331e-01 2.851079e-01
4345 img_07649.jpg 0.0 YFT 395.744681 338.297872 539.007092 399.290780 9.800946e-01 2.885848e-05 6.444725e-10 2.656619e-10 5.918504e-06 4.666089e-10 2.298762e-10 1.987064e-02 3.918512e+00
4346 img_07653.jpg 0.0 YFT 392.907801 339.716312 492.907801 527.659574 2.288577e-04 5.256919e-09 1.510330e-12 2.889810e-13 9.491819e-09 3.025020e-09 1.956473e-10 9.997712e-01 2.288484e-04
4347 img_07655.jpg 0.0 YFT 355.319149 412.765957 508.510638 492.198582 1.258860e-02 9.553523e-07 2.006075e-09 8.764372e-11 5.649769e-07 1.484699e-08 1.848887e-08 9.874098e-01 1.267016e-02
4348 img_07665.jpg 0.0 YFT 386.524823 397.163121 615.602837 529.787234 7.730698e-08 9.188389e-07 7.162744e-13 4.260208e-13 1.254958e-10 1.479765e-11 2.911281e-11 9.999990e-01 9.536748e-07
4349 img_07706.jpg 0.0 YFT 739.716312 119.858156 1107.092199 265.957447 6.675404e-03 5.656304e-03 7.849219e-04 5.090093e-06 1.270020e-03 2.034237e-06 4.498025e-07 9.856058e-01 1.449883e-02
4350 img_07712.jpg 0.0 YFT 384.397163 395.035461 621.985816 527.659574 9.987452e-08 6.990318e-07 3.480624e-13 1.654098e-13 5.825423e-10 2.003397e-11 4.399122e-10 9.999992e-01 8.344654e-07
4351 img_07714.jpg 0.0 YFT 518.439716 499.290780 965.957447 751.063830 4.431536e-04 5.329949e-03 5.520845e-04 3.762749e-08 1.303821e-02 1.918395e-06 1.929933e-06 9.806327e-01 1.955728e-02
4352 img_07731.jpg 0.0 YFT 0.709220 285.106383 407.092199 492.198582 2.290689e-05 1.586421e-05 9.612826e-06 8.063775e-10 2.111648e-04 6.028992e-10 5.919560e-09 9.997404e-01 2.596119e-04
4353 img_07742.jpg 0.0 YFT 392.907801 119.858156 604.964539 216.312057 7.540400e-04 5.190315e-07 1.048923e-06 2.994529e-12 5.577776e-07 3.216247e-08 4.288129e-09 9.992439e-01 7.564305e-04
4354 img_07747.jpg 0.0 YFT 396.453901 36.170213 573.758865 139.007092 4.107560e-02 1.682030e-06 1.991541e-06 3.832559e-10 7.867641e-05 2.695598e-08 5.084877e-09 9.588420e-01 4.202893e-02
4355 img_07750.jpg 0.0 YFT 382.269504 400.709220 618.439716 525.531915 1.154416e-07 3.628240e-07 7.613718e-13 1.997993e-13 3.715605e-11 6.477192e-12 5.375092e-11 9.999995e-01 4.768373e-07
4356 img_07752.jpg 0.0 YFT 516.312057 100.709220 1063.120567 470.921986 2.969756e-06 3.421748e-11 4.237403e-10 1.173359e-13 2.443959e-05 4.258927e-12 1.328220e-07 9.999725e-01 2.753773e-05
4357 img_07759.jpg 0.0 YFT 372.340426 34.751773 563.829787 142.553191 7.952327e-04 3.425041e-08 2.242637e-06 1.119358e-11 6.487022e-06 2.139202e-07 9.518618e-09 9.991958e-01 8.045691e-04
4358 img_07761.jpg 0.0 YFT 378.723404 327.659574 623.404255 600.000000 2.354784e-02 1.261270e-04 2.567733e-06 2.596141e-08 3.898747e-02 1.521842e-03 5.412410e-03 9.304017e-01 7.213880e-02
4359 img_07765.jpg 0.0 YFT 240.425532 397.163121 436.170213 506.382979 1.646531e-04 7.871562e-07 1.974605e-12 9.064095e-11 1.574225e-05 5.046791e-09 5.182184e-09 9.998189e-01 1.811549e-04
4360 img_07775.jpg 0.0 YFT 358.865248 383.687943 544.680851 507.801418 1.533139e-04 6.141526e-09 1.837603e-12 1.696163e-12 2.080864e-08 9.277617e-08 1.598433e-08 9.998466e-01 1.534341e-04
4361 img_07782.jpg 0.0 YFT 719.148936 495.744681 1050.354610 770.212766 3.887371e-04 2.830485e-04 9.643040e-07 5.700894e-10 6.147433e-05 5.678395e-07 2.822596e-07 9.992649e-01 7.353744e-04
4362 img_07828.jpg 0.0 YFT 545.390071 437.588652 944.680851 599.290780 4.730524e-05 4.999148e-08 2.016533e-07 3.433206e-13 4.449790e-07 5.377348e-11 6.311234e-10 9.999520e-01 4.804250e-05
4363 img_07849.jpg 0.0 YFT 409.219858 329.787234 502.127660 533.333333 5.100721e-02 2.538231e-06 3.884194e-10 2.187348e-09 3.148336e-07 8.814292e-06 3.255179e-06 9.489779e-01 5.236972e-02
4364 img_07852.jpg 0.0 YFT 246.808511 399.290780 431.914894 506.382979 2.338640e-01 5.934151e-06 1.941234e-10 2.950447e-09 1.761298e-04 7.111951e-09 1.075934e-08 7.659540e-01 2.666332e-01
4365 img_07853.jpg 0.0 YFT 367.375887 420.567376 534.751773 507.092199 4.134394e-03 5.552590e-06 3.968423e-10 6.416629e-10 1.457083e-07 2.094415e-08 5.564397e-11 9.958599e-01 4.148673e-03
4366 img_07854.jpg 0.0 YFT 297.163121 402.836879 504.255319 531.205674 9.831114e-03 1.452477e-05 4.623424e-09 4.330351e-09 1.166227e-06 6.302580e-06 1.310425e-07 9.901467e-01 9.902168e-03
4367 img_07891.jpg 0.0 YFT 403.546099 504.964539 1150.354610 743.971631 2.132043e-02 5.357747e-04 1.069194e-08 6.377197e-08 1.312335e-03 3.185128e-07 9.797323e-06 9.768212e-01 2.345167e-02
4368 img_07901.jpg 0.0 YFT 104.255319 348.936170 414.184397 512.765957 2.305388e-03 1.219822e-08 8.169907e-08 1.499224e-10 4.947213e-04 7.767779e-10 1.084323e-09 9.971998e-01 2.804154e-03
4369 img_07911.jpg 0.0 YFT 190.780142 53.900709 562.411348 219.858156 5.444570e-04 1.197029e-09 2.904432e-11 1.601409e-07 3.330995e-04 2.670902e-10 2.680843e-11 9.991223e-01 8.780638e-04
4370 img_07911.jpg 1.0 YFT 756.028369 112.765957 1136.879433 329.787234 5.873860e-03 4.908578e-06 1.231487e-07 4.685965e-06 1.182474e-03 1.162095e-06 1.040281e-07 9.929327e-01 7.092415e-03

4371 rows × 16 columns

# Cache of the crops cut from the RFCN detections, resized to ROWS x COLS and
# stored as uint8 so the pickle stays small; rescaled to [0, 1] floats below.
file_name = 'data_test_Crop_{}_{}.pickle'.format(ROWS, COLS)
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    with open(OUTPUT_DIR+file_name, 'rb') as f:
        data_test = pickle.load(f)
    # Read from the dict that was just loaded (data_test); the previous code
    # indexed data_train, which is a different object.
    X_test_crop = data_test['X_test_crop']
else:
    print ('Generating file '+file_name)
    X_test_crop = np.ndarray((RFCNbbox_RFCNpred_df.shape[0], ROWS, COLS, 3), dtype=np.uint8)
    for i, (index, row) in enumerate(RFCNbbox_RFCNpred_df.iterrows()):
        image_file = row['image_file']
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
        X_test_crop[i] = np.asarray(cropped)
    #save data to file
    data_test = {'X_test_crop': X_test_crop}
    with open(OUTPUT_DIR+file_name, 'wb') as f:
        pickle.dump(data_test, f)
print('Loading data done.')
X_test_crop = X_test_crop.astype(np.float32)
print('Convert to float32 done.')
X_test_crop /= 255.
print('Rescale by 255 done.')

In [32]:
# Cache of the ground-truth fish crops (every class except 'NoF'), resized to
# ROWS x COLS and stored as uint8; rescaled to [0, 1] floats below.
file_name = 'data_trainfish_Crop_{}_{}.pickle'.format(ROWS, COLS)
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    with open(OUTPUT_DIR+file_name, 'rb') as f:
        data_trainfish = pickle.load(f)
    # Read from the dict that was just loaded (data_trainfish); the previous
    # code indexed data_train, which is a different object.
    X_trainfish_crop = data_trainfish['X_trainfish_crop']
else:
    print ('Generating file '+file_name)

    GTbbox_CROPpred_fish_df = GTbbox_CROPpred_df.loc[GTbbox_CROPpred_df['crop_class']!='NoF']
    X_trainfish_crop = np.ndarray((GTbbox_CROPpred_fish_df.shape[0], ROWS, COLS, 3), dtype=np.uint8)
    # NOTE(review): the images are read from TEST_DIR although the rows come
    # from the ground-truth boxes; test_generator does the same, so presumably
    # that directory holds every image — confirm.
    for i, (index, row) in enumerate(GTbbox_CROPpred_fish_df.iterrows()):
        image_file = row['image_file']
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
        X_trainfish_crop[i] = np.asarray(cropped)
    #save data to file
    data_trainfish = {'X_trainfish_crop': X_trainfish_crop}
    with open(OUTPUT_DIR+file_name, 'wb') as f:
        pickle.dump(data_trainfish, f)
print('Loading data done.')
X_trainfish_crop = X_trainfish_crop.astype(np.float32)
print('Convert to float32 done.')
X_trainfish_crop /= 255.
print('Rescale by 255 done.')

Generating file data_trainfish_Crop_224_224.pickle
Loading data done.
Convert to float32 done.
Rescale by 255 done.

In [33]:

array([ 0.40706199,  0.4373979 ,  0.39489502], dtype=float32)

In [28]:

array([ 0.41078389,  0.43895897,  0.39912957], dtype=float32)

In [35]:
# Mean handed to test_generator (as `mean`) when the RFCN crops are scored in
# the next cell. The active value equals the hard-coded train_mean list; the
# commented-out line keeps an alternative value.
# test_mean = [0.41019869,  0.43978861,  0.39873621]
test_mean = [0.37698776,  0.41491762,  0.38681713]

In [55]:
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = ['image_class', 'image_file','crop_index','xmin','ymin','xmax','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#                                    'NoF_CROP', 'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP',
#                                    'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

def _hybrid_fish_score(row, crop_col):
    """Combine the RFCN 'no fish' score with one crop-classifier fish score.

    The crop score is renormalised over the crop classifier's fish classes
    (divided by ``1 - NoF_CROP``) and scaled by the RFCN fish mass
    (``1 - NoF_RFCN``). Returns 0 when ``NoF_CROP`` equals 1, where the
    renormalisation would divide by zero.
    """
    if row['NoF_CROP'] == 1:
        return 0
    # Scalar lookup (single brackets): a list key would return a one-element
    # Series and make apply() mix Series and scalar results.
    return (1-row['NoF_RFCN'])*row[crop_col]/(1-row['NoF_CROP'])

file_name = 'RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    nb_augmentation = 1
    if nb_augmentation ==1:
        # Single deterministic pass, no test-time augmentation.
        test_preds = test_model.predict_generator(test_generator(df=RFCNbbox_RFCNpred_df, mean=test_mean),
                                                  val_samples=RFCNbbox_RFCNpred_df.shape[0], nb_worker=1, pickle_safe=False)
    else:
        # Test-time augmentation: average several randomly transformed passes.
        test_preds = np.zeros((RFCNbbox_RFCNpred_df.shape[0], len(FISH_CLASSES)), dtype=K.floatx())
        for idx in range(nb_augmentation):
            print('{}th augmentation for testing ...'.format(idx+1))
            test_preds += test_model.predict_generator(test_generator(df=RFCNbbox_RFCNpred_df, mean=test_mean, datagen=test_aug_datagen),
                                                       val_samples=RFCNbbox_RFCNpred_df.shape[0], nb_worker=1, pickle_safe=False)
        test_preds /= nb_augmentation

    CROPpred_df = pd.DataFrame(test_preds, columns=['ALB_CROP', 'BET_CROP', 'DOL_CROP', 'LAG_CROP', 'NoF_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP'])
    # concat(axis=1) aligns on the index, so RFCNbbox_RFCNpred_df must carry the
    # default 0..n-1 index for rows and predictions to line up.
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = pd.concat([RFCNbbox_RFCNpred_df,CROPpred_df], axis=1)
    # Hybrid scores: 'NoF' comes straight from the RFCN, the fish classes from
    # the crop classifier rescaled to the RFCN fish mass.
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['NoF'] = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['NoF_RFCN']
    for fish in ['ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']:
        RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish] = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.apply(
            _hybrid_fish_score, axis=1, args=(fish+'_CROP',))
    # Persist the result so the load branch above can be taken on the next run.
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.to_pickle(OUTPUT_DIR+file_name)
#     for fish in FISH_CLASSES:
#         RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish] = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish+'_CROP']


Generating file RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df_weights.000-0.0327.hdf5_.pickle
batch_index 0

In [56]:
# clsMaxAve and hybrid RFCNpred&CROPpred such that RFCNpred for NoF and CROPpred for fish
# test_pred_df = ['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = ['image_class', 'image_file','crop_index','xmin','ymin','xmax','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#                                    'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP',
#                                    'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

# Image-level prediction ("clsMaxAve"): for every class that wins at least one
# box of the image, keep the box(es) with the highest winning score, then
# average those rows weighted by how many boxes voted for that class.
file_name = 'test_pred_df_Hybrid_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    test_pred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    class_cols = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
    with open("../RFCN/ImageSets/Main/test.txt","r") as f:
        test_files = f.readlines()
    # Collect one record per image and build the DataFrame once at the end
    # (growing a DataFrame row by row is quadratic).
    test_pred_rows = []
    for j in range(len(test_files)):
        # rstrip instead of [:-1] so a final line without a newline keeps its last character.
        image_file = test_files[j].rstrip('\n')+'.jpg'
        test_pred_im_df = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.loc[RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['image_file'] == image_file,
                                                                       ['image_class'] + class_cols]
        image_class = test_pred_im_df.iloc[0]['image_class']
        # Work on a float copy of the score columns: no in-place edit of a .loc
        # slice, and max/idxmax behave even if the columns were stored as object.
        scores_df = test_pred_im_df[class_cols].astype(float)
        scores_df = scores_df.assign(max_score=scores_df.max(axis=1),
                                     max_cls=scores_df.idxmax(axis=1))
        scores_df['Count'] = scores_df.groupby(['max_cls'])['max_cls'].transform('count')
        is_cls_best = scores_df.groupby(['max_cls'])['max_score'].transform('max') == scores_df['max_score']
        cls_best_array = scores_df.loc[is_cls_best, class_cols + ['Count']].values
        pred = np.average(cls_best_array[:,:-1], axis=0, weights=cls_best_array[:,-1], returned=False).tolist()
        if image_class!='--':
            # pred follows class_cols order, so index with the same list.
            ind = class_cols.index(image_class)
            logloss = -math.log(pred[ind])
        else:
            # '--' marks images without a known label: no loss can be computed.
            logloss = np.nan
        test_pred_rows.append([logloss,image_class,image_file] + pred)
    test_pred_df = pd.DataFrame(test_pred_rows, columns=['logloss','image_class','image_file'] + class_cols)
    # Persist the result so the load branch above can be taken on the next run.
    test_pred_df.to_pickle(OUTPUT_DIR+file_name)


Generating file test_pred_df_Hybrid_weights.000-0.0327.hdf5_.pickle

In [ ]:
#### visualization
# RFCNbbox_RFCNpred_CROPpred_df = ['image_class', 'image_file','crop_index','x_min','y_min','x_max','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN'
#                                    'NoF_CROP', 'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP']
#GTbbox_CROPpred_df = ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax',
#                      'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'logloss']
# test_pred_df = ['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

# For each image: draw the RFCN boxes in red (labelled with the top RFCN class
# and the top hybrid class) and the ground-truth boxes in green (labelled with
# the true class and the top crop-classifier class).
for j in range(test_pred_df.shape[0]):
    # Positional columns 0, 1, 2 are logloss, image_class, image_file (see the
    # test_pred_df layout listed above).
    image_logloss = test_pred_df.iat[j,0]
    image_class = test_pred_df.iat[j,1]
    image_file = test_pred_df.iat[j,2]
    # NOTE(review): this `if` has no visible body; presumably it restricts the
    # plots to every 30th of the first 1000 images — confirm against the
    # original notebook.
    if j<1000 and j%30== 0:
    # NOTE(review): the next statement is incomplete in this view (unbalanced
    # parenthesis); presumably the image file is opened here — confirm.
    im ='../RFCN/JPEGImages/'+image_file)
    im = np.asarray(im)
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(im, aspect='equal')
    RFCN_dets = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.loc[RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['image_file']==image_file]
    for index,row in RFCN_dets.iterrows():
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        # Both lists are ordered NoF, ALB, ..., YFT; FISH_CLASSES is indexed with
        # positions from them, so it is assumed to share that order — TODO confirm.
        RFCN = [row['NoF_RFCN'],row['ALB_RFCN'],row['BET_RFCN'],row['DOL_RFCN'],row['LAG_RFCN'],row['OTHER_RFCN'],row['SHARK_RFCN'],row['YFT_RFCN']]
        # On this frame the unsuffixed columns hold the hybrid scores (hence the
        # 'HYBRID_' label below), not the raw crop-classifier output.
        CROP = [row['NoF'],row['ALB'],row['BET'],row['DOL'],row['LAG'],row['OTHER'],row['SHARK'],row['YFT']]
        score_RFCN = max(RFCN)
        score_CROP = max(CROP)
        index_RFCN = RFCN.index(score_RFCN)
        index_CROP = CROP.index(score_CROP)
        class_RFCN = FISH_CLASSES[index_RFCN]
        class_CROP = FISH_CLASSES[index_CROP]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='red', linewidth=2))
        ax.text(bbox[0], bbox[1] - 2, 'RFCN_{:s} {:.3f} \nHYBRID_{:s} {:.3f}'.format(class_RFCN, score_RFCN, class_CROP, score_CROP), bbox=dict(facecolor='red', alpha=0.5), fontsize=8, color='white')       
    GT_dets = GTbbox_CROPpred_df.loc[GTbbox_CROPpred_df['image_file']==image_file]
    for index,row in GT_dets.iterrows():
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        CROP = [row['NoF'],row['ALB'],row['BET'],row['DOL'],row['LAG'],row['OTHER'],row['SHARK'],row['YFT']]
        score_CROP = max(CROP)
        index_CROP = CROP.index(score_CROP)
        class_CROP = FISH_CLASSES[index_CROP]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='green', linewidth=2))
        # The ground-truth label is placed below the box (ymax + 40), the RFCN label above it.
        ax.text(bbox[0], bbox[3] + 40, 'GT_{:s} \nCROP_{:s} {:.3f}'.format(row['crop_class'], class_CROP, score_CROP), bbox=dict(facecolor='green', alpha=0.5), fontsize=8, color='white')
    ax.set_title(('Image {:s}    FISH {:s}    logloss {}').format(image_file, image_class, image_logloss), fontsize=10) 

In [58]:
# Temperature applied to the image-level probabilities; T = 1 leaves them
# unchanged apart from the row renormalisation.
T = 1

def _labelled_logloss(row):
    """Return -log of the probability given to the row's true class.

    Rows whose ``image_class`` is '--' carry no label and get NaN.
    """
    if row['image_class'] == '--':
        return np.nan
    return -math.log(row[row['image_class']])

# Rescale the log-probabilities by 1/T, then renormalise every row to sum to 1.
test_pred_array = test_pred_df[FISH_CLASSES].values
test_pred_T_array = np.exp(np.log(test_pred_array)/T)
test_pred_T_array = test_pred_T_array/test_pred_T_array.sum(axis=1, keepdims=True)
test_pred_T_df = pd.concat(
    [test_pred_df[['image_class','image_file']],
     pd.DataFrame(test_pred_T_array, columns=FISH_CLASSES)],
    axis=1)

#add logloss
test_pred_T_df['logloss'] = test_pred_T_df.apply(_labelled_logloss, axis=1)

#calculate train logloss
# mean() skips the NaN rows, so only labelled images contribute.
train_logloss = test_pred_T_df['logloss'].mean()
print('logloss of train is', train_logloss )

--            NaN
ALB      0.043386
BET      0.234966
DOL      0.140754
LAG      0.000835
NoF      0.126442
OTHER    0.105386
SHARK    0.147953
YFT      0.080953
Name: logloss, dtype: float64
logloss of train is 0.08309862497761385

In [62]:
#test submission
# Rows with index labels 0..999 (label-based .loc, both ends inclusive) are
# written out, with the file column renamed to 'image'.
submission = (
    test_pred_T_df
    .loc[:999, ['image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']]
    .rename(columns={'image_file':'image'})
)
# File name records both models, the confidence threshold, the temperature and
# the train log loss of this run.
sub_file = ''.join([
    'RFCN_AGONOSTICnms_', RFCN_MODEL, '_', CROP_MODEL,
    '_clsMaxAve_conf{:.2f}_T{}_'.format(CONF_THRESH, T),
    '{:.4f}'.format(train_logloss),
    '.csv',
])
submission.to_csv(sub_file, index=False)
