In [10]:
from __future__ import division
from __future__ import print_function

import os, random, glob, pickle, collections, math, json
import time
import numpy as np
import pandas as pd
# import ujson as json
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
sess = tf.Session()

from keras.models import Sequential, Model, load_model, model_from_json
from keras.layers import GlobalAveragePooling2D, Flatten, Dropout, Dense, LeakyReLU
from keras.layers import Conv2D, BatchNormalization
from keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.preprocessing import image
from keras import backend as K
K.set_image_dim_ordering('tf')
K.set_session(sess)

In [4]:
from keras.losses import categorical_crossentropy
from keras.metrics import categorical_accuracy

In [1]:
# --- Data locations ---------------------------------------------------------
TRAIN_DIR = '../data/train/'
TEST_DIR = '../RFCN/JPEGImages/'
TRAIN_CROP_DIR = '../data/train_crop/'
TEST_CROP_DIR = '../data/test_stg1_crop/'

# --- Model identifiers ------------------------------------------------------
RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'
CROP_MODEL = 'resnet50_FT38_Hybrid_woNoF'

# --- Per-model working directories (created on first run) -------------------
_model_root = './' + CROP_MODEL
CHECKPOINT_DIR = _model_root + '/checkpoint/'
LOG_DIR = _model_root + '/log/'
OUTPUT_DIR = _model_root + '/output/'
# The model root comes first because os.mkdir does not create parents.
for _directory in (_model_root, CHECKPOINT_DIR, LOG_DIR, OUTPUT_DIR):
    if not os.path.exists(_directory):
        os.mkdir(_directory)

# --- Classes ----------------------------------------------------------------
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# The crop classifier is trained on every class except 'NoF'.
CROP_CLASSES = [fish for fish in FISH_CLASSES if fish != 'NoF']

# --- Hyper-parameters -------------------------------------------------------
CONF_THRESH = 0.8
ROWS = 224
COLS = 224
BATCHSIZE = 64
LEARNINGRATE = 1e-4
NUM_GPUS = 1
def featurewise_center(x):
    """Subtract the per-channel mean of ``x`` from ``x``.

    The mean is taken over axis 0 first and then over axes 1 and 2, with
    ``keepdims=True`` so it broadcasts against ``x``.

    Returns:
        A new array ``x - mean``; ``x`` itself is not modified.
    """
    per_position_mean = np.mean(x, axis=0, keepdims=True)
    channel_mean = np.mean(per_position_mean, axis=(1, 2), keepdims=True)
    return x - channel_mean

def mean(x):
    """Return the mean of ``x`` over axis 0 and then over the next two axes.

    For a 4-D input this leaves one value per entry of the last axis.
    """
    averaged_over_first_axis = np.mean(x, axis=0)
    return np.mean(averaged_over_first_axis, axis=(0, 1))

def load_img(path, bbox, target_size=None):
    """Open an image, crop it to ``bbox`` and optionally resize the crop.

    Args:
        path: Image file path handed to ``Image.open``.
        bbox: Sequence whose first four items form the crop box
            ``(bbox[0], bbox[1], bbox[2], bbox[3])``.
        target_size: Optional ``(rows, cols)`` pair. When truthy, the crop is
            resized to ``cols`` wide by ``rows`` high with bilinear filtering.

    Returns:
        The cropped (and possibly transposed / resized) PIL image.
    """
    crop_box = (bbox[0], bbox[1], bbox[2], bbox[3])
    crop = Image.open(path).crop(crop_box)
    # Conversion to RGB is intentionally left disabled:
    #     crop = crop.convert('RGB')
    crop_width, crop_height = crop.size
    # Crops taller than wide are transposed with method code 2
    # (PIL's 90-degree rotation) so every sample ends up landscape.
    if crop_height > crop_width:
        crop = crop.transpose(method=2)
    if not target_size:
        return crop
    rows, cols = target_size[0], target_size[1]
    return crop.resize((cols, rows), Image.BILINEAR)

def preprocess_input(x, mean):
    """Subtract a per-channel mean from a 3-D image array, in place.

    Args:
        x: Array indexed as ``x[:, :, channel]``; it is modified in place.
        mean: Sequence with at least three entries, one per channel.

    Returns:
        The same array object ``x`` after the subtraction.
    """
    # Disabled resnet50-style steps, kept for reference:
    #     'RGB'->'BGR':  x = x[:, :, ::-1]
    #     rescaling:     x /= 255.
    for channel in range(3):
        x[:, :, channel] -= mean[channel]
    return x

def get_best_model(checkpoint_dir = CHECKPOINT_DIR):
    """Load the checkpoint with the lowest validation loss.

    Checkpoint files are expected to follow the ModelCheckpoint pattern used
    in this notebook, ``weights.{epoch:03d}-{val_loss:.4f}.hdf5``: the
    validation loss is the text between the last ``-`` and the extension.

    Args:
        checkpoint_dir: Directory prefix (including the trailing separator)
            that is globbed for checkpoint files.

    Returns:
        ``(model, model_name)``: the loaded Keras model and the base file
        name of the checkpoint it was loaded from.

    Raises:
        ValueError: if no file under ``checkpoint_dir`` carries a parsable
            validation loss in its name.
    """
    candidates = []
    for path in glob.glob(checkpoint_dir + '*'):
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            val_loss = float(stem.rsplit('-', 1)[-1])
        except ValueError:
            # Not named like a checkpoint (stray file in the directory):
            # skip it instead of aborting the whole lookup.
            continue
        candidates.append((val_loss, path))
    if not candidates:
        raise ValueError('No checkpoint files found in ' + checkpoint_dir)

    # key= keeps the first file on ties, like the original index(min(...)).
    best_loss, best_path = min(candidates, key=lambda candidate: candidate[0])
    print('Loading model from checkpoint file ' + best_path)
    model = load_model(best_path)
    model_name = os.path.basename(best_path)
    print('Loading model Done!')
    return (model, model_name)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-5f389aba4b3c> in <module>()
      5 RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'
      6 CROP_MODEL = 'resnet50_FT38_Hybrid_woNoF'
----> 7 if not os.path.exists('./' + CROP_MODEL):
      8     os.mkdir('./' + CROP_MODEL)
      9 CHECKPOINT_DIR = './' + CROP_MODEL + '/checkpoint/'

NameError: name 'os' is not defined

In [3]:
# GTbbox_df columns: ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax'],
# plus a 'split' column ('train' / 'valid') added once all boxes are collected.

file_name = 'GTbbox_df.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    GTbbox_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    GTbbox_df = pd.DataFrame(columns=['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax'])

    for c in CROP_CLASSES:
        print(c)
        # Context manager so the annotation file handle is closed promptly.
        with open('../data/BBannotations/{}.json'.format(c), 'r') as annotation_file:
            j = json.load(annotation_file)
        for l in j:
            image_file = os.path.basename(l["filename"])
            # Only the image size is needed. The result is not bound to a
            # global called `image`, which would shadow the
            # keras.preprocessing `image` module imported in the first cell.
            width_image, height_image = Image.open(TEST_DIR+image_file).size
            for i, a in enumerate(l["annotations"]):
                xmin = a["x"]
                ymin = a["y"]
                xmax = xmin + a["width"]
                ymax = ymin + a["height"]
                # The box must keep a positive area after clipping to the image.
                assert max(xmin,0)<min(xmax,width_image)
                assert max(ymin,0)<min(ymax,height_image)
                GTbbox_df.loc[len(GTbbox_df)]=[image_file,i,a["class"],max(xmin,0),max(ymin,0),min(xmax,width_image),min(ymax,height_image)]
                # Show annotations whose class differs from the file they came from.
                if a["class"] != c: print(GTbbox_df.tail(1))

    n_boxes = GTbbox_df.shape[0]
    # Validation size is whatever remains after rounding ~80% of the boxes
    # up to a whole number of batches for the training split.
    test_size = n_boxes-int(math.ceil(n_boxes*0.8/BATCHSIZE)*BATCHSIZE)
    train_ind, valid_ind = train_test_split(range(n_boxes), test_size=test_size, random_state=1986, stratify=GTbbox_df['crop_class'])
    # Set membership keeps the labelling pass linear instead of quadratic.
    train_ind_set = set(train_ind)
    GTbbox_df['split'] = ['train' if i in train_ind_set else 'valid' for i in range(n_boxes)]
    GTbbox_df.to_pickle(OUTPUT_DIR+file_name)


Loading from file GTbbox_df.pickle

In [4]:
#Load data

def data_from_df(df):
    """Build image and one-hot label arrays from bounding-box rows.

    Each row of ``df`` supplies 'image_file', 'crop_class' and the box
    columns 'xmin', 'ymin', 'xmax', 'ymax'. The crop is loaded via
    ``load_img`` from ``TEST_DIR`` at ``(ROWS, COLS)``.

    Returns:
        ``(X, y)``: ``X`` is uint8 with shape ``(len(df), ROWS, COLS, 3)``,
        ``y`` is one-hot over ``CROP_CLASSES``.
    """
    n_samples = df.shape[0]
    X = np.ndarray((n_samples, ROWS, COLS, 3), dtype=np.uint8)
    y = np.zeros((n_samples, len(CROP_CLASSES)), dtype=K.floatx())
    # enumerate gives the positional slot; the DataFrame index is ignored.
    for position, (_, record) in enumerate(df.iterrows()):
        crop_box = [record['xmin'], record['ymin'], record['xmax'], record['ymax']]
        crop = load_img(TEST_DIR + record['image_file'], crop_box, target_size=(ROWS, COLS))
        X[position] = np.asarray(crop)
        y[position, CROP_CLASSES.index(record['crop_class'])] = 1
    return (X, y)

def data_load(name):
    """Return ``(X, y)`` for a data split, using a pickle cache in OUTPUT_DIR.

    Args:
        name: 'train' or 'valid' to select rows of ``GTbbox_df`` by their
            'split' column, or 'all' for every row.

    Returns:
        ``(X, y)`` as produced by ``data_from_df``.

    Raises:
        ValueError: if no cache file exists and ``name`` is not one of
            'train', 'valid' or 'all'. Previously this path printed a
            message and then failed on an unbound ``df``.
    """
    file_name = 'data_'+name+'_{}_{}.pickle'.format(ROWS, COLS)
    cache_path = OUTPUT_DIR+file_name
    if os.path.exists(cache_path):
        print ('Loading from file '+file_name)
        # NOTE: pickle.load can execute arbitrary code; only load cache
        # files written by this notebook.
        with open(cache_path, 'rb') as f:
            data = pickle.load(f)
        return (data['X'], data['y'])

    if name=='train' or name=='valid':
        df = GTbbox_df[GTbbox_df['split']==name]
    elif name=='all':
        df = GTbbox_df
    else:
        raise ValueError('Invalid name '+name)

    print ('Generating file '+file_name)
    X, y = data_from_df(df)
    with open(cache_path, 'wb') as f:
        pickle.dump({'X': X,'y': y}, f)
    return (X, y)
X_train, y_train = data_load('train')
X_valid, y_valid = data_load('valid')

print('Loading data done.')
# Only the sample counts are carried through the loop, so no extra
# reference to the image arrays is kept alive.
for _split_name, _n_samples in (('train', X_train.shape[0]), ('valid', X_valid.shape[0])):
    print(_split_name + ' sample ', _n_samples)

# Cast the images to float32 before scaling.
X_train = X_train.astype(np.float32)
X_valid = X_valid.astype(np.float32)
print('Convert to float32 done.')

# Scale both splits by 1/255 in place.
X_train /= 255.
X_valid /= 255.
print('Rescale by 255 done.')

# Each split is centred with its own per-channel mean.
X_train_centerd = featurewise_center(X_train)
print('mean of X_train is ', mean(X_train))
X_valid_centerd = featurewise_center(X_valid)
print('mean of X_valid is ', mean(X_valid))
print('Featurewise centered done.')


Loading from file data_train_224_224.pickle
Loading from file data_valid_224_224.pickle
Loading data done.
train sample  3584
valid sample  787
Convert to float32 done.
Rescale by 255 done.
mean of X_train is  [ 0.40704539  0.43806663  0.39486334]
mean of X_valid is  [ 0.4065561   0.43584293  0.39404479]
Featurewise centered done.

In [5]:
# Class weights relative to the most frequent class: weight = max_count / count.
# Disabled alternative, kept for reference:
#     class weight = n_samples / (n_classes * count), n_samples = GTbbox_df.shape[0]

class_weight_fish = dict(GTbbox_df.groupby('crop_class').size())
ref = max(class_weight_fish.values())
# Keyed by the class position in CROP_CLASSES, as passed to fit_generator.
class_weight = {
    CROP_CLASSES.index(fish): ref/count
    for fish, count in class_weight_fish.items()
}
class_weight


Out[5]:
{0: 1.0,
 1: 8.212418300653594,
 2: 19.944444444444443,
 3: 23.933333333333334,
 4: 7.5465465465465469,
 5: 13.296296296296296,
 6: 3.1451814768460578}

In [10]:
#data preprocessing

# Random augmentation applied to every training batch.
_train_augmentation = dict(
    rotation_range=180,
    shear_range=0.2,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)
train_datagen = ImageDataGenerator(**_train_augmentation)
train_generator = train_datagen.flow(
    X_train, y_train,
    batch_size=BATCHSIZE,
    shuffle=True,
    seed=None,
)
# The training split must be a whole number of batches.
assert X_train.shape[0]%BATCHSIZE==0
steps_per_epoch = X_train.shape[0] // BATCHSIZE

In [7]:
#callbacks

# Stop when val_loss has not improved for 10 epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    verbose=1,
    mode='auto',
)

# Save the full model whenever val_loss improves; the file name carries the
# epoch and val_loss, which get_best_model() parses to pick a checkpoint.
model_checkpoint = ModelCheckpoint(
    filepath=CHECKPOINT_DIR+'weights.{epoch:03d}-{val_loss:.4f}.hdf5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
)

# Multiply the learning rate by 0.1 after 5 epochs without improvement.
learningrate_schedule = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=5,
    verbose=1,
    mode='auto',
    epsilon=0.001,
    cooldown=0,
    min_lr=0,
)

tensorboard = TensorBoard(
    log_dir=LOG_DIR,
    histogram_freq=0,
    write_graph=False,
    write_images=True,
)

In [16]:
def create_model_VGG16():
    """Build a VGG16-like Sequential classifier over ``CROP_CLASSES``.

    Five blocks of 3x3 'same'-padded convolutions, each convolution followed
    by BatchNormalization and LeakyReLU(alpha=0.33). The last convolution of
    every block uses stride 2. The network ends with global average pooling
    and a softmax layer.

    Changes from the earlier copy-pasted version:
      * the first layer declares ``input_shape=(ROWS, COLS, 3)``, which a
        Sequential model needs before it can be built or called;
      * the pooling layer uses the Keras 2 ``data_format`` argument instead
        of the Keras 1 ``dim_ordering``.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # (filters, number of convolutions) for block1 .. block5.
    block_specs = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    for block_number, (filters, n_convs) in enumerate(block_specs, start=1):
        for conv_number in range(1, n_convs + 1):
            conv_kwargs = {
                'padding': 'same',
                'name': 'block{}_conv{}'.format(block_number, conv_number),
            }
            if conv_number == n_convs:
                # The strided last convolution replaces max pooling.
                conv_kwargs['strides'] = (2, 2)
            if block_number == 1 and conv_number == 1:
                conv_kwargs['input_shape'] = (ROWS, COLS, 3)
            model.add(Conv2D(filters, (3, 3), **conv_kwargs))
            model.add(BatchNormalization())
            model.add(LeakyReLU(alpha=0.33))

    model.add(GlobalAveragePooling2D(data_format='channels_last'))
#     model.add(Dropout(0.8))
    model.add(Dense(len(CROP_CLASSES), activation='softmax'))

    return model

In [2]:
def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
      tower_grads: List of lists of (gradient, variable) tuples. The outer
        list is over towers; each inner list holds that tower's
        (gradient, variable) pairs, in the same variable order for every
        tower.

    Returns:
      List of pairs of (gradient, variable) where the gradient has been
      averaged across all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        # A leading 'tower' dimension is added to every gradient so they can
        # be concatenated and averaged over it.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]

        # Keyword arguments: the TensorFlow 1.x signature is
        # tf.concat(values, axis); the old positional call tf.concat(0, grads)
        # passes them in the wrong order.
        grad = tf.concat(values=grads, axis=0)
        grad = tf.reduce_mean(grad, 0)

        # The variables are shared across towers, so the first tower's
        # pointer to the variable is returned.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    return average_grads

In [ ]:
# Manual single-tower training loop (TensorFlow ops around a Keras model).
# Fixes relative to the earlier draft:
#   * label placeholders are as wide as the class list (was a hard-coded 10),
#   * the optimizer uses LEARNINGRATE (`lr` was never defined),
#   * gradients are computed from `loss_0` (`loss` was not yet assigned),
#   * batches come from `train_generator` (the `FLAGS` / `mnist_data` names
#     were leftovers from another example and are not defined here).
with tf.device('/cpu:0'):
    n_classes = len(CROP_CLASSES)
    x_0 = tf.placeholder(tf.float32, shape=(None,ROWS,COLS,3))
    y_0 = tf.placeholder(tf.float32, shape=(None, n_classes))
    # Placeholders for a second replica; unused while NUM_GPUS == 1.
    x_1 = tf.placeholder(tf.float32, shape=(None,ROWS,COLS,3))
    y_1 = tf.placeholder(tf.float32, shape=(None, n_classes))

    # shared model living on CPU:0
    # it acts as an op template and as a repository for shared variables
    model = create_model_VGG16()

    opt = tf.train.GradientDescentOptimizer(LEARNINGRATE)

# replica 0
with tf.device('/gpu:0'):
    preds_0 = model(x_0)  # all ops in the replica will live on GPU:0
    loss_0 = tf.reduce_mean(categorical_crossentropy(y_0, preds_0))
    grads_0 = opt.compute_gradients(loss_0)

# TODO: add further replicas (one per GPU, fed by x_1 / y_1) and pass all
# their gradient lists to average_gradients().

# merge outputs on CPU
with tf.device('/cpu:0'):
    loss = loss_0
    grads = average_gradients([grads_0])
    apply_gradient_op = opt.apply_gradients(grads)
    update_ops = []
    # NOTE(review): depending on the Keras version, `model.updates` holds
    # either (old_value, new_value) tuples or ready-made update ops, so both
    # forms are handled. Confirm against the installed Keras.
    for update in model.updates:
        if isinstance(update, tuple):
            old_value, new_value = update
            update_ops.append(tf.assign(old_value, new_value))
        else:
            update_ops.append(update)
    train_op = tf.group(apply_gradient_op, *update_ops)


# Run one pass over the training generator.
with sess.as_default():
    # Variables must be initialised before train_op can run.
    sess.run(tf.global_variables_initializer())
    for step in range(steps_per_epoch):
        start_time = time.time()
        batch = next(train_generator)
        _, loss_value = sess.run([train_op, loss], feed_dict={x_0: batch[0], y_0: batch[1], K.learning_phase(): 1})
        duration = time.time() - start_time
        print('step', step, 'loss', loss_value, 'duration', duration)

In [ ]:
#VGG16
#train from scratch

from keras.applications.vgg16 import VGG16

# weights=None: no pretrained weights are loaded. pooling='avg' is requested
# from the base model, so no separate pooling layer is added here.
base_model = VGG16(
    include_top=False,
    weights=None,
    input_shape=(224,224,3),
    pooling='avg',
)
x = base_model.output
# Disabled head variants from earlier experiments:
#     GlobalAveragePooling2D / Flatten on top of the base output,
#     two Dense(256, init='glorot_normal', activation='relu') + Dropout(0.5) stages.
predictions = Dense(len(CROP_CLASSES), activation="softmax")(x)

# the model that gets trained: VGG16 base plus the softmax head
model = Model(inputs=base_model.input, outputs=predictions)

# every layer stays trainable here, so compile straight away
optimizer = Adam(lr=1e-5)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# up to 30 epochs on augmented batches, validated on the held-out split
model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
    validation_data=(X_valid,y_valid),
    class_weight=class_weight,
    workers=3,
    pickle_safe=True,
)


Epoch 1/30
55/56 [============================>.] - ETA: 3s - loss: 7.9107 - acc: 0.1372  Epoch 00000: val_loss did not improve
56/56 [==============================] - 200s - loss: 7.8731 - acc: 0.1381 - val_loss: 1.9485 - val_acc: 0.1487
Epoch 2/30
55/56 [============================>.] - ETA: 3s - loss: 7.4868 - acc: 0.0952  Epoch 00001: val_loss improved from 1.93584 to 1.93313, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.001-1.9331.hdf5
56/56 [==============================] - 201s - loss: 7.5243 - acc: 0.0946 - val_loss: 1.9331 - val_acc: 0.0712
Epoch 3/30
55/56 [============================>.] - ETA: 3s - loss: 7.2188 - acc: 0.0628  Epoch 00002: val_loss improved from 1.93313 to 1.88218, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.002-1.8822.hdf5
56/56 [==============================] - 201s - loss: 7.2241 - acc: 0.0619 - val_loss: 1.8822 - val_acc: 0.0724
Epoch 4/30
55/56 [============================>.] - ETA: 3s - loss: 6.9229 - acc: 0.0730  Epoch 00003: val_loss did not improve
56/56 [==============================] - 201s - loss: 6.9774 - acc: 0.0725 - val_loss: 1.9073 - val_acc: 0.0775
Epoch 5/30
44/56 [======================>.......] - ETA: 40s - loss: 6.5007 - acc: 0.0778 

In [ ]:
# Two-replica inference example: one shared model is applied to the same
# input on two GPUs and the outputs are averaged on the CPU.
# NOTE(review): this cell rebinds the globals `x` and `model`, and it feeds
# `data`, which is not defined in any visible cell. Confirm where `data`
# should come from before running.
with tf.device('/cpu:0'):
    x = tf.placeholder(tf.float32, shape=(None, 784))

    # shared model living on CPU:0
    # it won't actually be run during training; it acts as an op template
    # and as a repository for shared variables
    model = Sequential()
    model.add(Dense(32, activation='relu', input_dim=784))
    model.add(Dense(10, activation='softmax'))

# replica 0
with tf.device('/gpu:0'):
    output_0 = model(x)  # all ops in the replica will live on GPU:0

# replica 1
with tf.device('/gpu:1'):
    output_1 = model(x)  # all ops in the replica will live on GPU:1

# merge outputs on CPU
with tf.device('/cpu:0'):
    preds = 0.5 * (output_0 + output_1)

# we only run the `preds` tensor, so that only the two
# replicas on GPU get run (plus the merge op on CPU)
output_value = sess.run([preds], feed_dict={x: data})

In [18]:
#VGG16
#top layer training

from keras.applications.vgg16 import VGG16

# ImageNet-pretrained convolutional base without the original classifier.
base_model = VGG16(weights='imagenet', include_top=False)
x = GlobalAveragePooling2D()(base_model.output)
# Disabled head variants from earlier experiments:
#     Flatten instead of global average pooling,
#     two Dense(256, init='glorot_normal', activation='relu') + Dropout(0.5) stages.
predictions = Dense(
    len(CROP_CLASSES),
    activation="softmax",
    kernel_initializer="glorot_normal",
)(x)

# the model that gets trained: pretrained base plus the new head
model = Model(inputs=base_model.input, outputs=predictions)

# freeze the whole base so only the newly added head is updated
for layer in base_model.layers:
    layer.trainable = False

# compile after the trainable flags are set
optimizer = Adam(lr=1e-3)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# up to 30 epochs on augmented batches, validated on the held-out split
model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
    validation_data=(X_valid,y_valid),
    class_weight=class_weight,
    workers=3,
    pickle_safe=True,
)


/usr/lib/python2.7/dist-packages/ipykernel/__main__.py:17: UserWarning: Update your `Model` call to the Keras 2 API: `Model(outputs=Tensor("de..., inputs=Tensor("in...)`
Epoch 1/30
27/28 [===========================>..] - ETA: 2s - loss: 7.7168 - acc: 0.1522 Epoch 00000: val_loss improved from inf to 1.90791, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.000-1.9079.hdf5
28/28 [==============================] - 73s - loss: 7.6927 - acc: 0.1540 - val_loss: 1.9079 - val_acc: 0.0978
Epoch 2/30
27/28 [===========================>..] - ETA: 2s - loss: 7.4846 - acc: 0.1748 Epoch 00001: val_loss improved from 1.90791 to 1.81988, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.001-1.8199.hdf5
28/28 [==============================] - 70s - loss: 7.4654 - acc: 0.1797 - val_loss: 1.8199 - val_acc: 0.2694
Epoch 3/30
27/28 [===========================>..] - ETA: 2s - loss: 7.2855 - acc: 0.2263 Epoch 00002: val_loss improved from 1.81988 to 1.74448, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.002-1.7445.hdf5
28/28 [==============================] - 70s - loss: 7.2616 - acc: 0.2324 - val_loss: 1.7445 - val_acc: 0.4422
Epoch 4/30
27/28 [===========================>..] - ETA: 2s - loss: 7.1436 - acc: 0.2729 Epoch 00003: val_loss did not improve
28/28 [==============================] - 70s - loss: 7.0712 - acc: 0.2732 - val_loss: 1.7748 - val_acc: 0.2579
Epoch 5/30
27/28 [===========================>..] - ETA: 2s - loss: 6.7608 - acc: 0.2977 Epoch 00004: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.7718 - acc: 0.2974 - val_loss: 1.7636 - val_acc: 0.3126
Epoch 6/30
27/28 [===========================>..] - ETA: 2s - loss: 6.7911 - acc: 0.3432 Epoch 00005: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.8124 - acc: 0.3384 - val_loss: 1.8264 - val_acc: 0.2058
Epoch 7/30
27/28 [===========================>..] - ETA: 2s - loss: 6.6596 - acc: 0.2497 Epoch 00006: val_loss improved from 1.74448 to 1.62738, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.006-1.6274.hdf5
28/28 [==============================] - 70s - loss: 6.6208 - acc: 0.2567 - val_loss: 1.6274 - val_acc: 0.4816
Epoch 8/30
27/28 [===========================>..] - ETA: 2s - loss: 6.5136 - acc: 0.3284 Epoch 00007: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.5326 - acc: 0.3256 - val_loss: 1.6966 - val_acc: 0.3151
Epoch 9/30
27/28 [===========================>..] - ETA: 2s - loss: 6.3156 - acc: 0.3050 Epoch 00008: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.3584 - acc: 0.3050 - val_loss: 1.6584 - val_acc: 0.3723
Epoch 10/30
27/28 [===========================>..] - ETA: 2s - loss: 6.1714 - acc: 0.3093 Epoch 00009: val_loss improved from 1.62738 to 1.58501, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.009-1.5850.hdf5
28/28 [==============================] - 70s - loss: 6.1751 - acc: 0.3133 - val_loss: 1.5850 - val_acc: 0.4816
Epoch 11/30
27/28 [===========================>..] - ETA: 2s - loss: 6.1054 - acc: 0.3492 Epoch 00010: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.1497 - acc: 0.3479 - val_loss: 1.6277 - val_acc: 0.3126
Epoch 12/30
27/28 [===========================>..] - ETA: 2s - loss: 6.2473 - acc: 0.3084 Epoch 00011: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.2675 - acc: 0.3128 - val_loss: 1.6294 - val_acc: 0.3596
Epoch 13/30
27/28 [===========================>..] - ETA: 2s - loss: 6.1853 - acc: 0.3012 Epoch 00012: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.1724 - acc: 0.3027 - val_loss: 1.6156 - val_acc: 0.3456
Epoch 14/30
27/28 [===========================>..] - ETA: 2s - loss: 6.0290 - acc: 0.3345 Epoch 00013: val_loss did not improve
28/28 [==============================] - 70s - loss: 6.0486 - acc: 0.3340 - val_loss: 1.6120 - val_acc: 0.3380
Epoch 15/30
27/28 [===========================>..] - ETA: 2s - loss: 5.6712 - acc: 0.3600 Epoch 00014: val_loss improved from 1.58501 to 1.57953, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.014-1.5795.hdf5
28/28 [==============================] - 70s - loss: 5.6849 - acc: 0.3583 - val_loss: 1.5795 - val_acc: 0.3443
Epoch 16/30
27/28 [===========================>..] - ETA: 2s - loss: 5.7416 - acc: 0.3339 Epoch 00015: val_loss improved from 1.57953 to 1.53152, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.015-1.5315.hdf5
28/28 [==============================] - 70s - loss: 5.7510 - acc: 0.3357 - val_loss: 1.5315 - val_acc: 0.4168
Epoch 17/30
27/28 [===========================>..] - ETA: 2s - loss: 5.6953 - acc: 0.3637 Epoch 00016: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.7544 - acc: 0.3664 - val_loss: 1.5724 - val_acc: 0.3520
Epoch 18/30
27/28 [===========================>..] - ETA: 2s - loss: 5.8233 - acc: 0.3345 Epoch 00017: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.8066 - acc: 0.3318 - val_loss: 1.5735 - val_acc: 0.3596
Epoch 19/30
27/28 [===========================>..] - ETA: 2s - loss: 5.7534 - acc: 0.3429 Epoch 00018: val_loss improved from 1.53152 to 1.49215, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.018-1.4922.hdf5
28/28 [==============================] - 71s - loss: 5.7191 - acc: 0.3471 - val_loss: 1.4922 - val_acc: 0.4358
Epoch 20/30
27/28 [===========================>..] - ETA: 2s - loss: 5.6824 - acc: 0.3776 Epoch 00019: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.6767 - acc: 0.3767 - val_loss: 1.5388 - val_acc: 0.4066
Epoch 21/30
27/28 [===========================>..] - ETA: 2s - loss: 5.3459 - acc: 0.3579 Epoch 00020: val_loss improved from 1.49215 to 1.45708, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.020-1.4571.hdf5
28/28 [==============================] - 70s - loss: 5.3862 - acc: 0.3591 - val_loss: 1.4571 - val_acc: 0.4435
Epoch 22/30
27/28 [===========================>..] - ETA: 2s - loss: 5.2556 - acc: 0.4207 Epoch 00021: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.2532 - acc: 0.4182 - val_loss: 1.4993 - val_acc: 0.4028
Epoch 23/30
27/28 [===========================>..] - ETA: 2s - loss: 5.4009 - acc: 0.3918 Epoch 00022: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.4241 - acc: 0.3887 - val_loss: 1.5027 - val_acc: 0.4091
Epoch 24/30
27/28 [===========================>..] - ETA: 2s - loss: 5.7004 - acc: 0.3484 Epoch 00023: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.7178 - acc: 0.3502 - val_loss: 1.4978 - val_acc: 0.4053
Epoch 25/30
27/28 [===========================>..] - ETA: 2s - loss: 5.5212 - acc: 0.3828 Epoch 00024: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.5192 - acc: 0.3853 - val_loss: 1.5472 - val_acc: 0.3469
Epoch 26/30
27/28 [===========================>..] - ETA: 2s - loss: 5.2935 - acc: 0.3788 Epoch 00025: val_loss improved from 1.45708 to 1.44253, saving model to ./resnet50_FT38_Hybrid_woNoF/checkpoint/weights.025-1.4425.hdf5
28/28 [==============================] - 70s - loss: 5.2945 - acc: 0.3795 - val_loss: 1.4425 - val_acc: 0.4676
Epoch 27/30
27/28 [===========================>..] - ETA: 2s - loss: 5.2318 - acc: 0.3837 Epoch 00026: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.2567 - acc: 0.3839 - val_loss: 1.4473 - val_acc: 0.4435
Epoch 28/30
27/28 [===========================>..] - ETA: 2s - loss: 5.3121 - acc: 0.4135 Epoch 00027: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.3075 - acc: 0.4099 - val_loss: 1.5131 - val_acc: 0.4003
Epoch 29/30
27/28 [===========================>..] - ETA: 2s - loss: 5.3136 - acc: 0.3953 Epoch 00028: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.3020 - acc: 0.3951 - val_loss: 1.4609 - val_acc: 0.4104
Epoch 30/30
27/28 [===========================>..] - ETA: 2s - loss: 5.1836 - acc: 0.3964 Epoch 00029: val_loss did not improve
28/28 [==============================] - 70s - loss: 5.1668 - acc: 0.3954 - val_loss: 1.4685 - val_acc: 0.3926
Out[18]:
<keras.callbacks.History at 0x7f8712143a10>

In [ ]:
# VGG16
# fine tuning
start_layer = 1

# Continue from the checkpoint with the lowest val_loss in CHECKPOINT_DIR.
model, model_name = get_best_model()
# Manual alternative, disabled:
#     model = load_model('./checkpoints/checkpoint3/weights.004-0.0565.hdf5')

# Layers before start_layer stay frozen; the rest are trained.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= start_layer

# recompile so the new trainable flags take effect
optimizer = Adam(lr=1e-3)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=300,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
    validation_data=(X_valid,y_valid),
    class_weight=class_weight,
    workers=3,
    pickle_safe=True,
)
#Resnet50 #top layer training from keras.applications.resnet50 import ResNet50 base_model = ResNet50(weights='imagenet', include_top=False) x = base_model.output x = GlobalAveragePooling2D()(x) # x = Flatten()(x) # x = Dense(256, init='glorot_normal', activation='relu')(x) # x = LeakyReLU(alpha=0.33)(x) # x = Dropout(0.5)(x) # x = Dense(256, init='glorot_normal', activation='relu')(x) # x = Dense(256, init='glorot_normal')(x) # x = LeakyReLU(alpha=0.33)(x) x = Dropout(0.8)(x) predictions = Dense(len(CROP_CLASSES), init='glorot_normal', activation='softmax')(x) model = Model(input=base_model.input, output=predictions) # first: train only the top layers (which were randomly initialized) for layer in base_model.layers: layer.trainable = False # compile the model (should be done *after* setting layers to non-trainable) optimizer = Adam(lr=LEARNINGRATE) model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) # train the model on the new data for a few epochs model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=30, verbose=1, callbacks=[model_checkpoint, tensorboard], validation_data=valid_generator, nb_val_samples=len(X_valid), class_weight=class_weight, nb_worker=3, pickle_safe=True)
# Resnet50 # fine tuning # 164 conv5c+top # 142 conv5+top # 80 conv4+conv5+top # 38 conv3+conv4+conv5+top start_layer = 38 model, model_name = get_best_model() # print('Loading model from weights.004-0.0565.hdf5') # model = load_model('./checkpoints/checkpoint3/weights.004-0.0565.hdf5') for layer in model.layers[:start_layer]: layer.trainable = False for layer in model.layers[start_layer:]: layer.trainable = True # we need to recompile the model for these modifications to take effect # we use SGD with a low learning rate optimizer = Adam(lr=1e-5) model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=300, verbose=1, callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], validation_data=valid_generator, nb_val_samples=len(X_valid), class_weight=class_weight, nb_worker=3, pickle_safe=True)

In [ ]:
#resume training

# Continue from the checkpoint with the lowest val_loss in CHECKPOINT_DIR.
model, model_name = get_best_model()
# print('Loading model from weights.004-0.0565.hdf5')
# model = load_model('./checkpoints/checkpoint3/weights.004-0.0565.hdf5')

# Same Keras 2 argument names and validation arrays as the other training
# cells. The earlier call used the Keras 1 names (samples_per_epoch,
# nb_epoch, nb_val_samples, nb_worker) and a `valid_generator` that is not
# defined in this notebook.
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=100, verbose=1, 
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], 
                    validation_data=(X_valid,y_valid), class_weight=class_weight, workers=3, pickle_safe=True)

In [4]:
#test prepare

# NOTE(review): this checkpoint directory is hard-coded and differs from
# CHECKPOINT_DIR (derived from CROP_MODEL) — confirm that is intentional.
TEST_CHECKPOINT_DIR = './resnet50_FT38_CW_STGTrain/checkpoint/'
test_model, test_model_name = get_best_model(checkpoint_dir=TEST_CHECKPOINT_DIR)
# print('Loading model from weights.004-0.0565.hdf5')
# test_model = load_model('./checkpoints/checkpoint2/weights.004-0.0565.hdf5')

def test_generator(df, mean, datagen = None, batch_size = BATCHSIZE):
    """Endlessly yield batches of preprocessed crops described by the rows of ``df``.

    Each row must provide 'image_file' plus the box columns 'xmin', 'ymin',
    'xmax', 'ymax'. The crop is read with ``load_img`` from ``TEST_DIR`` at
    ``(ROWS, COLS)``, scaled by 1/255, optionally passed through
    ``datagen.random_transform`` and finally through ``preprocess_input(x, mean)``.

    Args:
        df: DataFrame with one crop per row (columns listed above).
        mean: value forwarded unchanged to ``preprocess_input``.
        datagen: optional object with a ``random_transform(x)`` method
            (e.g. an ``ImageDataGenerator``) used for test-time augmentation.
        batch_size: maximum number of crops per yielded batch.

    Yields:
        Array of shape ``(batch, ROWS, COLS, 3)`` with dtype ``K.floatx()``.
        Rows are visited in order; the last batch of a pass may be smaller,
        after which the generator wraps around to the first row.
    """
    n = df.shape[0]
    batch_index = 0
    while 1:
        start = batch_index * batch_size
        stop = min(start + batch_size, n)
        # Wrap as soon as the pass is exhausted. The previous test
        # (n >= start + batch_size) advanced past the end when n was an exact
        # multiple of batch_size and then yielded an empty batch.
        if stop < n:
            batch_index += 1
        else:
            batch_index = 0
        # iloc: slice by position regardless of the frame's index labels.
        batch_df = df.iloc[start:stop]
        batch_x = np.zeros((batch_df.shape[0], ROWS, COLS, 3), dtype=K.floatx())
        for i, (index, row) in enumerate(batch_df.iterrows()):
            image_file = row['image_file']
            bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
            cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
            x = np.asarray(cropped, dtype=K.floatx())
            x /= 255.
            if datagen is not None: x = datagen.random_transform(x)
            x = preprocess_input(x, mean)
            batch_x[i] = x
        # batch_index already points at the *next* batch here.
        if batch_index%50 == 0: print('batch_index', batch_index)
        yield(batch_x)
        
# Random geometric transforms for test-time augmentation; passed to
# test_generator as `datagen`.
test_aug_options = dict(
    rotation_range=180,
    shear_range=0.2,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)
test_aug_datagen = ImageDataGenerator(**test_aug_options)


Loading model from checkpoint file ./resnet50_FT38_CW_STGTrain/checkpoint/weights.000-0.0327.hdf5
Loading model Done!

In [5]:
# Hard-coded three-value mean later passed to test_generator(mean=...).
# NOTE(review): presumably the per-channel mean of the training crops — confirm.
train_mean = [
    0.37698776,
    0.41491762,
    0.38681713,
]

In [ ]:
# Means taken from the fitted data generators (this replaces any train_mean
# assigned by an earlier cell).
train_mean, valid_mean = train_datagen.mean, valid_datagen.mean

# featurewise_center is applied to each split independently.
X_train_centered, X_valid_centered = (
    featurewise_center(split) for split in (X_train, X_valid)
)

In [ ]:
#validation data fish logloss

def classwise_logloss(y_true, y_pred, class_names=None):
    """Per-sample log-loss of the true class, labelled with that class' name.

    Args:
        y_true: 2-D one-hot (or score) array; the true class of a row is its argmax.
        y_pred: 2-D array of predicted probabilities with the same shape.
        class_names: names indexed by class position; defaults to FISH_CLASSES.

    Returns:
        DataFrame with columns ['logloss', 'class'], one row per sample.
        A predicted probability of exactly 0 gives ``inf`` instead of raising
        "math domain error" as ``math.log`` did.
    """
    if class_names is None:
        class_names = FISH_CLASSES
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred, dtype=np.float64)
    true_index = np.argmax(y_true, axis=1)
    true_prob = y_pred[np.arange(len(true_index)), true_index]
    with np.errstate(divide='ignore'):
        losses = -np.log(true_prob)
    # Built in one go: avoids quadratic row-by-row appends and object dtypes.
    return pd.DataFrame({'logloss': losses,
                         'class': [class_names[k] for k in true_index]},
                        columns=['logloss','class'])

# NOTE(review): class names are looked up in FISH_CLASSES by one-hot position;
# confirm that y_valid / y_train were encoded in that same order.
valid_pred = test_model.predict(X_valid_centered, batch_size=BATCHSIZE, verbose=1)
# valid_pred = test_model.predict_generator(test_generator(df=valid_df, mean=valid_mean),
#                                           val_samples=valid_df.shape[0], nb_worker=1, pickle_safe=False)
valid_logloss_df = classwise_logloss(y_valid, valid_pred)
print(valid_logloss_df.groupby(['class'])['logloss'].mean())
print(valid_logloss_df['logloss'].mean())

train_pred = test_model.predict(X_train_centered, batch_size=BATCHSIZE, verbose=1)
# train_pred = test_model.predict_generator(test_generator(df=train_df, ),
#                                           val_samples=train_df.shape[0], nb_worker=1, pickle_safe=False)
train_logloss_df = classwise_logloss(y_train, train_pred)
print(train_logloss_df.groupby(['class'])['logloss'].mean())
print(train_logloss_df['logloss'].mean())

In [8]:
# Crop-classifier predictions on the ground-truth boxes, cached as a pickle.
# Column layout of GTbbox_CROPpred_df:
#   ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax',
#    'ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT', 'logloss']

file_name = 'GTbbox_CROPpred_df_'+test_model_name+'_.pickle'
gt_cache_path = OUTPUT_DIR+file_name
if not os.path.exists(gt_cache_path):
    print ('Generating file '+file_name)
    nb_augmentation = 1
    nb_gt_crops = GTbbox_df.shape[0]
    if nb_augmentation != 1:
        # Test-time augmentation: average the predictions of several randomly
        # transformed passes over the same crops.
        test_preds = np.zeros((nb_gt_crops, len(FISH_CLASSES)), dtype=K.floatx())
        for idx in range(nb_augmentation):
            print('{}th augmentation for testing ...'.format(idx+1))
            test_preds += test_model.predict_generator(
                test_generator(df=GTbbox_df, mean=train_mean, datagen=test_aug_datagen),
                val_samples=nb_gt_crops, nb_worker=1, pickle_safe=False)
        test_preds /= nb_augmentation
    else:
        # Single, un-augmented pass.
        test_preds = test_model.predict_generator(
            test_generator(df=GTbbox_df, mean=train_mean),
            val_samples=nb_gt_crops, nb_worker=1, pickle_safe=False)

    def _gt_crop_logloss(row):
        # Negative log of the probability assigned to the crop's own class.
        return -math.log(row[row['crop_class']])

    CROPpred_df = pd.DataFrame(test_preds, columns=['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT'])
    GTbbox_CROPpred_df = pd.concat([GTbbox_df,CROPpred_df], axis=1)
    GTbbox_CROPpred_df['logloss'] = GTbbox_CROPpred_df.apply(_gt_crop_logloss, axis=1)
    GTbbox_CROPpred_df.to_pickle(gt_cache_path)
else:
    print ('Loading from file '+file_name)
    GTbbox_CROPpred_df = pd.read_pickle(gt_cache_path)

#logloss of every fish class
print(GTbbox_CROPpred_df.groupby(['crop_class'])['logloss'].mean())
print(GTbbox_CROPpred_df['logloss'].mean())


Loading from file GTbbox_CROPpred_df_weights.000-0.0327.hdf5_.pickle
crop_class
ALB      0.076577
BET      0.139025
DOL      0.126520
LAG      0.000761
NoF      0.051943
OTHER    0.133949
SHARK    0.018328
YFT      0.090739
Name: logloss, dtype: float64
0.05936252677814113

In [9]:
# RFCNbbox_RFCNpred_df = ['image_class','image_file','crop_index','xmin','ymin','xmax','ymax',
#                          'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                          'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']
# select fish_conf >= CONF_THRESH

file_name = 'RFCNbbox_RFCNpred_df_conf{:.2f}.pickle'.format(CONF_THRESH)
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    RFCNbbox_RFCNpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    rfcn_columns = ['image_class','image_file','crop_index','xmin','ymin','xmax','ymax',
                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']

    # NOTE: pickle.load can execute arbitrary code; only load detection files
    # that were produced by a trusted run of the detector.
    with open('../data/RFCN_detections/detections_full_AGNOSTICnms_'+RFCN_MODEL+'.pkl','rb') as f:
        detections_full_AGNOSTICnms = pickle.load(f, encoding='latin1')
    with open("../RFCN/ImageSets/Main/test.txt","r") as f:
        test_files = f.readlines()
    with open("../RFCN/ImageSets/Main/train_test.txt","r") as f:
        train_file_labels = f.readlines()
    assert len(detections_full_AGNOSTICnms) == len(test_files)

    # basename -> label. setdefault keeps the FIRST line per basename, like the
    # first-match linear scan this replaces, but with O(1) lookups.
    label_by_basename = {}
    for label_line in train_file_labels:
        label_by_basename.setdefault(label_line[:9], label_line[10:].rstrip('\r\n'))

    # Number of boxes per image that passed CONF_THRESH (before the fallback).
    count = np.zeros(len(detections_full_AGNOSTICnms))

    rfcn_rows = []
    for im in range(len(detections_full_AGNOSTICnms)):
        if im%1000 == 0: print(im)
        basename = test_files[im][:9]
        if im<1000:
            # The first 1000 entries are given the placeholder class '--'.
            image_class = '--'
        elif basename in label_by_basename:
            image_class = label_by_basename[basename]
        else:
            # Previously the label of the preceding image leaked through here.
            print('WARNING: no label found for '+basename+', using --')
            image_class = '--'
        # Context manager closes the file; the name `image` is deliberately not
        # rebound, since it is the imported keras.preprocessing.image module.
        with Image.open(os.path.join(TEST_DIR, basename+'.jpg')) as img:
            width_image, height_image = img.size

        # Each detection row: columns 0-3 box, 4 NoF score, 5-11 fish scores.
        detects_im = detections_full_AGNOSTICnms[im]
#         alternative criterion: np.sum(detects_im[i,5:]) >= CONF_THRESH
        confident = np.max(detects_im[:,5:], axis=1) >= CONF_THRESH
        bboxes = detects_im[confident]
        count[im] = len(bboxes)
        if len(bboxes) == 0:
            # No confident box: keep the one with the largest summed fish score.
            ind = np.argmax(np.sum(detects_im[:,5:], axis=1))
            bboxes = detects_im[ind:ind+1]

        for j, bbox in enumerate(bboxes):
            xmin, ymin, xmax, ymax = bbox[:4]
            # The box clipped to the image must still have positive area.
            assert max(xmin,0)<min(xmax,width_image)
            assert max(ymin,0)<min(ymax,height_image)
            rfcn_rows.append([image_class,basename+'.jpg',j,max(xmin,0),max(ymin,0),
                              min(xmax,width_image),min(ymax,height_image)]
                             + list(bbox[4:12]))

    # Build the frame once instead of appending row by row (quadratic).
    RFCNbbox_RFCNpred_df = pd.DataFrame(rfcn_rows, columns=rfcn_columns)
    RFCNbbox_RFCNpred_df.to_pickle(OUTPUT_DIR+file_name)


Loading from file RFCNbbox_RFCNpred_df_conf0.80.pickle

In [31]:
# Display the ground-truth crops whose class is anything other than 'NoF'.
is_fish_crop = GTbbox_CROPpred_df['crop_class'] != 'NoF'
GTbbox_CROPpred_df.loc[is_fish_crop]


Out[31]:
image_file crop_index crop_class xmin ymin xmax ymax ALB BET DOL LAG NoF OTHER SHARK YFT logloss
0 img_00003.jpg 0.0 ALB 377.000000 66.000000 730.000000 173.000000 9.999806e-01 1.153381e-06 6.197113e-10 1.895012e-10 3.613221e-06 1.272149e-05 5.510684e-09 2.063266e-06 1.943130e-05
1 img_00003.jpg 1.0 ALB 670.000000 95.000000 1008.000000 219.000000 9.999139e-01 3.154530e-08 1.504183e-09 1.004528e-09 1.135657e-06 8.434706e-05 8.872343e-12 5.066360e-07 8.607281e-05
2 img_00003.jpg 2.0 ALB 820.000000 328.000000 1123.000000 485.000000 9.999986e-01 3.259925e-07 2.411850e-12 2.287089e-07 1.287768e-07 6.981027e-07 5.469245e-12 6.689053e-09 1.430512e-06
3 img_00003.jpg 3.0 ALB 291.000000 122.000000 643.000000 407.000000 9.998715e-01 7.074304e-07 1.498078e-13 5.582359e-11 1.212325e-04 6.655146e-08 2.109827e-15 6.572765e-06 1.285159e-04
4 img_00010.jpg 0.0 ALB 651.000000 422.000000 746.000000 612.000000 9.999821e-01 1.612292e-05 1.606051e-09 8.399101e-09 5.414718e-07 1.055654e-06 8.472713e-09 1.611917e-07 1.788155e-05
5 img_00010.jpg 1.0 ALB 831.000000 305.000000 943.000000 418.000000 9.967868e-01 5.258065e-05 2.600038e-07 1.262084e-08 1.224622e-03 1.223431e-03 1.717334e-07 7.120565e-04 3.218340e-03
6 img_00012.jpg 0.0 ALB 471.000000 513.000000 627.000000 703.000000 9.999943e-01 3.739872e-08 1.850646e-06 1.132046e-08 1.897254e-06 6.289488e-09 9.704899e-11 1.929350e-06 5.722062e-06
7 img_00015.jpg 0.0 ALB 233.000000 341.000000 444.000000 435.000000 9.743453e-01 1.293397e-03 1.068411e-06 3.617434e-07 2.392042e-02 2.577601e-04 8.125739e-07 1.809537e-04 2.598955e-02
8 img_00019.jpg 0.0 ALB 155.000000 393.000000 293.000000 468.000000 9.727021e-01 9.626866e-10 6.829089e-13 5.678039e-09 2.318743e-02 6.181457e-08 3.705326e-10 4.110374e-03 2.767742e-02
9 img_00020.jpg 0.0 ALB 586.000000 537.000000 710.000000 719.000000 9.955834e-01 3.461484e-03 5.535413e-09 3.351136e-06 3.796670e-07 8.154724e-05 1.728447e-06 8.681710e-04 4.426427e-03
10 img_00020.jpg 1.0 ALB 690.000000 454.000000 847.000000 687.000000 9.998964e-01 2.346682e-05 2.276121e-12 6.263109e-10 5.679273e-08 7.904898e-05 3.889341e-11 1.027095e-06 1.035982e-04
11 img_00020.jpg 2.0 ALB 614.000000 377.000000 756.000000 484.000000 9.999816e-01 6.440972e-07 7.907461e-08 1.131726e-11 9.268691e-06 1.255067e-06 5.675534e-06 1.460077e-06 1.835840e-05
12 img_00020.jpg 3.0 ALB 724.000000 360.000000 816.000000 537.000000 9.999635e-01 7.285932e-06 1.114230e-08 3.857879e-09 1.466452e-05 1.255418e-05 8.424390e-09 2.041774e-06 3.647871e-05
13 img_00020.jpg 4.0 ALB 630.000000 327.000000 756.000000 438.000000 9.998150e-01 2.325187e-06 5.186432e-07 1.736279e-11 4.357657e-05 5.044311e-08 1.080510e-06 1.373883e-04 1.849703e-04
14 img_00029.jpg 0.0 ALB 607.000000 343.000000 706.000000 524.000000 9.999987e-01 3.594790e-07 1.605573e-08 6.174470e-09 2.296128e-07 5.097974e-07 3.114929e-11 2.182999e-07 1.311303e-06
15 img_00029.jpg 1.0 ALB 693.000000 331.000000 773.000000 485.000000 9.990757e-01 1.419920e-05 8.545166e-06 5.261857e-05 7.995616e-04 4.140537e-05 3.945559e-09 7.982659e-06 9.247763e-04
16 img_00032.jpg 0.0 ALB 520.000000 513.000000 627.000000 568.000000 9.148430e-01 1.440050e-04 6.734094e-06 2.674154e-07 8.218089e-02 1.380336e-06 6.068142e-08 2.823662e-03 8.900279e-02
17 img_00037.jpg 0.0 ALB 291.000000 278.000000 383.000000 436.000000 9.988926e-01 7.055594e-05 3.687090e-05 2.988373e-07 7.559397e-05 6.331778e-06 1.440459e-08 9.177037e-04 1.108008e-03
18 img_00038.jpg 0.0 ALB 745.000000 254.000000 831.000000 401.000000 9.678084e-01 3.162137e-02 7.209297e-08 6.797074e-07 4.583938e-04 2.350453e-05 1.698614e-05 7.062314e-05 3.272112e-02
19 img_00039.jpg 0.0 ALB 393.000000 115.000000 653.000000 220.000000 9.999824e-01 8.426422e-07 2.922295e-09 5.091283e-09 8.092207e-08 4.532214e-08 1.761962e-07 1.647602e-05 1.764313e-05
20 img_00041.jpg 0.0 ALB 774.000000 188.000000 1074.000000 372.000000 9.973723e-01 1.446992e-04 1.484089e-06 1.172362e-03 1.302605e-03 9.352129e-08 1.967828e-08 6.406744e-06 2.631189e-03
21 img_00041.jpg 1.0 ALB 562.000000 84.000000 890.000000 193.000000 9.963587e-01 1.568967e-03 1.909340e-08 9.761413e-08 2.041460e-03 2.864800e-06 8.028692e-08 2.767326e-05 3.647953e-03
22 img_00043.jpg 0.0 ALB 556.000000 373.000000 612.000000 520.000000 9.999943e-01 1.869347e-08 6.470538e-09 3.601910e-10 3.813006e-06 1.243006e-06 3.678304e-08 5.485905e-07 5.722062e-06
23 img_00045.jpg 0.0 ALB 725.000000 432.000000 838.000000 525.000000 9.997253e-01 1.044408e-06 4.624509e-11 1.380859e-12 2.997921e-08 9.887623e-07 3.264254e-11 2.726966e-04 2.747555e-04
24 img_00055.jpg 0.0 ALB 189.000000 382.000000 375.000000 469.000000 9.991695e-01 2.037858e-04 3.110706e-05 3.521669e-07 2.683227e-06 4.896912e-07 3.195880e-07 5.917859e-04 8.308762e-04
25 img_00057.jpg 0.0 ALB 456.000000 509.000000 550.000000 666.000000 9.803254e-01 5.703667e-05 8.106243e-05 6.375744e-08 4.867250e-03 1.483713e-04 7.475342e-06 1.451331e-02 1.987072e-02
26 img_00074.jpg 0.0 ALB 415.000000 142.000000 594.000000 225.000000 9.999074e-01 8.572769e-06 5.425484e-05 2.907593e-09 8.140572e-06 3.617875e-07 8.698826e-07 2.055169e-05 9.262991e-05
27 img_00085.jpg 0.0 ALB 528.000000 145.000000 904.000000 277.000000 9.999944e-01 4.824967e-06 2.861454e-13 1.500103e-10 5.636679e-09 1.107719e-07 8.330427e-12 7.240401e-07 5.602852e-06
28 img_00090.jpg 0.0 ALB 373.000000 69.000000 736.000000 176.000000 9.999552e-01 1.398818e-07 3.503078e-11 5.925299e-11 1.950427e-07 4.438670e-05 8.072463e-10 4.472725e-08 4.482370e-05
29 img_00090.jpg 1.0 ALB 664.000000 90.000000 1011.000000 217.000000 9.990360e-01 3.220962e-08 1.046357e-09 4.747873e-10 9.323478e-06 9.547356e-04 2.068044e-11 1.235434e-08 9.645105e-04
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4341 img_07624.jpg 0.0 YFT 40.425532 313.475177 427.659574 521.276596 2.164462e-07 8.217400e-08 1.176050e-10 9.168304e-13 3.708680e-08 7.436453e-13 8.649925e-11 9.999996e-01 3.576279e-07
4342 img_07633.jpg 0.0 YFT 449.645390 404.255319 776.595745 734.042553 1.944061e-07 1.345823e-10 2.600791e-09 1.223723e-13 5.715361e-04 1.499527e-11 7.718879e-11 9.994282e-01 5.719509e-04
4343 img_07644.jpg 0.0 YFT 31.205674 284.397163 441.134752 561.702128 4.146482e-08 7.857637e-14 2.391757e-12 4.072475e-16 1.115367e-05 6.872426e-15 5.370510e-13 9.999888e-01 1.120574e-05
4344 img_07648.jpg 0.0 YFT 496.453901 458.865248 863.829787 715.602837 1.056789e-06 8.189900e-12 2.191024e-08 3.227101e-14 2.480658e-01 4.621349e-10 2.824466e-11 7.519331e-01 2.851079e-01
4345 img_07649.jpg 0.0 YFT 395.744681 338.297872 539.007092 399.290780 9.800946e-01 2.885848e-05 6.444725e-10 2.656619e-10 5.918504e-06 4.666089e-10 2.298762e-10 1.987064e-02 3.918512e+00
4346 img_07653.jpg 0.0 YFT 392.907801 339.716312 492.907801 527.659574 2.288577e-04 5.256919e-09 1.510330e-12 2.889810e-13 9.491819e-09 3.025020e-09 1.956473e-10 9.997712e-01 2.288484e-04
4347 img_07655.jpg 0.0 YFT 355.319149 412.765957 508.510638 492.198582 1.258860e-02 9.553523e-07 2.006075e-09 8.764372e-11 5.649769e-07 1.484699e-08 1.848887e-08 9.874098e-01 1.267016e-02
4348 img_07665.jpg 0.0 YFT 386.524823 397.163121 615.602837 529.787234 7.730698e-08 9.188389e-07 7.162744e-13 4.260208e-13 1.254958e-10 1.479765e-11 2.911281e-11 9.999990e-01 9.536748e-07
4349 img_07706.jpg 0.0 YFT 739.716312 119.858156 1107.092199 265.957447 6.675404e-03 5.656304e-03 7.849219e-04 5.090093e-06 1.270020e-03 2.034237e-06 4.498025e-07 9.856058e-01 1.449883e-02
4350 img_07712.jpg 0.0 YFT 384.397163 395.035461 621.985816 527.659574 9.987452e-08 6.990318e-07 3.480624e-13 1.654098e-13 5.825423e-10 2.003397e-11 4.399122e-10 9.999992e-01 8.344654e-07
4351 img_07714.jpg 0.0 YFT 518.439716 499.290780 965.957447 751.063830 4.431536e-04 5.329949e-03 5.520845e-04 3.762749e-08 1.303821e-02 1.918395e-06 1.929933e-06 9.806327e-01 1.955728e-02
4352 img_07731.jpg 0.0 YFT 0.709220 285.106383 407.092199 492.198582 2.290689e-05 1.586421e-05 9.612826e-06 8.063775e-10 2.111648e-04 6.028992e-10 5.919560e-09 9.997404e-01 2.596119e-04
4353 img_07742.jpg 0.0 YFT 392.907801 119.858156 604.964539 216.312057 7.540400e-04 5.190315e-07 1.048923e-06 2.994529e-12 5.577776e-07 3.216247e-08 4.288129e-09 9.992439e-01 7.564305e-04
4354 img_07747.jpg 0.0 YFT 396.453901 36.170213 573.758865 139.007092 4.107560e-02 1.682030e-06 1.991541e-06 3.832559e-10 7.867641e-05 2.695598e-08 5.084877e-09 9.588420e-01 4.202893e-02
4355 img_07750.jpg 0.0 YFT 382.269504 400.709220 618.439716 525.531915 1.154416e-07 3.628240e-07 7.613718e-13 1.997993e-13 3.715605e-11 6.477192e-12 5.375092e-11 9.999995e-01 4.768373e-07
4356 img_07752.jpg 0.0 YFT 516.312057 100.709220 1063.120567 470.921986 2.969756e-06 3.421748e-11 4.237403e-10 1.173359e-13 2.443959e-05 4.258927e-12 1.328220e-07 9.999725e-01 2.753773e-05
4357 img_07759.jpg 0.0 YFT 372.340426 34.751773 563.829787 142.553191 7.952327e-04 3.425041e-08 2.242637e-06 1.119358e-11 6.487022e-06 2.139202e-07 9.518618e-09 9.991958e-01 8.045691e-04
4358 img_07761.jpg 0.0 YFT 378.723404 327.659574 623.404255 600.000000 2.354784e-02 1.261270e-04 2.567733e-06 2.596141e-08 3.898747e-02 1.521842e-03 5.412410e-03 9.304017e-01 7.213880e-02
4359 img_07765.jpg 0.0 YFT 240.425532 397.163121 436.170213 506.382979 1.646531e-04 7.871562e-07 1.974605e-12 9.064095e-11 1.574225e-05 5.046791e-09 5.182184e-09 9.998189e-01 1.811549e-04
4360 img_07775.jpg 0.0 YFT 358.865248 383.687943 544.680851 507.801418 1.533139e-04 6.141526e-09 1.837603e-12 1.696163e-12 2.080864e-08 9.277617e-08 1.598433e-08 9.998466e-01 1.534341e-04
4361 img_07782.jpg 0.0 YFT 719.148936 495.744681 1050.354610 770.212766 3.887371e-04 2.830485e-04 9.643040e-07 5.700894e-10 6.147433e-05 5.678395e-07 2.822596e-07 9.992649e-01 7.353744e-04
4362 img_07828.jpg 0.0 YFT 545.390071 437.588652 944.680851 599.290780 4.730524e-05 4.999148e-08 2.016533e-07 3.433206e-13 4.449790e-07 5.377348e-11 6.311234e-10 9.999520e-01 4.804250e-05
4363 img_07849.jpg 0.0 YFT 409.219858 329.787234 502.127660 533.333333 5.100721e-02 2.538231e-06 3.884194e-10 2.187348e-09 3.148336e-07 8.814292e-06 3.255179e-06 9.489779e-01 5.236972e-02
4364 img_07852.jpg 0.0 YFT 246.808511 399.290780 431.914894 506.382979 2.338640e-01 5.934151e-06 1.941234e-10 2.950447e-09 1.761298e-04 7.111951e-09 1.075934e-08 7.659540e-01 2.666332e-01
4365 img_07853.jpg 0.0 YFT 367.375887 420.567376 534.751773 507.092199 4.134394e-03 5.552590e-06 3.968423e-10 6.416629e-10 1.457083e-07 2.094415e-08 5.564397e-11 9.958599e-01 4.148673e-03
4366 img_07854.jpg 0.0 YFT 297.163121 402.836879 504.255319 531.205674 9.831114e-03 1.452477e-05 4.623424e-09 4.330351e-09 1.166227e-06 6.302580e-06 1.310425e-07 9.901467e-01 9.902168e-03
4367 img_07891.jpg 0.0 YFT 403.546099 504.964539 1150.354610 743.971631 2.132043e-02 5.357747e-04 1.069194e-08 6.377197e-08 1.312335e-03 3.185128e-07 9.797323e-06 9.768212e-01 2.345167e-02
4368 img_07901.jpg 0.0 YFT 104.255319 348.936170 414.184397 512.765957 2.305388e-03 1.219822e-08 8.169907e-08 1.499224e-10 4.947213e-04 7.767779e-10 1.084323e-09 9.971998e-01 2.804154e-03
4369 img_07911.jpg 0.0 YFT 190.780142 53.900709 562.411348 219.858156 5.444570e-04 1.197029e-09 2.904432e-11 1.601409e-07 3.330995e-04 2.670902e-10 2.680843e-11 9.991223e-01 8.780638e-04
4370 img_07911.jpg 1.0 YFT 756.028369 112.765957 1136.879433 329.787234 5.873860e-03 4.908578e-06 1.231487e-07 4.685965e-06 1.182474e-03 1.162095e-06 1.040281e-07 9.929327e-01 7.092415e-03

4371 rows × 16 columns

# Pixel data of every RFCN box, cropped and resized to (ROWS, COLS), cached as
# a pickle holding {'X_test_crop': uint8 array}.
file_name = 'data_test_Crop_{}_{}.pickle'.format(ROWS, COLS)
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    # NOTE: pickle.load can execute arbitrary code; only load caches written
    # by this notebook.
    with open(OUTPUT_DIR+file_name, 'rb') as f:
        data_test = pickle.load(f)
    # Fixed: this used to read data_train['X_test_crop'], i.e. the wrong dict.
    X_test_crop = data_test['X_test_crop']
else:
    print ('Generating file '+file_name)
    X_test_crop = np.ndarray((RFCNbbox_RFCNpred_df.shape[0], ROWS, COLS, 3), dtype=np.uint8)
    for i, (index, row) in enumerate(RFCNbbox_RFCNpred_df.iterrows()):
        image_file = row['image_file']
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
        X_test_crop[i] = np.asarray(cropped)

    #save data to file
    data_test = {'X_test_crop': X_test_crop}
    with open(OUTPUT_DIR+file_name, 'wb') as f:
        pickle.dump(data_test, f)

print('Loading data done.')
X_test_crop = X_test_crop.astype(np.float32)
print('Convert to float32 done.')
X_test_crop /= 255.
print('Rescale by 255 done.')

In [32]:
# Pixel data of every ground-truth fish box (crop_class != 'NoF'), cropped and
# resized to (ROWS, COLS), cached as a pickle holding
# {'X_trainfish_crop': uint8 array}.
file_name = 'data_trainfish_Crop_{}_{}.pickle'.format(ROWS, COLS)
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    # NOTE: pickle.load can execute arbitrary code; only load caches written
    # by this notebook.
    with open(OUTPUT_DIR+file_name, 'rb') as f:
        data_trainfish = pickle.load(f)
    # Fixed: this used to read data_train[...], which is not the dict loaded
    # above, so the cached branch failed or returned the wrong data.
    X_trainfish_crop = data_trainfish['X_trainfish_crop']
else:
    print ('Generating file '+file_name)

    GTbbox_CROPpred_fish_df = GTbbox_CROPpred_df.loc[GTbbox_CROPpred_df['crop_class']!='NoF']
    X_trainfish_crop = np.ndarray((GTbbox_CROPpred_fish_df.shape[0], ROWS, COLS, 3), dtype=np.uint8)
    for i, (index, row) in enumerate(GTbbox_CROPpred_fish_df.iterrows()):
        image_file = row['image_file']
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        # Images are read from TEST_DIR, as in the original cell.
        cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
        X_trainfish_crop[i] = np.asarray(cropped)

    #save data to file
    data_trainfish = {'X_trainfish_crop': X_trainfish_crop}
    with open(OUTPUT_DIR+file_name, 'wb') as f:
        pickle.dump(data_trainfish, f)

print('Loading data done.')
X_trainfish_crop = X_trainfish_crop.astype(np.float32)
print('Convert to float32 done.')
X_trainfish_crop /= 255.
print('Rescale by 255 done.')


Generating file data_trainfish_Crop_224_224.pickle
Loading data done.
Convert to float32 done.
Rescale by 255 done.

In [33]:
# Display mean(X_trainfish_crop). `mean` is not defined in this cell (a helper
# from elsewhere in the notebook); the recorded output is a 3-element float32 array.
mean(X_trainfish_crop)


Out[33]:
array([ 0.40706199,  0.4373979 ,  0.39489502], dtype=float32)

In [28]:
# Display the same `mean` statistic for X_test_crop rows from position 1251 onwards.
# NOTE(review): 1251 is presumably the number of crops that belong to the
# unlabeled test images — confirm before relying on it.
mean(X_test_crop[1251:])


Out[28]:
array([ 0.41078389,  0.43895897,  0.39912957], dtype=float32)

In [35]:
# Mean passed to test_generator(mean=...) when predicting on the RFCN crops.
# Alternative that was tried and disabled:
# test_mean = [0.41019869,  0.43978861,  0.39873621]
test_mean = [
    0.37698776,
    0.41491762,
    0.38681713,
]

In [55]:
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = ['image_class', 'image_file','crop_index','xmin','ymin','xmax','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#                                    'ALB_CROP', 'BET_CROP', 'DOL_CROP', 'LAG_CROP',
#                                    'NoF_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP',
#                                    'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

file_name = 'RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    nb_augmentation = 1
    if nb_augmentation ==1:
        test_preds = test_model.predict_generator(test_generator(df=RFCNbbox_RFCNpred_df, mean=test_mean),
                                                  val_samples=RFCNbbox_RFCNpred_df.shape[0], nb_worker=1, pickle_safe=False)
    else:
        # Test-time augmentation: average several randomly transformed passes.
        test_preds = np.zeros((RFCNbbox_RFCNpred_df.shape[0], len(FISH_CLASSES)), dtype=K.floatx())
        for idx in range(nb_augmentation):
            print('{}th augmentation for testing ...'.format(idx+1))
            test_preds += test_model.predict_generator(test_generator(df=RFCNbbox_RFCNpred_df, mean=test_mean, datagen=test_aug_datagen),
                                                       val_samples=RFCNbbox_RFCNpred_df.shape[0], nb_worker=1, pickle_safe=False)
        test_preds /= nb_augmentation

    CROPpred_df = pd.DataFrame(test_preds, columns=['ALB_CROP', 'BET_CROP', 'DOL_CROP', 'LAG_CROP', 'NoF_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP'])
    hybrid_df = pd.concat([RFCNbbox_RFCNpred_df,CROPpred_df], axis=1)

    # Hybrid score: NoF comes from RFCN; the remaining mass (1 - NoF_RFCN) is
    # split among the fish classes in proportion to the crop classifier's
    # fish probabilities, i.e. fish_CROP / (1 - NoF_CROP).
    nof_rfcn = hybrid_df['NoF_RFCN'].astype(float)
    nof_crop = hybrid_df['NoF_CROP'].astype(float)
    has_fish_mass = nof_crop != 1
    # Placeholder 1.0 avoids dividing by zero; those rows are set to 0 below.
    crop_fish_mass = (1.0 - nof_crop).where(has_fish_mass, 1.0)

    hybrid_df['NoF'] = hybrid_df['NoF_RFCN']
    for fish in ['ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']:
        # Vectorised replacement for the row-wise apply. That lambda indexed
        # row[[fish+'_CROP']] (double brackets) and so returned a one-element
        # Series mixed with the scalar 0 instead of a plain number.
        rescaled = (1.0 - nof_rfcn) * hybrid_df[fish+'_CROP'].astype(float) / crop_fish_mass
        hybrid_df[fish] = rescaled.where(has_fish_mass, 0.0)
#     Disabled alternative: use the crop classifier's scores unchanged.
#     for fish in FISH_CLASSES:
#         RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish] = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish+'_CROP']

    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = hybrid_df
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.to_pickle(OUTPUT_DIR+file_name)


Generating file RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df_weights.000-0.0327.hdf5_.pickle
batch_index 0

In [56]:
# clsMaxAve and hybrid RFCNpred&CROPpred such that RFCNpred for NoF and CROPpred for fish
# test_pred_df = ['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# Input: the hybrid columns 'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT'
# (plus 'image_class' and 'image_file') of RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.

file_name = 'test_pred_df_Hybrid_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    test_pred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    with open("../RFCN/ImageSets/Main/test.txt","r") as f:
        test_files = f.readlines()

    class_columns = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
    hybrid_df = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df
    test_pred_rows = []
    for test_line in test_files:
        # rstrip instead of [:-1]: a final line without a newline keeps its
        # last character.
        image_file = test_line.rstrip('\r\n')+'.jpg'
        im_rows = hybrid_df.loc[hybrid_df['image_file'] == image_file]
        image_class = im_rows.iloc[0]['image_class']
        # astype returns a new frame, so nothing below writes into a slice of
        # hybrid_df (the old in-place drop/assignments did).
        scores = im_rows[class_columns].astype(float)

        # Per box: its best class and that class' score.
        max_score = scores.max(axis=1)
        max_cls = scores.idxmax(axis=1)
        # Per box: how many boxes of this image share the same best class.
        cls_count = max_cls.groupby(max_cls.values).transform('count')
        # Keep, for every best class, the box(es) with the highest score ...
        is_cls_best = max_score == max_score.groupby(max_cls.values).transform('max')
        # ... and average their score vectors, weighted by the class counts.
        pred = np.average(scores[is_cls_best].values, axis=0,
                          weights=cls_count[is_cls_best].values, returned=False).tolist()

        if image_class!='--':
            ind = FISH_CLASSES.index(image_class)
            logloss = -math.log(pred[ind])
        else:
            # '--' marks images without a known class: no loss can be computed.
            logloss = np.nan
        test_pred_rows.append([logloss,image_class,image_file] + pred)

    # Build the frame once instead of appending row by row (quadratic).
    test_pred_df = pd.DataFrame(test_pred_rows,
                                columns=['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT'])
    test_pred_df.to_pickle(OUTPUT_DIR+file_name)


Generating file test_pred_df_Hybrid_weights.000-0.0327.hdf5_.pickle

In [ ]:
#### visualization
# Frames read here (columns used):
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df: 'image_file', 'xmin','ymin','xmax','ymax',
#     'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN', 'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#     'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT'
# GTbbox_CROPpred_df: 'image_file', 'crop_class', 'xmin','ymin','xmax','ymax',
#     'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT'
# test_pred_df = ['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

def _best_class(scores):
    # scores follow FISH_CLASSES order; returns (class name, score) of the maximum.
    best = max(scores)
    return FISH_CLASSES[scores.index(best)], best

def _draw_box(ax, bbox, color):
    # Unfilled rectangle for bbox = [xmin, ymin, xmax, ymax].
    ax.add_patch(plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor=color, linewidth=2))

# Every 30th image among the first 1000 rows of test_pred_df.
for j in range(0, min(test_pred_df.shape[0], 1000), 30):
    image_logloss = test_pred_df.iat[j,0]
    image_class = test_pred_df.iat[j,1]
    image_file = test_pred_df.iat[j,2]

    im = np.asarray(Image.open('../RFCN/JPEGImages/'+image_file))
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(im, aspect='equal')

    # Red: RFCN boxes, labelled with the top RFCN class and the top hybrid class.
    RFCN_dets = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.loc[RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['image_file']==image_file]
    for index,row in RFCN_dets.iterrows():
        bbox = [row[side] for side in ('xmin','ymin','xmax','ymax')]
        class_RFCN, score_RFCN = _best_class([row[fish+'_RFCN'] for fish in FISH_CLASSES])
        class_CROP, score_CROP = _best_class([row[fish] for fish in FISH_CLASSES])
        _draw_box(ax, bbox, 'red')
        ax.text(bbox[0], bbox[1] - 2, 'RFCN_{:s} {:.3f} \nHYBRID_{:s} {:.3f}'.format(class_RFCN, score_RFCN, class_CROP, score_CROP), bbox=dict(facecolor='red', alpha=0.5), fontsize=8, color='white')

    # Green: ground-truth boxes, labelled with the true class and the top crop-classifier class.
    GT_dets = GTbbox_CROPpred_df.loc[GTbbox_CROPpred_df['image_file']==image_file]
    for index,row in GT_dets.iterrows():
        bbox = [row[side] for side in ('xmin','ymin','xmax','ymax')]
        class_CROP, score_CROP = _best_class([row[fish] for fish in FISH_CLASSES])
        _draw_box(ax, bbox, 'green')
        ax.text(bbox[0], bbox[3] + 40, 'GT_{:s} \nCROP_{:s} {:.3f}'.format(row['crop_class'], class_CROP, score_CROP), bbox=dict(facecolor='green', alpha=0.5), fontsize=8, color='white')

    ax.set_title(('Image {:s}    FISH {:s}    logloss {}').format(image_file, image_class, image_logloss), fontsize=10)
    plt.axis('off')
    plt.tight_layout()
    plt.draw()

In [58]:
#temperature
# Temperature scaling: every class probability becomes exp(log(p) / T), then
# each row is renormalised to sum to 1.
T = 1
test_pred_array = test_pred_df[FISH_CLASSES].values
tempered = np.exp(np.log(test_pred_array)/T)
test_pred_T_array = tempered/np.sum(tempered, axis=1, keepdims=True)
test_pred_T_df = pd.concat(
    [test_pred_df[['image_class','image_file']],
     pd.DataFrame(test_pred_T_array, columns=FISH_CLASSES)],
    axis=1)

#add logloss
def _tempered_logloss(row):
    # Images with the placeholder class '--' have no loss.
    if row['image_class']=='--':
        return np.nan
    return -math.log(row[row['image_class']])

test_pred_T_df['logloss'] = test_pred_T_df.apply(_tempered_logloss, axis=1)

#calculate train logloss
print(test_pred_T_df.groupby(['image_class'])['logloss'].mean())
# Series.mean skips the NaN rows, so only images with a class contribute.
train_logloss = test_pred_T_df['logloss'].mean()
print('logloss of train is', train_logloss )


image_class
--            NaN
ALB      0.043386
BET      0.234966
DOL      0.140754
LAG      0.000835
NoF      0.126442
OTHER    0.105386
SHARK    0.147953
YFT      0.080953
Name: logloss, dtype: float64
logloss of train is 0.08309862497761385

In [62]:
#test submission
# Rows with index labels 0..999 (label-based, inclusive slice) go into the CSV.
# NOTE(review): presumably these are exactly the unlabeled test images — confirm.
submission_columns = ['image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
submission = test_pred_T_df.loc[:999, submission_columns].rename(columns={'image_file':'image'})

# File name records both models, the confidence threshold, T and the train logloss.
sub_file_parts = [
    'RFCN_AGONOSTICnms_', RFCN_MODEL, '_', CROP_MODEL,
    '_clsMaxAve_conf{:.2f}_T{}_'.format(CONF_THRESH, T),
    '{:.4f}'.format(train_logloss),
    '.csv',
]
sub_file = ''.join(sub_file_parts)
submission.to_csv(sub_file, index=False)
print('Done!'+sub_file)


Done!RFCN_AGONOSTICnms_resnet101_rfcn_ohem_iter_30000_resnet50_FT38_Classifier_Rep_clsMaxAve_conf0.80_T2.5_0.1780.csv