Classifier_Rep6: an exact repeat of Classifier in keras1, while Classifier_rep4 in keras2 is the same as Classifier_rep5. Since Classifier_rep5 did not reach the same val_loss, we retrain using the data setup of the original Classifier in resnet50_FT38_CW.

Run 1: lr-reduce patience 10, early-stopping patience 25 -> weights.091-0.0406.hdf5
15616/15583 [==============================] - 427s - loss: 0.0301 - acc: 0.9968 - val_loss: 0.0406 - val_acc: 0.9887
valid loss: 0.054934760724733266
valid_woNoF loss: 0.09762307678173414
crop_class  ALB 0.039396  BET 1.001106  DOL 0.000125  LAG 0.000388  OTHER 0.063752  SHARK 0.000097  YFT 0.020004
train loss: 0.012347070935408364
train_woNoF loss: 0.0051947846192810405
crop_class  ALB 0.007792  BET 0.002834  DOL 0.000022  LAG 0.000119  OTHER 0.001002  SHARK 0.000725  YFT 0.002580
all loss: 0.05037969053777609
all_woNoF loss: 0.10533664062901534
crop_class  ALB 0.044404  BET 0.570618  DOL 0.269748  LAG 0.003975  OTHER 0.191621  SHARK 0.064067  YFT 0.079982
submission: RFCN_AGONOSTICnms_resnet101_rfcn_ohem_iter_30000_resnet50_FT38_Classifier_Rep6_weights.091-0.0406.hdf5_clsMaxAve_conf0.80_cropvalidloss0.0976_imageallloss0.1035_T2.5.csv

Run 2: lr-reduce patience 5, early-stopping patience 15 -> weights.073-0.0472.hdf5
15616/15583 [==============================] - 447s - loss: 0.0789 - acc: 0.9920 - val_loss: 0.0472 - val_acc: 0.9871
valid loss: 0.06061852928100923
valid_woNoF loss: 0.10658013534412561
crop_class  ALB 0.082938  BET 0.638375  DOL 0.000099  LAG 0.000289  OTHER 0.036710  SHARK 0.000095  YFT 0.074220
train loss: 0.023448730071607797
train_woNoF loss: 0.022590322968225395
crop_class  ALB 0.033394  BET 0.012264  DOL 0.000744  LAG 0.000255  OTHER 0.001929  SHARK 0.000384  YFT 0.014202
all loss: 0.0571241506242606
crop_class  ALB 0.134876  BET 0.383430  DOL 0.299616  LAG 0.003642  NoF 0.029710  OTHER 0.211802  SHARK 0.051833  YFT 0.111545
all_woNoF loss: 0.11463064874655438
crop_class  ALB 0.080437  BET 0.377342  DOL 0.267654  LAG 0.000993  OTHER 0.178607  SHARK 0.047496  YFT 0.101583
Name: logloss_woNoF, dtype: float64
submission: RFCN_AGONOSTICnms_resnet101_rfcn_ohem_iter_30000_resnet50_FT38_Classifier_Rep6_weights.073-0.0472.hdf5_clsMaxAve_conf0.80_cropvalidloss0.1066_imageallloss0.1114_T2.5.csv
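The two runs above differ only in the callback patience values. A minimal sketch of those two configurations, reusing the same Keras 1.x callbacks as the callback cell further down (monitor and factor copied from there); this is illustrative only, not a re-run of the experiments:

# Sketch of the two callback configurations compared in the notes above.
# Run 1: ReduceLROnPlateau(patience=10) + EarlyStopping(patience=25) -> weights.091-0.0406.hdf5
# Run 2: ReduceLROnPlateau(patience=5)  + EarlyStopping(patience=15) -> weights.073-0.0472.hdf5
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

def make_callbacks(lr_patience, stop_patience):
    # same monitor/factor as in the callback cell below; only the patience differs
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=lr_patience, verbose=1)
    stopping = EarlyStopping(monitor='val_loss', patience=stop_patience, verbose=1)
    return [lr_schedule, stopping]

callbacks_run1 = make_callbacks(lr_patience=10, stop_patience=25)
callbacks_run2 = make_callbacks(lr_patience=5, stop_patience=15)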

In [1]:
from __future__ import division
from __future__ import print_function

import os, random, glob, pickle, collections, math, json, time
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder

import matplotlib.pyplot as plt
%matplotlib inline 

from keras.models import Sequential, Model, load_model
from keras.layers import GlobalAveragePooling2D, Flatten, Dropout, Dense, LeakyReLU
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.preprocessing import image
from keras import backend as K
K.set_image_dim_ordering('tf')


Using TensorFlow backend.

In [2]:
TRAIN_DIR = '../data/train/'
TEST_DIR = '../RFCN/JPEGImages/'
TRAIN_CROP_DIR = '../data/train_crop/'
TEST_CROP_DIR = '../data/test_stg1_crop/'
RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'
CROP_MODEL = 'resnet50_FT38_Classifier_Rep6'
if not os.path.exists('./' + CROP_MODEL):
    os.mkdir('./' + CROP_MODEL)
CHECKPOINT_DIR = './' + CROP_MODEL + '/checkpoint/'
if not os.path.exists(CHECKPOINT_DIR):
    os.mkdir(CHECKPOINT_DIR)
LOG_DIR = './' + CROP_MODEL + '/log/'
if not os.path.exists(LOG_DIR):
    os.mkdir(LOG_DIR)
OUTPUT_DIR = './' + CROP_MODEL + '/output/'
if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
FISH_CLASSES_2 = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
# CROP_CLASSES=FISH_CLASSES[:]
# CROP_CLASSES.remove('NoF')
CONF_THRESH = 0.8
ROWS = 224
COLS = 224
BATCHSIZE = 128
LEARNINGRATE = 1e-4
le = LabelEncoder()
le.fit(FISH_CLASSES)
print(le.transform(FISH_CLASSES))

def imagewise_center(x):
    mean = np.mean(x, axis=0, keepdims=True)
    x_centered = x - mean
    return x_centered

def channelwise_center(x):
    mean = np.mean(x, axis=0, keepdims=True)
    mean = np.mean(mean, axis=(1,2), keepdims=True)
    x_centered = x - mean
    return x_centered    

def imagewise_mean(x):
    mean = np.mean(x, axis=0)
    return mean

def channelwise_mean(x):
    mean = np.mean(x, axis=0)
    mean = np.mean(mean, axis=(0,1))
    return mean

def preprocess_imagewise(x, imagewise_mean):
    #resnet50 image preprocessing
#     'RGB'->'BGR'
#     x = x[:, :, ::-1]
#     x /= 255.
    x -= imagewise_mean
    return x

def preprocess_channelwise(x, channelwise_mean):
    #resnet50 image preprocessing
#     'RGB'->'BGR'
#     x = x[:, :, ::-1]
#     x /= 255.
    x -= np.reshape(channelwise_mean, [1, 1, 3])
    return x

def load_img(path, bbox, target_size=None):
    img = Image.open(path)
#     img = img.convert('RGB')
    cropped = img.crop((bbox[0],bbox[1],bbox[2],bbox[3]))
    width_cropped, height_cropped = cropped.size
    if height_cropped > width_cropped: cropped = cropped.transpose(method=2)
    image_name = 'temp_{:f}.jpg'.format(time.time())
    cropped.save(image_name)
    cropped = Image.open(image_name)   
    if target_size:
        cropped = cropped.resize((target_size[1], target_size[0]), Image.BILINEAR)
    os.remove(image_name)
    return cropped

def get_best_model(checkpoint_dir = CHECKPOINT_DIR):
    files = glob.glob(checkpoint_dir+'*')
    val_losses = [float(f.split('-')[-1][:-5]) for f in files]
    index = val_losses.index(min(val_losses))
    print('Loading model from checkpoint file ' + files[index])
    model = load_model(files[index])
    model_name = files[index].split('/')[-1]
    print('Loading model Done!')
    return (model, model_name)

def data_from_df(df):
    X = np.ndarray((df.shape[0], ROWS, COLS, 3), dtype=np.uint8)
    y = np.zeros((df.shape[0], len(FISH_CLASSES)), dtype=K.floatx())
    i = 0
    for index,row in df.iterrows():
        image_file = row['image_file']
        fish = row['crop_class']
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
        X[i] = np.asarray(cropped)
        y[i,FISH_CLASSES.index(fish)] = 1
        i += 1
    return (X, y)

def data_load(name):
    file_name = 'data_'+name+'_{}_{}.pickle'.format(ROWS, COLS)
    if os.path.exists(OUTPUT_DIR+file_name):
        print ('Loading from file '+file_name)
        with open(OUTPUT_DIR+file_name, 'rb') as f:
            data = pickle.load(f)
        X = data['X']
        y = data['y']
    else:
        print ('Generating file '+file_name)
        
        if name=='train' or name=='valid': 
            df = GTbbox_df[GTbbox_df['split']==name]
        elif name=='all':
            df = GTbbox_df
        else:
            print('Invalid name '+name)
    
        X, y = data_from_df(df)

        data = {'X': X,'y': y}
        with open(OUTPUT_DIR+file_name, 'wb') as f:
            pickle.dump(data, f)
    X = X.astype(np.float32)
    X /= 255.
    return (X, y)


[4 0 1 2 3 5 6 7]
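One detail worth spelling out about the [4 0 1 2 3 5 6 7] output above: LabelEncoder assigns indices in alphabetical order, which is exactly FISH_CLASSES_2; that is why the prediction DataFrames later in the notebook use FISH_CLASSES_2 as their column order. A quick check:

# LabelEncoder sorts class names, so the encoded/one-hot column order follows
# FISH_CLASSES_2 (alphabetical, with NoF in the middle), not FISH_CLASSES.
assert sorted(FISH_CLASSES) == FISH_CLASSES_2
assert list(le.classes_) == FISH_CLASSES_2
print(le.transform(FISH_CLASSES))   # -> [4 0 1 2 3 5 6 7], matching the output above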
#crop and cache to TRAIN_CROP_DIR by BBannotations
if not os.path.exists(TRAIN_CROP_DIR):
    os.mkdir(TRAIN_CROP_DIR)
for c in FISH_CLASSES:
    TRAIN_CROP_DIR_c = TRAIN_CROP_DIR + '{}/'.format(c)
    if not os.path.exists(TRAIN_CROP_DIR_c):
        os.mkdir(TRAIN_CROP_DIR_c)
    files = glob.glob(TRAIN_CROP_DIR_c+'*')
    for f in files:
        os.remove(f)

GT_crop_bboxs_df = pd.DataFrame(columns=['GT_crop_files', 'xmin', 'ymin', 'xmax', 'ymax'])
crop_classes = FISH_CLASSES[:]
crop_classes.remove('NoF')
count = {}

for c in crop_classes:
    j = json.load(open('../data/BBannotations/{}.json'.format(c), 'r'))
    for l in j:
        filename = l["filename"]
        head, tail = os.path.split(filename)
        basename, file_extension = os.path.splitext(tail)
        image = Image.open(TRAIN_DIR+c+'/'+tail)
        for i in range(len(l["annotations"])):
            a = l["annotations"][i]
            file_crop = TRAIN_CROP_DIR + '{}/'.format(a["class"])+c+'_'+basename+'_{}_'.format(i)+a["class"]+'.jpg'
            xmin = (a["x"])
            ymin = (a["y"])
            width = (a["width"])
            height = (a["height"])
            xmax = xmin + width
            ymax = ymin + height
            #save cropped img
            cropped = image.crop((max(xmin,0), max(ymin,0), xmax, ymax))
            width_cropped, height_cropped = cropped.size
            if height_cropped > width_cropped: cropped = cropped.transpose(method=2)
            cropped.save(file_crop)
            if a["class"] != c: print(file_crop)
            GT_crop_bboxs_df.loc[len(GT_crop_bboxs_df)] = [file_crop.split('/')[-1],max(xmin,0),max(ymin,0),xmax,ymax]
    count[c] = len(os.listdir(TRAIN_CROP_DIR+c))

num_NoF = sum(count.values())*3

#crop and cache to TRAIN_CROP_DIR/NoF by RFCN
#crop images by detections_full_AGNOSTICnms.pkl
RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'
with open('../data/RFCN_detections/detections_full_AGNOSTICnms_'+RFCN_MODEL+'.pkl','rb') as f:
    detections_full_AGNOSTICnms = pickle.load(f, encoding='latin1')
train_detections_full_AGNOSTICnms = detections_full_AGNOSTICnms[1000:]
num_NoF_perIm = math.ceil(num_NoF / len(train_detections_full_AGNOSTICnms))

outputs = []
for im in range(len(train_detections_full_AGNOSTICnms)):
#for im in range(1):
    outputs_im = []
    detects_im = train_detections_full_AGNOSTICnms[im]
    for i in range(len(detects_im)):
        if detects_im[i,4] >= 0.999:
            outputs_im.append(detects_im[i,:])
    outputs_im = np.asarray(outputs_im)
    outputs_im = outputs_im[np.random.choice(outputs_im.shape[0], num_NoF_perIm, replace=False), :]
    outputs.append(outputs_im)
train_outputs = outputs

with open("../RFCN/ImageSets/Main/train_test.txt","r") as f:
    train_files = f.readlines()
for i in range(len(train_outputs)):
    basename = train_files[i][:9]
    bboxes = train_outputs[i]
    image = Image.open(TEST_DIR+basename+'.jpg')
    for j in range(len(bboxes)):
        bbox = bboxes[j]
        xmin = bbox[0]
        ymin = bbox[1]
        xmax = bbox[2]
        ymax = bbox[3]
        file_crop = TRAIN_CROP_DIR+'NoF/'+train_files[i][10:-1]+'_'+basename+'_{}_NoF'.format(j)+'.jpg'
        cropped = image.crop((xmin, ymin, xmax, ymax))
        width_cropped, height_cropped = cropped.size
        if height_cropped > width_cropped: cropped = cropped.transpose(method=2)
        cropped.save(file_crop)
        GT_crop_bboxs_df.loc[len(GT_crop_bboxs_df)] = [file_crop.split('/')[-1],xmin,ymin,xmax,ymax]

GT_crop_bboxs_df.to_pickle(OUTPUT_DIR+'GT_crop_files_BBox.pickle')
count['NoF'] = len(os.listdir(TRAIN_CROP_DIR+'NoF'))
print(count)

In [5]:
#Loading data
import pickle

def read_image(src):
    """Read and resize individual images"""
    im = Image.open(src)
    im = im.resize((COLS, ROWS), Image.BILINEAR)
    im = np.asarray(im)
    return im

if os.path.exists(OUTPUT_DIR+'data_train_BBCrop_{}_{}.pickle'.format(ROWS, COLS)):
    print ('Exist data_train_BBCrop_{}_{}.pickle. Loading data from file.'.format(ROWS, COLS))
    with open(OUTPUT_DIR+'data_train_BBCrop_{}_{}.pickle'.format(ROWS, COLS), 'rb') as f:
        data_train = pickle.load(f)
    X_train_crop = data_train['X_train_crop']
    y_train_crop = data_train['y_train_crop']
    train_crop_files = data_train['train_crop_files']
    class_weight = data_train['class_weight']
else:
    print ('Loading data from original images. Generating data_train_BBCrop_{}_{}.pickle.'.format(ROWS, COLS))
    
    y_train_crop = []
    train_crop_files = []

    for fish in FISH_CLASSES:
        fish_dir = TRAIN_CROP_DIR+'{}'.format(fish)
        fish_files = [fish+'/'+im for im in os.listdir(fish_dir)]
        train_crop_files.extend(fish_files)

        y_fish = np.tile(fish, len(fish_files))
        y_train_crop.extend(y_fish)

    y_train_crop = np.array(y_train_crop)
    X_train_crop = np.ndarray((len(train_crop_files), ROWS, COLS, 3), dtype=np.uint8)

    for i, im in enumerate(train_crop_files): 
        X_train_crop[i] = read_image(TRAIN_CROP_DIR+im)
        if i%1000 == 0: print('Processed {} of {}'.format(i, len(train_crop_files)))

    # class_weight
    y_train_crop = le.transform(y_train_crop)
    class_weight = dict(collections.Counter(y_train_crop))
    ref = max(class_weight.values())
    for key,value in class_weight.items():
        class_weight[key] = ref/value
    # One Hot Encoding Labels
    y_train_crop = np_utils.to_categorical(y_train_crop)
    
    train_crop_files = [file.split('/')[-1] for file in train_crop_files]
    
    #save data to file
    data_train = {'X_train_crop': X_train_crop,'y_train_crop': y_train_crop,'train_crop_files': train_crop_files,'class_weight':class_weight}

    with open(OUTPUT_DIR+'data_train_BBCrop_{}_{}.pickle'.format(ROWS, COLS), 'wb') as f:
        pickle.dump(data_train, f)

#rescale
print('Loading data done.')
X_train_crop = X_train_crop.astype(np.float32)
print('Convert to float32 done.')
X_train_crop /= 255.
print('Rescale by 255 done.')
#training stg1 and stg2 and resume should have the same train/test split!!! Remember to set the random_state!
X_train, X_valid, y_train, y_valid = train_test_split(X_train_crop, y_train_crop, test_size=0.2, random_state=1986, stratify=y_train_crop)
X_all = X_train_crop
y_all = y_train_crop


Exist data_train_BBCrop_224_224.pickle. Loading data from file.
Loading data done.
Convert to float32 done.
Rescale by 255 done.
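A quick aside on the class_weight built in the cell above: each class is weighted by ref/value, i.e. the count of the largest class (here NoF, given the 3x NoF sampling in the crop-caching cell) divided by the class's own count, so the largest class gets weight 1.0 and rarer classes get proportionally larger weights. A toy illustration with made-up counts:

# Toy illustration (made-up counts) of the class_weight rule used above:
# weight(class) = count_of_largest_class / count_of_this_class.
toy_labels = [0]*100 + [1]*20 + [2]*5            # hypothetical encoded labels
toy_weight = dict(collections.Counter(toy_labels))
ref = max(toy_weight.values())
for key, value in toy_weight.items():
    toy_weight[key] = ref / value
print(toy_weight)                                 # {0: 1.0, 1: 5.0, 2: 20.0}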
# GTbbox_df = ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax','split']
file_name = 'GTbbox_df.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    GTbbox_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
    GTbbox_df = pd.DataFrame(columns=['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax'])
    crop_classes = FISH_CLASSES[:]
    crop_classes.remove('NoF')
    for c in crop_classes:
        print(c)
        j = json.load(open('../data/BBannotations/{}.json'.format(c), 'r'))
        for l in j:
            filename = l["filename"]
            head, image_file = os.path.split(filename)
            basename, file_extension = os.path.splitext(image_file)
            image = Image.open(TEST_DIR+'/'+image_file)
            width_image, height_image = image.size
            for i in range(len(l["annotations"])):
                a = l["annotations"][i]
                xmin = (a["x"])
                ymin = (a["y"])
                width = (a["width"])
                height = (a["height"])
                xmax = xmin + width
                ymax = ymin + height
                assert max(xmin,0)= 0.999)]
    np.random.seed(1986)
    bboxes = detects_im[np.random.choice(detects_im.shape[0], num_NoF_perIm, replace=False), :]
    for j in range(bboxes.shape[0]):
        bbox = bboxes[j]
        xmin = bbox[0]
        ymin = bbox[1]
        xmax = bbox[2]
        ymax = bbox[3]
        # assert max(xmin,0)
#Load data
X_train, y_train = data_load('train')
X_valid, y_valid = data_load('valid')
print('Loading data done.')
print('train sample', X_train.shape[0])
print('valid sample', X_valid.shape[0])
# print('mean of X_train is', mean(X_train))
# print('mean of X_valid is', mean(X_valid))
# X_train_centered = featurewise_center(X_train)
# X_valid_centered = featurewise_center(X_valid)
# print('Featurewise centered done.')
# #class weight = n_samples / (n_classes * np.bincount(y))
# class_weight_fish = dict(GTbbox_df.groupby('crop_class').size())
# class_weight = {}
# n_samples = GTbbox_df.shape[0]
# for key,value in class_weight_fish.items():
#     class_weight[CROP_CLASSES.index(key)] = n_samples / (len(CROP_CLASSES)*value)
# class_weight

class_weight_fish = dict(GTbbox_df.groupby('crop_class').size())
class_weight = {}
ref = max(class_weight_fish.values())
for key,value in class_weight_fish.items():
    class_weight[FISH_CLASSES.index(key)] = ref/value
class_weight
#data augmentation
train_datagen = ImageDataGenerator(
    featurewise_center=True,
    rotation_range=180,
    shear_range=0.2,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True)
train_datagen.fit(X_train)
train_generator = train_datagen.flow(X_train, y_train, batch_size=BATCHSIZE, shuffle=True, seed=None)

valid_datagen = ImageDataGenerator(featurewise_center=True)
valid_datagen.fit(X_valid)
valid_generator = valid_datagen.flow(X_valid, y_valid, batch_size=BATCHSIZE, shuffle=True, seed=None)

# assert X_train_centered.shape[0]%BATCHSIZE==0
# steps_per_epoch = int(X_train_centered.shape[0]/BATCHSIZE)

In [6]:
#data preprocessing

train_datagen = ImageDataGenerator(
    featurewise_center=True,
    rotation_range=180,
    shear_range=0.2,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True)
train_datagen.fit(X_train)
train_generator = train_datagen.flow(X_train, y_train, batch_size=BATCHSIZE, shuffle=True, seed=None)

valid_datagen = ImageDataGenerator(featurewise_center=True)
valid_datagen.fit(X_valid)   
valid_generator = valid_datagen.flow(X_valid, y_valid, batch_size=BATCHSIZE, shuffle=True, seed=None)
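For sanity, the check left commented out near the resume-training cell can be run here as well: it compares the mean stored by the fitted generator (featurewise_center=True) with the imagewise_mean helper that the evaluation cells subtract manually from X_valid/X_train/X_all. This is only a sketch; whether the two match exactly depends on how this Keras version computes the featurewise mean.

# Sanity-check sketch (mirrors the commented-out checks further below):
train_mean = imagewise_mean(X_train)
print(train_datagen.mean.shape, train_mean.shape)
print(np.mean(np.absolute(train_datagen.mean - train_mean)))   # ~0 if the two means agree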

In [10]:
#callbacks

early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=25, verbose=1, mode='auto')        

model_checkpoint = ModelCheckpoint(filepath=CHECKPOINT_DIR+'weights.{epoch:03d}-{val_loss:.4f}.hdf5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
        
learningrate_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=1, mode='auto', epsilon=0.001, cooldown=0, min_lr=0)

tensorboard = TensorBoard(log_dir=LOG_DIR, histogram_freq=0, write_graph=False, write_images=True)

In [ ]:
#Resnet50
#top layer training

from keras.applications.resnet50 import ResNet50

base_model = ResNet50(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
# x = Flatten()(x)
# x = Dense(256)(x)
# x = LeakyReLU(alpha=0.33)(x)
# x = Dropout(0.5)(x)
predictions = Dense(len(FISH_CLASSES), init='glorot_normal', activation='softmax')(x)

model = Model(input=base_model.input, output=predictions)

# first: train only the top layers (which were randomly initialized)
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
optimizer = Adam(lr=LEARNINGRATE)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# train the model on the new data for a few epochs
model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=30, verbose=1, 
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], 
                    validation_data=valid_generator, nb_val_samples=len(X_valid), 
                    class_weight=class_weight, nb_worker=3, pickle_safe=True)

In [ ]:
### Resnet50
# fine tuning
# 164 conv5c+top
# 142 conv5+top
# 80 conv4+conv5+top
# 38 conv3+conv4+conv5+top
start_layer = 38

model, model_name = get_best_model()
# print('Loading model from weights.004-0.0565.hdf5')
# model = load_model(CHECKPOINT_DIR+'weights.004-0.0565.hdf5')

for layer in model.layers[:start_layer]:
   layer.trainable = False
for layer in model.layers[start_layer:]:
   layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
optimizer = Adam(lr=1e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=300, verbose=1, 
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], 
                    validation_data=valid_generator, nb_val_samples=len(X_valid), 
                    class_weight=class_weight, nb_worker=3, pickle_safe=True, initial_epoch=26)
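The start_layer values listed in the comments above (38, 80, 142, 164) index into model.layers of this particular ResNet50-plus-head graph. A small inspection sketch (no assumptions beyond the loaded model) to verify where each stage starts before freezing:

# Print layer index, name, and trainable flag to check the stage boundaries,
# e.g. that model.layers[38] really is the first layer of the conv3 stage.
for i, layer in enumerate(model.layers):
    print(i, layer.name, layer.trainable)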

In [9]:
#resume training

# model, model_name = get_best_model()
model = load_model(CHECKPOINT_DIR + 'weights.073-0.0472.hdf5')
model_name = 'weights.073-0.0472.hdf5'
print('model_name', model_name)

# #try increasing learningrate
# optimizer = Adam(lr=1e-4)
# model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model.fit_generator(train_generator, samples_per_epoch=len(X_train), nb_epoch=300, verbose=1, 
                    callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard], 
                    validation_data=valid_generator, nb_val_samples=len(X_valid), 
                    class_weight=class_weight, nb_worker=3, pickle_safe=True, initial_epoch=74)


model_name weights.072-0.0496.hdf5
Epoch 74/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0782 - acc: 0.9920
/opt/anaconda3/lib/python3.5/site-packages/keras/engine/training.py:1470: UserWarning: Epoch comprised more than `samples_per_epoch` samples, which might affect learning results. Set `samples_per_epoch` correctly to avoid this warning.
  warnings.warn('Epoch comprised more than '
Epoch 00073: val_loss improved from inf to 0.04724, saving model to ./resnet50_FT38_Classifier_Rep6/checkpoint/weights.073-0.0472.hdf5
15616/15583 [==============================] - 447s - loss: 0.0789 - acc: 0.9920 - val_loss: 0.0472 - val_acc: 0.9871
Epoch 75/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0932 - acc: 0.9901Epoch 00074: val_loss did not improve
15616/15583 [==============================] - 420s - loss: 0.0931 - acc: 0.9901 - val_loss: 0.0637 - val_acc: 0.9829
Epoch 76/300
15517/15583 [============================>.] - ETA: 1s - loss: 0.0716 - acc: 0.9916Epoch 00075: val_loss did not improve
15645/15583 [==============================] - 427s - loss: 0.0712 - acc: 0.9916 - val_loss: 0.0514 - val_acc: 0.9844
Epoch 77/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0844 - acc: 0.9902Epoch 00076: val_loss did not improve
15616/15583 [==============================] - 422s - loss: 0.0841 - acc: 0.9902 - val_loss: 0.0594 - val_acc: 0.9829
Epoch 78/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0749 - acc: 0.9912Epoch 00077: val_loss did not improve
15616/15583 [==============================] - 422s - loss: 0.0744 - acc: 0.9912 - val_loss: 0.0634 - val_acc: 0.9846
Epoch 79/300
15517/15583 [============================>.] - ETA: 1s - loss: 0.0869 - acc: 0.9913Epoch 00078: val_loss did not improve
15645/15583 [==============================] - 423s - loss: 0.0891 - acc: 0.9912 - val_loss: 0.0592 - val_acc: 0.9836
Epoch 80/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0733 - acc: 0.9912Epoch 00079: val_loss did not improve

Epoch 00079: reducing learning rate to 9.999999974752428e-08.
15616/15583 [==============================] - 423s - loss: 0.0728 - acc: 0.9913 - val_loss: 0.0604 - val_acc: 0.9811
Epoch 81/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0623 - acc: 0.9930Epoch 00080: val_loss did not improve
15616/15583 [==============================] - 421s - loss: 0.0622 - acc: 0.9930 - val_loss: 0.0539 - val_acc: 0.9849
Epoch 82/300
15517/15583 [============================>.] - ETA: 1s - loss: 0.0747 - acc: 0.9923Epoch 00081: val_loss did not improve
15645/15583 [==============================] - 422s - loss: 0.0746 - acc: 0.9923 - val_loss: 0.0521 - val_acc: 0.9834
Epoch 83/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0708 - acc: 0.9928Epoch 00082: val_loss did not improve
15616/15583 [==============================] - 422s - loss: 0.0707 - acc: 0.9929 - val_loss: 0.0532 - val_acc: 0.9836
Epoch 84/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0649 - acc: 0.9913Epoch 00083: val_loss did not improve
15616/15583 [==============================] - 421s - loss: 0.0651 - acc: 0.9912 - val_loss: 0.0616 - val_acc: 0.9816
Epoch 85/300
15517/15583 [============================>.] - ETA: 1s - loss: 0.0860 - acc: 0.9914Epoch 00084: val_loss did not improve

Epoch 00084: reducing learning rate to 1.0000000116860975e-08.
15645/15583 [==============================] - 423s - loss: 0.0856 - acc: 0.9914 - val_loss: 0.0705 - val_acc: 0.9798
Epoch 86/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0674 - acc: 0.9929Epoch 00085: val_loss did not improve
15616/15583 [==============================] - 422s - loss: 0.0674 - acc: 0.9929 - val_loss: 0.0605 - val_acc: 0.9841
Epoch 87/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0700 - acc: 0.9928Epoch 00086: val_loss did not improve
15616/15583 [==============================] - 421s - loss: 0.0697 - acc: 0.9928 - val_loss: 0.0653 - val_acc: 0.9819
Epoch 88/300
15517/15583 [============================>.] - ETA: 1s - loss: 0.0732 - acc: 0.9901Epoch 00087: val_loss did not improve
15645/15583 [==============================] - 423s - loss: 0.0728 - acc: 0.9901 - val_loss: 0.0532 - val_acc: 0.9856
Epoch 89/300
15488/15583 [============================>.] - ETA: 2s - loss: 0.0720 - acc: 0.9919Epoch 00088: val_loss did not improve
15616/15583 [==============================] - 421s - loss: 0.0762 - acc: 0.9919 - val_loss: 0.0598 - val_acc: 0.9829
Epoch 90/300
  640/15583 [>.............................] - ETA: 360s - loss: 0.0512 - acc: 0.9953
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-9-d514125cdecf> in <module>()
     13                     callbacks=[early_stopping, model_checkpoint, learningrate_schedule, tensorboard],
     14                     validation_data=valid_generator, nb_val_samples=len(X_valid),
---> 15                     class_weight=class_weight, nb_worker=3, pickle_safe=True, initial_epoch=73)

/opt/anaconda3/lib/python3.5/site-packages/keras/engine/training.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
   1449                     outs = self.train_on_batch(x, y,
   1450                                                sample_weight=sample_weight,
-> 1451                                                class_weight=class_weight)
   1452                 except:
   1453                     _stop.set()

/opt/anaconda3/lib/python3.5/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
   1224             ins = x + y + sample_weights
   1225         self._make_train_function()
-> 1226         outputs = self.train_function(ins)
   1227         if len(outputs) == 1:
   1228             return outputs[0]

/opt/anaconda3/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   1094             feed_dict[tensor] = value
   1095         session = get_session()
-> 1096         updated = session.run(self.outputs + [self.updates_op], feed_dict=feed_dict)
   1097         return updated[:len(self.outputs)]
   1098 

/opt/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    715     try:
    716       result = self._run(None, fetches, feed_dict, options_ptr,
--> 717                          run_metadata_ptr)
    718       if run_metadata:
    719         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/opt/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    913     if final_fetches or final_targets:
    914       results = self._do_run(handle, final_targets, final_fetches,
--> 915                              feed_dict_string, options, run_metadata)
    916     else:
    917       results = []

/opt/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
    963     if handle is None:
    964       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
--> 965                            target_list, options, run_metadata)
    966     else:
    967       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/opt/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
    970   def _do_call(self, fn, *args):
    971     try:
--> 972       return fn(*args)
    973     except errors.OpError as e:
    974       message = compat.as_text(e.message)

/opt/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
    952         return tf_session.TF_Run(session, options,
    953                                  feed_dict, fetch_list, target_list,
--> 954                                  status, run_metadata)
    955 
    956     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 
# print('train_imagewise_mean:', train_datagen.mean)
# print('valid_imagewise_mean:', valid_datagen.mean)
# np.mean(np.absolute(train_datagen.mean - valid_datagen.mean))
# np.std(np.absolute(train_datagen.mean - valid_datagen.mean))
# train_mean = imagewise_mean(X_train)
# assert (train_mean == train_datagen.mean).all()
# valid_mean = imagewise_mean(X_valid)
# assert (valid_mean == valid_datagen.mean).all()
# np.mean(train_mean, axis=(0,1))
# np.mean(valid_mean, axis=(0,1))

In [11]:
#test prepare

# test_model, test_model_name = get_best_model()
test_model = load_model(CHECKPOINT_DIR + 'weights.073-0.0472.hdf5')
test_model_name = 'weights.073-0.0472.hdf5'
print('model_name', model_name)


def test_generator(df, mean, datagen = None, batch_size = BATCHSIZE):
    n = df.shape[0]
    batch_index = 0
    while 1:
        current_index = batch_index * batch_size
        if n >= current_index + batch_size:
            current_batch_size = batch_size
            batch_index += 1    
        else:
            current_batch_size = n - current_index
            batch_index = 0        
        batch_df = df[current_index:current_index+current_batch_size]
        batch_x = np.zeros((batch_df.shape[0], ROWS, COLS, 3), dtype=K.floatx())
        i = 0
        for index,row in batch_df.iterrows():
            image_file = row['image_file']
            bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
            cropped = load_img(TEST_DIR+image_file,bbox,target_size=(ROWS,COLS))
            x = np.asarray(cropped, dtype=K.floatx())
            x /= 255.
            if datagen is not None: x = datagen.random_transform(x)            
            x = preprocess_imagewise(x, mean)
            batch_x[i] = x
            i += 1
        if batch_index%50 == 0: print('batch_index', batch_index)
        yield(batch_x)
        
test_aug_datagen = ImageDataGenerator(
    rotation_range=180,
    shear_range=0.2,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True)


model_name weights.072-0.0496.hdf5

In [12]:
#valid
# print(test_model.evaluate(X_valid-imagewise_mean(X_train), y_valid, batch_size=BATCHSIZE, verbose=1))
valid_pred = test_model.predict(X_valid-imagewise_mean(X_train), batch_size=BATCHSIZE, verbose=1)
valid_pred_df = pd.DataFrame(valid_pred, columns=FISH_CLASSES_2)
y_valid_fish = []
for i in range(y_valid.shape[0]):
    index = np.argmax(y_valid[i,:])
    y_valid_fish.append(FISH_CLASSES_2[index])
valid_pred_df['crop_class'] = pd.Series(y_valid_fish)
valid_pred_df['logloss'] = valid_pred_df.apply(lambda row: -math.log(row[row['crop_class']]), axis=1)
for fish in FISH_CLASSES:
    valid_pred_df[fish+'_woNoF'] = valid_pred_df.apply(lambda row: row[fish]/(1-row['NoF']+K.epsilon()) if fish!='NoF' else np.inf , axis=1)
valid_pred_df['logloss_woNoF'] = valid_pred_df.apply(lambda row: -math.log(row[row['crop_class']+'_woNoF']), axis=1)
print('valid loss:', valid_pred_df['logloss'].mean())
print('valid_woNoF loss:', valid_pred_df[valid_pred_df['crop_class']!='NoF']['logloss_woNoF'].mean())
print(valid_pred_df[valid_pred_df['crop_class']!='NoF'].groupby(['crop_class'])['logloss_woNoF'].mean())
crop_valid_woNoF_logloss = valid_pred_df[valid_pred_df['crop_class']!='NoF']['logloss_woNoF'].mean()
print('crop_valid_woNoF_logloss:', crop_valid_woNoF_logloss)


3896/3896 [==============================] - 42s    
valid loss: 0.06061852928100923
valid_woNoF loss: 0.10658013534412561
crop_class
ALB      0.082938
BET      0.638375
DOL      0.000099
LAG      0.000289
OTHER    0.036710
SHARK    0.000095
YFT      0.074220
Name: logloss_woNoF, dtype: float64
crop_valid_woNoF_logloss: 0.10658013534412561
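The *_woNoF columns divide each fish probability by (1 - NoF), i.e. they renormalize over the seven fish classes after discarding the NoF mass. A tiny worked example on a hypothetical prediction row:

# Hypothetical prediction row to illustrate the woNoF renormalization used above.
probs = {'NoF': 0.60, 'ALB': 0.30, 'BET': 0.05, 'DOL': 0.02, 'LAG': 0.01,
         'OTHER': 0.01, 'SHARK': 0.005, 'YFT': 0.005}
woNoF = {fish: p / (1 - probs['NoF']) for fish, p in probs.items() if fish != 'NoF'}
print(woNoF['ALB'])                      # 0.75: fish probabilities rescaled to sum to 1
print(-math.log(woNoF['ALB']))           # per-crop logloss_woNoF if the true class is ALB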

In [13]:
#train
# print(test_model.evaluate(X_train-imagewise_mean(X_train), y_train, batch_size=BATCHSIZE, verbose=1))
train_pred = test_model.predict(X_train-imagewise_mean(X_train), batch_size=BATCHSIZE, verbose=1)
train_pred_df = pd.DataFrame(train_pred, columns=FISH_CLASSES_2)
y_train_fish = []
for i in range(y_train.shape[0]):
    index = np.argmax(y_train[i,:])
    y_train_fish.append(FISH_CLASSES_2[index])
train_pred_df['crop_class'] = pd.Series(y_train_fish)
train_pred_df['logloss'] = train_pred_df.apply(lambda row: -math.log(row[row['crop_class']]), axis=1)
for fish in FISH_CLASSES:
    train_pred_df[fish+'_woNoF'] = train_pred_df.apply(lambda row: row[fish]/(1-row['NoF']+K.epsilon()) if fish!='NoF' else np.inf , axis=1)
train_pred_df['logloss_woNoF'] = train_pred_df.apply(lambda row: -math.log(row[row['crop_class']+'_woNoF']), axis=1)
print('train loss:', train_pred_df['logloss'].mean())
print('train_woNoF loss:', train_pred_df[train_pred_df['crop_class']!='NoF']['logloss_woNoF'].mean())
print(train_pred_df[train_pred_df['crop_class']!='NoF'].groupby(['crop_class'])['logloss_woNoF'].mean())


15583/15583 [==============================] - 146s   
train loss: 0.023448730071607797
train_woNoF loss: 0.022590322968225395
crop_class
ALB      0.033394
BET      0.012264
DOL      0.000744
LAG      0.000255
OTHER    0.001929
SHARK    0.000384
YFT      0.014202
Name: logloss_woNoF, dtype: float64

In [15]:
#all
# print(test_model.evaluate(X_all-imagewise_mean(X_train), y_all, batch_size=BATCHSIZE, verbose=1))
all_pred = test_model.predict(X_all-imagewise_mean(X_train), batch_size=BATCHSIZE, verbose=1)
all_pred_df = pd.DataFrame(all_pred, columns=FISH_CLASSES_2)
y_all_fish = []
for i in range(y_all.shape[0]):
    index = np.argmax(y_all[i,:])
    y_all_fish.append(FISH_CLASSES_2[index])
all_pred_df['crop_class'] = pd.Series(y_all_fish)
all_pred_df['logloss'] = all_pred_df.apply(lambda row: -math.log(row[row['crop_class']]), axis=1)
for fish in FISH_CLASSES:
    all_pred_df[fish+'_woNoF'] = all_pred_df.apply(lambda row: row[fish]/(1-row['NoF']+K.epsilon()) if fish!='NoF' else np.inf , axis=1)
all_pred_df['logloss_woNoF'] = all_pred_df.apply(lambda row: -math.log(row[row['crop_class']+'_woNoF']), axis=1)
print('all loss:', all_pred_df['logloss'].mean())
print(all_pred_df.groupby(['crop_class'])['logloss'].mean())
print('all_woNoF loss:', all_pred_df[all_pred_df['crop_class']!='NoF']['logloss_woNoF'].mean())
print(all_pred_df[all_pred_df['crop_class']!='NoF'].groupby(['crop_class'])['logloss_woNoF'].mean())


all loss: 0.0571241506242606
crop_class
ALB      0.134876
BET      0.383430
DOL      0.299616
LAG      0.003642
NoF      0.029710
OTHER    0.211802
SHARK    0.051833
YFT      0.111545
Name: logloss, dtype: float64
all_woNoF loss: 0.11463064874655438
crop_class
ALB      0.080437
BET      0.377342
DOL      0.267654
LAG      0.000993
OTHER    0.178607
SHARK    0.047496
YFT      0.101583
Name: logloss_woNoF, dtype: float64
#GTbbox_CROPpred_df = ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax','split',
#                      'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'logloss',
#                      'ALB_woNoF', 'BET_woNoF', 'DOL_woNoF', 'LAG_woNoF', 'OTHER_woNoF', 'SHARK_woNoF', 'YFT_woNoF', 'logloss_woNoF']

file_name = 'GTbbox_CROPpred_df_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    GTbbox_CROPpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)
#     nb_augmentation = 1
#     if nb_augmentation ==1:
#         test_preds = test_model.predict_generator(test_generator(df=GTbbox_df, mean=train_mean),
#                                                   val_samples=GTbbox_df.shape[0], nb_worker=1, pickle_safe=False)
# #         test_preds = test_model.predict_generator(test_generator(df=GTbbox_df, mean=train_mean),
# #                                                   steps=int(math.ceil(GTbbox_df.shape[0]/BATCHSIZE)), workers=1, pickle_safe=False)
#     else:
#         test_preds = np.zeros((GTbbox_df.shape[0], len(FISH_CLASSES)), dtype=K.floatx())
#         for idx in range(nb_augmentation):
#             print('{}th augmentation for testing ...'.format(idx+1))
#             test_preds += test_model.predict_generator(test_generator(df=GTbbox_df, mean=train_mean, datagen=test_aug_datagen),
#                                                        val_samples=GTbbox_df.shape[0], nb_worker=1, pickle_safe=False)
# #             test_preds += test_model.predict_generator(test_generator(df=GTbbox_df, mean=train_mean, datagen=test_aug_datagen),
# #                                                        steps=GTbbox_df.shape[0], workers=1, pickle_safe=False)
#         test_preds /= nb_augmentation
    CROPpred_df = pd.DataFrame(test_preds, columns=FISH_CLASSES_2)
    GTbbox_CROPpred_df = pd.concat([GTbbox_df,CROPpred_df], axis=1)
    GTbbox_CROPpred_df['logloss'] = GTbbox_CROPpred_df.apply(lambda row: -math.log(row[row['crop_class']]), axis=1)
    for fish in FISH_CLASSES:
        GTbbox_CROPpred_df[fish+'_woNoF'] = GTbbox_CROPpred_df.apply(lambda row: row[fish]/(1-row['NoF']+K.epsilon()) if fish!='NoF' else np.inf , axis=1)
    GTbbox_CROPpred_df['logloss_woNoF'] = GTbbox_CROPpred_df.apply(lambda row: -math.log(row[row['crop_class']+'_woNoF']), axis=1)
    GTbbox_CROPpred_df.to_pickle(OUTPUT_DIR+file_name)

# valid_CROPpred_df = GTbbox_CROPpred_df[GTbbox_CROPpred_df['split']=='valid']
crop_all_woNoF_logloss = GTbbox_CROPpred_df[GTbbox_CROPpred_df['crop_class']!='NoF']['logloss_woNoF'].mean()
print('crop_all_woNoF_logloss:', crop_all_woNoF_logloss)
print('all loss:', GTbbox_CROPpred_df['logloss'].mean())
# print('all fish loss:', GTbbox_CROPpred_df[GTbbox_CROPpred_df['crop_class']!='NoF']['logloss'].mean())
# print(GTbbox_CROPpred_df.groupby(['crop_class'])['logloss'].mean())
print('all_woNoF loss:', GTbbox_CROPpred_df[GTbbox_CROPpred_df['crop_class']!='NoF']['logloss_woNoF'].mean())
print(GTbbox_CROPpred_df[GTbbox_CROPpred_df['crop_class']!='NoF'].groupby(['crop_class'])['logloss_woNoF'].mean())

train_CROPpred_df = GTbbox_CROPpred_df[GTbbox_CROPpred_df['split']=='train']
print('train loss:', train_CROPpred_df['logloss'].mean())
# print('train fish loss:', train_CROPpred_df[train_CROPpred_df['crop_class']!='NoF']['logloss'].mean())
# print(train_CROPpred_df.groupby(['crop_class'])['logloss'].mean())
print('train_woNoF loss:', train_CROPpred_df[train_CROPpred_df['crop_class']!='NoF']['logloss_woNoF'].mean())
print(train_CROPpred_df[train_CROPpred_df['crop_class']!='NoF'].groupby(['crop_class'])['logloss_woNoF'].mean())

valid_CROPpred_df = GTbbox_CROPpred_df[GTbbox_CROPpred_df['split']=='valid']
print('valid loss:', valid_CROPpred_df['logloss'].mean())
# print('valid fish loss:', valid_CROPpred_df[valid_CROPpred_df['crop_class']!='NoF']['logloss'].mean())
# print(valid_CROPpred_df.groupby(['crop_class'])['logloss'].mean())
print('valid_woNoF loss:', valid_CROPpred_df[valid_CROPpred_df['crop_class']!='NoF']['logloss_woNoF'].mean())
print(valid_CROPpred_df[valid_CROPpred_df['crop_class']!='NoF'].groupby(['crop_class'])['logloss_woNoF'].mean())
fish_loglosses = {}
for fish in FISH_CLASSES:
    fish_loglosses[fish] = GTbbox_CROPpred_df[GTbbox_CROPpred_df['crop_class']==fish]['logloss']
fish_loglosses['ALB'].plot.box()

In [16]:
# RFCNbbox_RFCNpred_df = ['image_class','image_file','crop_index','xmin','ymin','xmax','ymax',
#                          'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                          'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']
# select fish_conf >= CONF_THRESH

file_name = 'RFCNbbox_RFCNpred_df_conf{:.2f}.pickle'.format(CONF_THRESH)
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    RFCNbbox_RFCNpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)        
    RFCNbbox_RFCNpred_df = pd.DataFrame(columns=['image_class','image_file','crop_index','xmin','ymin','xmax','ymax',
                                                  'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
                                                  'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN']) 

    with open('../data/RFCN_detections/detections_full_AGNOSTICnms_'+RFCN_MODEL+'.pkl','rb') as f:
        detections_full_AGNOSTICnms = pickle.load(f, encoding='latin1') 
    with open("../RFCN/ImageSets/Main/test.txt","r") as f:
        test_files = f.readlines()
    with open("../RFCN/ImageSets/Main/train_test.txt","r") as f:
        train_file_labels = f.readlines()
    assert len(detections_full_AGNOSTICnms) == len(test_files)
    
    count = np.zeros(len(detections_full_AGNOSTICnms))
    
    for im in range(len(detections_full_AGNOSTICnms)):
        if im%1000 == 0: print(im)
        basename = test_files[im][:9]
        if im<1000:
            image_class = '--'
        else:
            for i in range(len(train_file_labels)):
                if train_file_labels[i][:9] == basename:
                    image_class = train_file_labels[i][10:-1]
                    break
        image = Image.open(TEST_DIR+'/'+basename+'.jpg')
        width_image, height_image = image.size
        
        bboxes = []
        detects_im = detections_full_AGNOSTICnms[im]
        for i in range(len(detects_im)):
            if np.sum(detects_im[i,5:]) >= CONF_THRESH:
#             if np.max(detects_im[i,5:]) >= CONF_THRESH:
                bboxes.append(detects_im[i,:]) 
        count[im] = len(bboxes)
        if len(bboxes) == 0:
            ind = np.argmax(np.sum(detects_im[:,5:], axis=1))
            bboxes.append(detects_im[ind,:])
        bboxes = np.asarray(bboxes)

        for j in range(len(bboxes)):    
            bbox = bboxes[j]
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            assert max(xmin,0)<min(xmax,width_image)
            assert max(ymin,0)<min(ymax,height_image)
            RFCNbbox_RFCNpred_df.loc[len(RFCNbbox_RFCNpred_df)]=[image_class,basename+'.jpg',j,max(xmin,0),max(ymin,0),
                                                                   min(xmax,width_image),min(ymax,height_image),
                                                                   bbox[4],bbox[5],bbox[6],bbox[7],bbox[8],bbox[9],bbox[10],bbox[11]]   
    
    RFCNbbox_RFCNpred_df.to_pickle(OUTPUT_DIR+file_name)


Loading from file RFCNbbox_RFCNpred_df_conf0.80.pickle

In [17]:
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = ['image_class', 'image_file','crop_index','xmin','ymin','xmax','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#                                    'NoF_CROP', 'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP',
#                                    'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

file_name = 'RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)  
    nb_augmentation = 1
    if nb_augmentation ==1:
        test_preds = test_model.predict_generator(test_generator(df=RFCNbbox_RFCNpred_df, mean=imagewise_mean(X_train)), 
                                                  val_samples=RFCNbbox_RFCNpred_df.shape[0], nb_worker=1, pickle_safe=False)
    else:
        test_preds = np.zeros((RFCNbbox_RFCNpred_df.shape[0], len(FISH_CLASSES)), dtype=K.floatx())
        for idx in range(nb_augmentation):
            print('{}th augmentation for testing ...'.format(idx+1))
            test_preds += test_model.predict_generator(test_generator(df=RFCNbbox_RFCNpred_df, mean=imagewise_mean(X_train), datagen=test_aug_datagen), 
                                                       val_samples=RFCNbbox_RFCNpred_df.shape[0], nb_worker=1, pickle_safe=False)
        test_preds /= nb_augmentation

    CROPpred_df = pd.DataFrame(test_preds, columns=['ALB_CROP', 'BET_CROP', 'DOL_CROP', 'LAG_CROP', 'NoF_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP'])
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = pd.concat([RFCNbbox_RFCNpred_df,CROPpred_df], axis=1)
    
    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['NoF'] = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['NoF_RFCN']
    for fish in ['ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']:
        RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish] = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.apply(lambda row: (1-row['NoF_RFCN'])*row[fish+'_CROP']/(1-row['NoF_CROP']+K.epsilon()), axis=1)

#     for fish in FISH_CLASSES:
#         RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish] = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df[fish+'_CROP']

    RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.to_pickle(OUTPUT_DIR+file_name) 
    print('Done!')


Generating file RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df_weights.073-0.0472.hdf5_.pickle
batch_index 0
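The hybrid score computed above keeps the detector's NoF probability and redistributes the remaining (1 - NoF_RFCN) mass according to the crop classifier's fish ratios: p(fish) = (1 - NoF_RFCN) * p_CROP(fish) / (1 - NoF_CROP). A small numeric sketch with hypothetical scores:

# Hypothetical detector/classifier outputs to illustrate the hybrid rule above.
NoF_RFCN, NoF_CROP = 0.10, 0.40
ALB_CROP = 0.50                                   # crop-classifier probability for ALB
ALB_hybrid = (1 - NoF_RFCN) * ALB_CROP / (1 - NoF_CROP + K.epsilon())
print(ALB_hybrid)                                 # ~0.75: 0.9 of the mass split in CROP's fish ratios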

In [18]:
# clsMaxAve and hybrid RFCNpred&CROPpred such that RFCNpred for NoF and CROPpred for fish
# test_pred_df = ['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df = ['image_class', 'image_file','crop_index','xmin','ymin','xmax','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN',
#                                    'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP',
#                                    'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

file_name = 'test_pred_df_Hybrid_'+test_model_name+'_.pickle'
if os.path.exists(OUTPUT_DIR+file_name):
    print ('Loading from file '+file_name)
    test_pred_df = pd.read_pickle(OUTPUT_DIR+file_name)
else:
    print ('Generating file '+file_name)  
    with open("../RFCN/ImageSets/Main/test.txt","r") as f:
        test_files = f.readlines()
    
    test_pred_df = pd.DataFrame(columns=['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT'])  
    for j in range(len(test_files)): 
        image_file = test_files[j][:-1]+'.jpg'
        test_pred_im_df = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.loc[RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['image_file'] == image_file,
                                                                       ['image_class', 'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']]
        image_class = test_pred_im_df.iloc[0]['image_class']
        test_pred_im_df.drop('image_class', axis=1, inplace=True)
        max_score = test_pred_im_df.max(axis=1)
        max_cls = test_pred_im_df.idxmax(axis=1)
        test_pred_im_df['max_score'] = max_score
        test_pred_im_df['max_cls'] = max_cls
        test_pred_im_df['Count'] = test_pred_im_df.groupby(['max_cls'])['max_cls'].transform('count')
        idx = test_pred_im_df.groupby(['max_cls'])['max_score'].transform(max) == test_pred_im_df['max_score']
        test_pred_im_clsMax_df = test_pred_im_df.loc[idx,['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'Count']]
        test_pred_im_clsMax_array = test_pred_im_clsMax_df.values
        pred = np.average(test_pred_im_clsMax_array[:,:-1], axis=0, weights=test_pred_im_clsMax_array[:,-1], returned=False).tolist()
        if image_class!='--':
            ind = FISH_CLASSES.index(image_class)
            logloss = -math.log(pred[ind]) 
        else:
            logloss = np.nan
        test_pred_im_clsMaxAve = [logloss,image_class,image_file]
        test_pred_im_clsMaxAve.extend(pred)
        test_pred_df.loc[len(test_pred_df)]=test_pred_im_clsMaxAve

    test_pred_df.to_pickle(OUTPUT_DIR+file_name) 
    print('Done!')

image_all_logloss = test_pred_df[test_pred_df['image_class']!='--']['logloss'].mean()
print('imag_all_logloss:', image_all_logloss)


Generating file test_pred_df_Hybrid_weights.073-0.0472.hdf5_.pickle
Done!
imag_all_logloss: 0.11137381319005099
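The clsMaxAve aggregation in the cell above works per image: every detection votes for its argmax class, only the top-scoring detection of each voted class is kept, and the kept rows are averaged with the vote counts as weights. A compact sketch on a hypothetical three-detection image:

# Hypothetical per-image detections (columns follow FISH_CLASSES) to illustrate clsMaxAve.
det_df = pd.DataFrame([[0.05, 0.80, 0.05, 0.02, 0.02, 0.02, 0.02, 0.02],
                       [0.10, 0.60, 0.10, 0.05, 0.05, 0.05, 0.03, 0.02],
                       [0.70, 0.05, 0.05, 0.05, 0.05, 0.04, 0.03, 0.03]],
                      columns=FISH_CLASSES)
max_score = det_df.max(axis=1)
max_cls = det_df.idxmax(axis=1)
det_df['max_score'] = max_score
det_df['max_cls'] = max_cls
det_df['Count'] = det_df.groupby('max_cls')['max_cls'].transform('count')
keep = det_df.groupby('max_cls')['max_score'].transform(max) == det_df['max_score']
kept = det_df.loc[keep, FISH_CLASSES + ['Count']].values
image_pred = np.average(kept[:, :-1], axis=0, weights=kept[:, -1])
print(image_pred)    # the two ALB votes outweigh the single NoF vote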

In [ ]:
#### visualization
# RFCNbbox_RFCNpred_CROPpred_df = ['image_class', 'image_file','crop_index','x_min','y_min','x_max','ymax',
#                                    'NoF_RFCN', 'ALB_RFCN', 'BET_RFCN', 'DOL_RFCN',
#                                    'LAG_RFCN', 'OTHER_RFCN', 'SHARK_RFCN', 'YFT_RFCN'
#                                    'NoF_CROP', 'ALB_CROP', 'BET_CROP', 'DOL_CROP',
#                                    'LAG_CROP', 'OTHER_CROP', 'SHARK_CROP', 'YFT_CROP']
#GTbbox_CROPpred_df = ['image_file','crop_index','crop_class','xmin','ymin','xmax','ymax',
#                      'NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT', 'logloss']
# test_pred_df = ['logloss','image_class','image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']

for j in range(test_pred_df.shape[0]):
    image_logloss = test_pred_df.iat[j,0]
    image_class = test_pred_df.iat[j,1]
    image_file = test_pred_df.iat[j,2]
    if j<1000 and j%30== 0:
        pass
    else: 
        continue
    im = Image.open('../RFCN/JPEGImages/'+image_file)
    im = np.asarray(im)
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(im, aspect='equal')
    RFCN_dets = RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df.loc[RFCNbbox_RFCNpred_CROPpred_HYBRIDpred_df['image_file']==image_file]
    for index,row in RFCN_dets.iterrows():
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        RFCN = [row['NoF_RFCN'],row['ALB_RFCN'],row['BET_RFCN'],row['DOL_RFCN'],row['LAG_RFCN'],row['OTHER_RFCN'],row['SHARK_RFCN'],row['YFT_RFCN']]
        CROP = [row['NoF'],row['ALB'],row['BET'],row['DOL'],row['LAG'],row['OTHER'],row['SHARK'],row['YFT']]
        score_RFCN = max(RFCN)
        score_CROP = max(CROP)
        index_RFCN = RFCN.index(score_RFCN)
        index_CROP = CROP.index(score_CROP)
        class_RFCN = FISH_CLASSES[index_RFCN]
        class_CROP = FISH_CLASSES[index_CROP]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='red', linewidth=2))
        ax.text(bbox[0], bbox[1] - 2, 'RFCN_{:s} {:.3f} \nHYBRID_{:s} {:.3f}'.format(class_RFCN, score_RFCN, class_CROP, score_CROP), bbox=dict(facecolor='red', alpha=0.5), fontsize=8, color='white')       
    GT_dets = GTbbox_CROPpred_df.loc[GTbbox_CROPpred_df['image_file']==image_file]
    for index,row in GT_dets.iterrows():
        bbox = [row['xmin'],row['ymin'],row['xmax'],row['ymax']]
        CROP = [row['NoF'],row['ALB'],row['BET'],row['DOL'],row['LAG'],row['OTHER'],row['SHARK'],row['YFT']]
        score_CROP = max(CROP)
        index_CROP = CROP.index(score_CROP)
        class_CROP = FISH_CLASSES[index_CROP]
        ax.add_patch(plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], bbox[3] - bbox[1], fill=False, edgecolor='green', linewidth=2))
        ax.text(bbox[0], bbox[3] + 40, 'GT_{:s} \nCROP_{:s} {:.3f}'.format(row['crop_class'], class_CROP, score_CROP), bbox=dict(facecolor='green', alpha=0.5), fontsize=8, color='white')
    ax.set_title(('Image {:s}    FISH {:s}    logloss {}').format(image_file, image_class, image_logloss), fontsize=10) 
    plt.axis('off')
    plt.tight_layout()
    plt.draw()

In [19]:
#temperature
T = 2.5
test_pred_array = test_pred_df[FISH_CLASSES].values
test_pred_T_array = np.exp(np.log(test_pred_array)/T)
test_pred_T_array = test_pred_T_array/np.sum(test_pred_T_array, axis=1, keepdims=True)
test_pred_T_df = pd.DataFrame(test_pred_T_array, columns=FISH_CLASSES)
test_pred_T_df = pd.concat([test_pred_df[['image_class','image_file']],test_pred_T_df], axis=1)

#test submission
submission = test_pred_T_df.loc[:999,['image_file','NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']]
submission.rename(columns={'image_file':'image'}, inplace=True)
sub_file = 'RFCN_AGONOSTICnms_'+RFCN_MODEL+'_'+CROP_MODEL+'_'+test_model_name+'_clsMaxAve_conf{:.2f}_cropvalidloss{:.4f}_imageallloss{:.4f}_T{}.csv'.format(CONF_THRESH, crop_valid_woNoF_logloss, image_all_logloss, T)
submission.to_csv(sub_file, index=False)
submission.to_csv(OUTPUT_DIR + sub_file, index=False)
print('Done!'+sub_file)


Done!RFCN_AGONOSTICnms_resnet101_rfcn_ohem_iter_30000_resnet50_FT38_Classifier_Rep6_weights.073-0.0472.hdf5_clsMaxAve_conf0.80_cropvalidloss0.1066_imageallloss0.1114_T2.5.csv
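The temperature T=2.5 applied above raises every probability to 1/T and renormalizes, flattening confident predictions to hedge against the log-loss penalty on wrong but confident answers. A small numeric illustration:

# Effect of the temperature T=2.5 used above on a single hypothetical prediction row.
p = np.array([0.90, 0.04, 0.02, 0.01, 0.01, 0.01, 0.005, 0.005])
p_T = np.exp(np.log(p) / 2.5)
p_T /= p_T.sum()
print(p_T.round(3))   # the 0.90 peak shrinks to about 0.44 while the tail probabilities rise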

In [ ]: