Train a sea lion classifier with a convnet

This notebook trains a binary convnet that separates image patches of one sea lion class (TRAIN_CLASS = 0) from patches of all other classes, using the class-balancing generators set up below.


In [1]:
INPUT_DIR = '../../input/kaggle-sea-lion/02/'
OUTPUT_DIR = '../../output/kaggle-sea-lion/07/'
IMAGE_DIMS = (84,84,3)#class0
#IMAGE_DIMS = (84,84,3)#class1
#IMAGE_DIMS = (56,56,3)#class2
#IMAGE_DIMS = (42,42,3)#class3
#IMAGE_DIMS = (26,26,3)#class4

TRAIN_CLASS = 0
SAVE_WEIGHTS_FILE = OUTPUT_DIR + 'last-weights-medium1-class0.h5'

Y_CHANGE = [[1,2,3,4,5],[0]]  #relabel map for the second balancer: classes 1-5 collapse into one output class, class 0 into the other (see the note after this cell)
TRAIN_WEIGHT_RAW = (1,0.2,0.2,0.2,0.2,0.2)  #per-class output weights for the first (6-class) balancer
TRAIN_WEIGHT = (1,1)  #per-class output weights after relabeling to 2 classes
TEST_WEIGHT_RAW = (1,0.2,0.2,0.2,0.2,0.2)
TEST_WEIGHT = (1,1)

SAVE_MODEL_FILE = None
LOAD_WEIGHTS_FILE = SAVE_WEIGHTS_FILE
LOAD_MODEL_FILE = None
INPUT_DATASET_NAME = 'lion-patches-0px'

RECREATE_OUTPUT_DIR = True
RUN_TRAINING = True

TRAIN_EPOCHS = 10
INPUT_RANGE = 1
DEBUG = False

BATCH_SIZE=48
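
Y_CHANGE drives the relabeling done by the second ClassBalancerGeneratorXY below: original classes 1-5 are collapsed into one output class and class 0 into the other, which is why the later log output shows only two classes. The actual mechanics live in modules.utils; the sketch below is only a hypothetical numpy equivalent of that kind of one-hot regrouping, not the project's code.

import numpy as np

def regroup_onehot(y_onehot, groups):
    """Collapse one-hot labels into len(groups) classes; groups[i] lists the
    original class indices that map to new class i (e.g. [[1,2,3,4,5],[0]])."""
    labels = np.argmax(y_onehot, axis=1)              #original class index per sample
    new_labels = np.zeros(len(labels), dtype=int)
    for new_idx, members in enumerate(groups):
        new_labels[np.in1d(labels, members)] = new_idx
    return np.eye(len(groups))[new_labels]            #back to one-hot, now with 2 columns

# y_binary = regroup_onehot(y_batch, Y_CHANGE)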

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras import models

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.lions as lions


Using TensorFlow backend.

Prepare

Prepare output dir


In [3]:
utils.mkdirs(OUTPUT_DIR, dirs=['tf-logs','weights'], recreate=RECREATE_OUTPUT_DIR)
modules.logging.setup_file_logger(OUTPUT_DIR + 'out.log')
TF_LOGS_DIR = OUTPUT_DIR + 'tf-logs/'
WEIGHTS_DIR = OUTPUT_DIR + 'weights/'
input_dataset_path = INPUT_DIR + utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)

logger.info('Output dirs created')


2017-06-11 13:37:58,608 INFO Output dirs created
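
utils.mkdirs and setup_file_logger come from the project's modules package and are not shown in this notebook. A rough, hypothetical equivalent of a recreate-capable directory helper, only to illustrate what the RECREATE_OUTPUT_DIR flag implies (not the module's actual code):

import os
import shutil

def mkdirs_sketch(base_dir, dirs=(), recreate=False):
    #optionally wipe the base dir, then (re)create it and the requested subdirs
    if recreate and os.path.isdir(base_dir):
        shutil.rmtree(base_dir)
    for d in ('',) + tuple(dirs):
        path = os.path.join(base_dir, d)
        if not os.path.isdir(path):
            os.makedirs(path)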

Prepare train, validation and test data flows


In [4]:
logger.info('Using dataset ' + input_dataset_path + ' as input')
h5file = h5py.File(input_dataset_path, 'r')

#used for image augmentation (creating new images for balancing)
image_augmentation_generator = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=359,
        width_shift_range=0,
        height_shift_range=0,
        shear_range=0,
        horizontal_flip=True,
        vertical_flip=True)

#applied to all images during training
image_randomize_generator = ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=359,
        width_shift_range=0,
        height_shift_range=0,
        shear_range=0,
        horizontal_flip=True,
        vertical_flip=True)

logger.info('preparing train data')
train_batch_generator = utils.BatchGeneratorXYH5(h5file, start_ratio=0, end_ratio=INPUT_RANGE)
train_balance_generator = utils.ClassBalancerGeneratorXY(train_batch_generator,
                                                         image_augmentation=image_augmentation_generator,
                                                         output_weight=TRAIN_WEIGHT_RAW,
                                                         max_augmentation_ratio=1,
                                                         max_undersampling_ratio=1,
                                                         enforce_max_ratios=False,
                                                         batch_size=BATCH_SIZE,
                                                         start_ratio=0, end_ratio=0.7)
train_image_generator = utils.ImageAugmentationXYGenerator(train_balance_generator, image_randomize_generator)
train_generator = utils.ClassBalancerGeneratorXY(train_image_generator,
                                                         image_augmentation=image_augmentation_generator,
                                                         output_weight=TRAIN_WEIGHT,
                                                         max_augmentation_ratio=1,
                                                         max_undersampling_ratio=1,
                                                         enforce_max_ratios=False,
                                                         batch_size=BATCH_SIZE,
                                                         start_ratio=0, end_ratio=1,
                                                         change_y=Y_CHANGE)
#train_generator = utils.ChangeXYGenerator(train_image_generator, categorical_to_boolean)
logger.info('train size=' + str(train_generator.size) + ' batches=' + str(train_generator.nr_batches))


logger.info('preparing valid data')
valid_batch_generator = utils.BatchGeneratorXYH5(h5file, start_ratio=0, end_ratio=INPUT_RANGE)
valid_balance_generator = utils.ClassBalancerGeneratorXY(valid_batch_generator,
                                                         image_augmentation=image_augmentation_generator,
                                                         output_weight=TEST_WEIGHT_RAW,
                                                         max_augmentation_ratio=1,
                                                         max_undersampling_ratio=1,
                                                         enforce_max_ratios=False,
                                                         batch_size=BATCH_SIZE,
                                                         start_ratio=0.7, end_ratio=0.85)
valid_generator = utils.ClassBalancerGeneratorXY(valid_balance_generator,
                                                         image_augmentation=image_augmentation_generator,
                                                         output_weight=TEST_WEIGHT,
                                                         max_augmentation_ratio=1,
                                                         max_undersampling_ratio=1,
                                                         enforce_max_ratios=False,
                                                         batch_size=BATCH_SIZE,
                                                         start_ratio=0, end_ratio=1,
                                                         change_y=Y_CHANGE)
logger.info('valid size=' + str(valid_generator.size) + ' batches=' + str(valid_generator.nr_batches))



logger.info('preparing test data')
test_batch_generator = utils.BatchGeneratorXYH5(h5file, start_ratio=0, end_ratio=INPUT_RANGE)
test_balance_generator = utils.ClassBalancerGeneratorXY(test_batch_generator,
                                                         image_augmentation=image_augmentation_generator,
                                                         output_weight=TEST_WEIGHT_RAW,
                                                         max_augmentation_ratio=1,
                                                         max_undersampling_ratio=1,
                                                         enforce_max_ratios=False,
                                                         batch_size=BATCH_SIZE,
                                                         start_ratio=0.85, end_ratio=1)
test_generator = utils.ClassBalancerGeneratorXY(test_balance_generator,
                                                         image_augmentation=image_augmentation_generator,
                                                         output_weight=TEST_WEIGHT,
                                                         max_augmentation_ratio=1,
                                                         max_undersampling_ratio=1,
                                                         enforce_max_ratios=False,
                                                         batch_size=BATCH_SIZE,
                                                         start_ratio=0, end_ratio=1,
                                                         change_y=Y_CHANGE)
logger.info('test size=' + str(test_generator.size) + ' batches=' + str(test_generator.nr_batches))

#FIXME: when end_ratio is 1, size and nr_batches become negative (in the H5 batch generator, not the balancer)


2017-06-11 13:37:58,649 INFO Using dataset ../../input/kaggle-sea-lion/02/lion-patches-0px-84-84.h5 as input
2017-06-11 13:37:58,665 INFO preparing train data
2017-06-11 13:37:58,668 INFO loading input data for class distribution analysis...
2017-06-11 13:37:58,669 INFO loading Y from raw dataset
2017-06-11 13:37:58,671 INFO > [started] generator dump...
1600/2263
2017-06-11 13:37:59,088 INFO > [done]    generator dump (417.298 ms)
2017-06-11 13:37:59,092 INFO raw sample class distribution
2017-06-11 13:37:59,093 INFO 0: 78
2017-06-11 13:37:59,095 INFO 1: 45
2017-06-11 13:37:59,097 INFO 2: 675
2017-06-11 13:37:59,098 INFO 3: 150
2017-06-11 13:37:59,100 INFO 4: 281
2017-06-11 13:37:59,101 INFO 5: 1034
2017-06-11 13:37:59,103 INFO overall output samples per class: 90
2017-06-11 13:37:59,104 INFO augmentation/undersampling ratio per class
2017-06-11 13:37:59,106 INFO SETUP FLOW 0 0.7
2017-06-11 13:37:59,108 INFO calculating source range according to start/end range of the desired output..
2017-06-11 13:37:59,109 INFO output distribution for this flow
2017-06-11 13:37:59,111 INFO 0: 62 (1.15)
2017-06-11 13:37:59,112 INFO 1: 62 (2.00)
2017-06-11 13:37:59,113 INFO 2: 62 (0.13)
2017-06-11 13:37:59,115 INFO 3: 62 (0.60)
2017-06-11 13:37:59,116 INFO 4: 62 (0.32)
2017-06-11 13:37:59,117 INFO 5: 62 (0.09)
2017-06-11 13:37:59,120 INFO source range: 0-1792 (1792)
2017-06-11 13:37:59,121 INFO output range: 0-377 (377)
2017-06-11 13:37:59,123 INFO loading input data for class distribution analysis...
2017-06-11 13:37:59,125 INFO loading Y from raw dataset
2017-06-11 13:37:59,126 INFO > [started] generator dump...
2017-06-11 13:37:59,127 INFO starting new flow...
2017-06-11 13:37:59,943 INFO > [done]    generator dump (817.028 ms)
2017-06-11 13:37:59,947 INFO raw sample class distribution
2017-06-11 13:37:59,949 INFO 0: 320
2017-06-11 13:37:59,950 INFO 1: 57
2017-06-11 13:37:59,952 INFO overall output samples per class: 114
2017-06-11 13:37:59,953 INFO augmentation/undersampling ratio per class
2017-06-11 13:37:59,954 INFO SETUP FLOW 0 1
2017-06-11 13:37:59,956 INFO calculating source range according to start/end range of the desired output..
2017-06-11 13:37:59,957 INFO output distribution for this flow
2017-06-11 13:37:59,959 INFO 0: 114 (0.36)
2017-06-11 13:37:59,960 INFO 1: 114 (2.00)
2017-06-11 13:37:59,962 INFO source range: 0-376 (376)
2017-06-11 13:37:59,963 INFO output range: 0-228 (228)
2017-06-11 13:37:59,965 INFO train size=228 batches=5
2017-06-11 13:37:59,966 INFO preparing valid data
2017-06-11 13:37:59,968 INFO loading input data for class distribution analysis...
2017-06-11 13:37:59,969 INFO loading Y from raw dataset
2017-06-11 13:37:59,970 INFO > [started] generator dump...
1600/2263
2017-06-11 13:38:00,306 INFO > [done]    generator dump (335.333 ms)
2017-06-11 13:38:00,308 INFO raw sample class distribution
2017-06-11 13:38:00,310 INFO 0: 78
2017-06-11 13:38:00,311 INFO 1: 45
2017-06-11 13:38:00,313 INFO 2: 675
2017-06-11 13:38:00,315 INFO 3: 150
2017-06-11 13:38:00,316 INFO 4: 281
2017-06-11 13:38:00,318 INFO 5: 1034
2017-06-11 13:38:00,320 INFO overall output samples per class: 90
2017-06-11 13:38:00,322 INFO augmentation/undersampling ratio per class
2017-06-11 13:38:00,323 INFO SETUP FLOW 0.7 0.85
2017-06-11 13:38:00,325 INFO calculating source range according to start/end range of the desired output..
2017-06-11 13:38:00,326 INFO output distribution for this flow
2017-06-11 13:38:00,327 INFO 0: 13 (1.15)
2017-06-11 13:38:00,329 INFO 1: 13 (2.00)
2017-06-11 13:38:00,330 INFO 2: 13 (0.13)
2017-06-11 13:38:00,332 INFO 3: 13 (0.60)
2017-06-11 13:38:00,333 INFO 4: 13 (0.32)
2017-06-11 13:38:00,334 INFO 5: 13 (0.09)
2017-06-11 13:38:00,337 INFO source range: 1586-1864 (278)
2017-06-11 13:38:00,338 INFO output range: 378-458 (80)
2017-06-11 13:38:00,340 INFO loading input data for class distribution analysis...
2017-06-11 13:38:00,341 INFO loading Y from raw dataset
2017-06-11 13:38:00,343 INFO > [started] generator dump...
2017-06-11 13:38:00,345 INFO starting new flow...
2017-06-11 13:38:00,440 INFO > [done]    generator dump (97.604 ms)
2017-06-11 13:38:00,445 INFO raw sample class distribution
2017-06-11 13:38:00,447 INFO 0: 66
2017-06-11 13:38:00,448 INFO 1: 14
2017-06-11 13:38:00,449 INFO overall output samples per class: 28
2017-06-11 13:38:00,451 INFO augmentation/undersampling ratio per class
2017-06-11 13:38:00,452 INFO SETUP FLOW 0 1
2017-06-11 13:38:00,453 INFO calculating source range according to start/end range of the desired output..
2017-06-11 13:38:00,455 INFO output distribution for this flow
2017-06-11 13:38:00,456 INFO 0: 28 (0.42)
2017-06-11 13:38:00,458 INFO 1: 28 (2.00)
2017-06-11 13:38:00,459 INFO source range: 0-79 (79)
2017-06-11 13:38:00,460 INFO output range: 0-56 (56)
2017-06-11 13:38:00,461 INFO SETUP FLOW 0 79
2017-06-11 13:38:00,463 INFO calculating source range according to start/end range of the desired output..
2017-06-11 13:38:00,464 INFO output distribution for this flow
2017-06-11 13:38:00,465 INFO 0: 7031 (1.15)
2017-06-11 13:38:00,467 INFO 1: 7110 (2.00)
2017-06-11 13:38:00,468 INFO 2: 7110 (0.13)
2017-06-11 13:38:00,469 INFO 3: 7110 (0.60)
2017-06-11 13:38:00,471 INFO 4: 7110 (0.32)
2017-06-11 13:38:00,472 INFO 5: 7110 (0.09)
2017-06-11 13:38:00,475 INFO source range: 0-2262 (2262)
2017-06-11 13:38:00,477 INFO output range: 0-42581 (42581)
2017-06-11 13:38:00,478 INFO valid size=56 batches=2
2017-06-11 13:38:00,479 INFO preparing test data
2017-06-11 13:38:00,482 INFO loading input data for class distribution analysis...
2017-06-11 13:38:00,483 INFO loading Y from raw dataset
2017-06-11 13:38:00,484 INFO > [started] generator dump...
1600/2263
2017-06-11 13:38:00,836 INFO > [done]    generator dump (352.203 ms)
2017-06-11 13:38:00,839 INFO raw sample class distribution
2017-06-11 13:38:00,841 INFO 0: 78
2017-06-11 13:38:00,842 INFO 1: 45
2017-06-11 13:38:00,844 INFO 2: 675
2017-06-11 13:38:00,845 INFO 3: 150
2017-06-11 13:38:00,847 INFO 4: 281
2017-06-11 13:38:00,848 INFO 5: 1034
2017-06-11 13:38:00,849 INFO overall output samples per class: 90
2017-06-11 13:38:00,850 INFO augmentation/undersampling ratio per class
2017-06-11 13:38:00,852 INFO SETUP FLOW 0.85 1
2017-06-11 13:38:00,853 INFO calculating source range according to start/end range of the desired output..
2017-06-11 13:38:00,855 INFO output distribution for this flow
2017-06-11 13:38:00,856 INFO 0: 13 (1.15)
2017-06-11 13:38:00,858 INFO 1: 13 (2.00)
2017-06-11 13:38:00,859 INFO 2: 13 (0.13)
2017-06-11 13:38:00,861 INFO 3: 13 (0.60)
2017-06-11 13:38:00,862 INFO 4: 13 (0.32)
2017-06-11 13:38:00,863 INFO 5: 13 (0.09)
2017-06-11 13:38:00,866 INFO source range: 2028-2262 (234)
2017-06-11 13:38:00,868 INFO output range: 459-539 (80)
2017-06-11 13:38:00,869 INFO loading input data for class distribution analysis...
2017-06-11 13:38:00,870 INFO loading Y from raw dataset
2017-06-11 13:38:00,872 INFO > [started] generator dump...
2017-06-11 13:38:00,873 INFO starting new flow...
2017-06-11 13:38:00,967 INFO > [done]    generator dump (95.183 ms)
2017-06-11 13:38:00,971 INFO raw sample class distribution
2017-06-11 13:38:00,972 INFO 0: 72
2017-06-11 13:38:00,974 INFO 1: 8
2017-06-11 13:38:00,975 INFO overall output samples per class: 16
2017-06-11 13:38:00,976 INFO augmentation/undersampling ratio per class
2017-06-11 13:38:00,978 INFO SETUP FLOW 0 1
2017-06-11 13:38:00,979 INFO calculating source range according to start/end range of the desired output..
2017-06-11 13:38:00,981 INFO output distribution for this flow
2017-06-11 13:38:00,982 INFO 0: 16 (0.22)
2017-06-11 13:38:00,983 INFO 1: 16 (2.00)
2017-06-11 13:38:00,985 INFO source range: 0-79 (79)
2017-06-11 13:38:00,986 INFO output range: 0-32 (32)
2017-06-11 13:38:00,988 INFO SETUP FLOW 0 79
2017-06-11 13:38:00,989 INFO calculating source range according to start/end range of the desired output..
2017-06-11 13:38:00,990 INFO output distribution for this flow
2017-06-11 13:38:00,992 INFO 0: 7031 (1.15)
2017-06-11 13:38:00,993 INFO 1: 7110 (2.00)
2017-06-11 13:38:00,994 INFO 2: 7110 (0.13)
2017-06-11 13:38:00,995 INFO 3: 7110 (0.60)
2017-06-11 13:38:00,996 INFO 4: 7110 (0.32)
2017-06-11 13:38:00,997 INFO 5: 7110 (0.09)
2017-06-11 13:38:01,001 INFO source range: 0-2262 (2262)
2017-06-11 13:38:01,062 INFO output range: 0-42581 (42581)
2017-06-11 13:38:01,064 INFO test size=32 batches=1
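
The numbers in parentheses above are the per-class augmentation/undersampling factors: the target count per class divided by the raw count, e.g. 90/78 ≈ 1.15 for class 0 (slight augmentation) and 90/1034 ≈ 0.09 for class 5 (heavy undersampling). A quick check of that reading against the logged raw distribution:

raw_counts = {0: 78, 1: 45, 2: 675, 3: 150, 4: 281, 5: 1034}  #raw sample class distribution above
target_per_class = 90                                          #"overall output samples per class"

for cls in sorted(raw_counts):
    ratio = target_per_class / raw_counts[cls]   #>1 means augmentation, <1 means undersampling
    print('%d: %.2f' % (cls, ratio))
# 0: 1.15, 1: 2.00, 2: 0.13, 3: 0.60, 4: 0.32, 5: 0.09 -- matching the log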

In [5]:
#logger.info('INPUT DATASET DATA')
#dataset_path = INPUT_DIR + utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)
#with h5py.File(input_dataset_path, 'r') as h5file:
#    logger.info('generator')
#    input_generator = utils.BatchGeneratorXYH5(h5file, start_ratio=0.001, end_ratio=0.0012, batch_size=64)
#    X, Y = utils.dump_xy_to_array(input_generator.flow(), input_generator.size, x=True, y=True)
#    utils.show_images(X, image_labels=utils.onehot_to_label(Y), group_by_label=False, cols=10, is_bgr=True, size=2)
#
#    logger.info('x ' + str(np.shape(X)))
#    logger.info('y ' + str(np.shape(Y)))
#    logger.info(str(utils.class_distribution(Y)))

if(DEBUG):
    logger.info('BALANCE GENERATOR DATA')
    #dataset_path = INPUT_DIR + utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)
    X_train, Y_train = utils.dump_xy_to_array(train_generator.flow(), train_generator.size, x=False, y=True)
    logger.info('y ' + str(np.shape(Y_train)))
    #logger.info(str(utils.class_distribution(Y_train)))

    for xs,ys in train_generator.flow():
        utils.show_images(xs, image_labels=ys, cols=10, is_bgr=True, size=2)
        break

Prepare CNN model


In [6]:
logger.info('Load CNN model')
#model = lions.convnet_alexnet2_lion_keras(IMAGE_DIMS)

model = None
if(LOAD_MODEL_FILE is not None and os.path.isfile(LOAD_MODEL_FILE)):
    with open(LOAD_MODEL_FILE, 'r') as model_file:
        model_yaml = model_file.read()
        model = models.model_from_yaml(model_yaml)
        logger.info('loaded model from file ' + LOAD_MODEL_FILE)
else:
    model = lions.convnet_medium1_boolean(IMAGE_DIMS)
    logger.info('loaded model from function convnet_medium1_boolean')

if(LOAD_WEIGHTS_FILE is not None and os.path.isfile(LOAD_WEIGHTS_FILE)):
    model.load_weights(LOAD_WEIGHTS_FILE)
    logger.info('Loaded model weights from ' + LOAD_WEIGHTS_FILE)

logger.info('Model prepared')


2017-06-11 13:38:01,106 INFO Load CNN model
/notebooks/datascience-snippets/kaggle-sea-lion/modules/lions.py:164: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), padding="same", kernel_initializer="glorot_uniform", activation="relu")`
  model.add(convolutional.Conv2D(64, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
/notebooks/datascience-snippets/kaggle-sea-lion/modules/lions.py:166: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), padding="same", kernel_initializer="glorot_uniform", activation="relu")`
  model.add(convolutional.Conv2D(128, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
/notebooks/datascience-snippets/kaggle-sea-lion/modules/lions.py:168: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), padding="same", kernel_initializer="glorot_uniform", activation="relu")`
  model.add(convolutional.Conv2D(256, (3, 3), activation='relu', padding='same', init='glorot_uniform'))
/notebooks/datascience-snippets/kaggle-sea-lion/modules/lions.py:173: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(1024, kernel_initializer="glorot_uniform", activation="relu")`
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
/notebooks/datascience-snippets/kaggle-sea-lion/modules/lions.py:175: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(1024, kernel_initializer="glorot_uniform", activation="relu")`
  model.add(core.Dense(1024, activation='relu', init='glorot_uniform'))
2017-06-11 13:38:01,217 INFO loaded model from function convnet_medium1_boolean
2017-06-11 13:38:01,221 INFO Model prepared
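
The Keras 2 deprecation warnings reveal the rough shape of convnet_medium1_boolean: three Conv2D layers with 64, 128 and 256 3x3 filters (glorot_uniform init) followed by two 1024-unit dense layers. A comparable Keras 2 stack for the 84x84x3 patches could look like the sketch below; the pooling, dropout, output layer and compile settings are assumptions, not the definition in modules/lions.py.

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def convnet_medium1_sketch(image_dims=(84, 84, 3)):
    #assumed layout based on the deprecation warnings; pooling, dropout and head are guesses
    model = Sequential()
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu',
                     kernel_initializer='glorot_uniform', input_shape=image_dims))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), padding='same', activation='relu',
                     kernel_initializer='glorot_uniform'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(256, (3, 3), padding='same', activation='relu',
                     kernel_initializer='glorot_uniform'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu', kernel_initializer='glorot_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(1024, activation='relu', kernel_initializer='glorot_uniform'))
    model.add(Dense(2, activation='softmax'))  #two-class output ('any' vs 'male'); assumed
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model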

Train model


In [7]:
if(RUN_TRAINING):
    logger.info('Starting CNN training...')
    history = model.fit_generator(train_generator.flow(),
                  steps_per_epoch = train_generator.nr_batches,
                  epochs = TRAIN_EPOCHS,
                  callbacks = cnn.get_callbacks_keras(model, WEIGHTS_DIR, TF_LOGS_DIR),
                  validation_data = valid_generator.flow(),
                  validation_steps = valid_generator.nr_batches,
                  verbose = 1)

    if(SAVE_MODEL_FILE is not None):
        with open(SAVE_MODEL_FILE, 'w') as model_file:
            model_file.write(model.to_yaml())
            logger.info('Saved last model to ' + SAVE_MODEL_FILE)

    if(SAVE_WEIGHTS_FILE is not None):
        model.save_weights(SAVE_WEIGHTS_FILE)
        logger.info('Saved last weights to ' + SAVE_WEIGHTS_FILE)


2017-06-11 13:38:01,232 INFO Starting CNN training...
2017-06-11 13:38:01,436 INFO Summary name conv2d_1/kernel:0 is illegal; using conv2d_1/kernel_0 instead.
2017-06-11 13:38:01,445 INFO Summary name conv2d_1/bias:0 is illegal; using conv2d_1/bias_0 instead.
2017-06-11 13:38:01,455 INFO Summary name conv2d_2/kernel:0 is illegal; using conv2d_2/kernel_0 instead.
2017-06-11 13:38:01,464 INFO Summary name conv2d_2/bias:0 is illegal; using conv2d_2/bias_0 instead.
2017-06-11 13:38:01,475 INFO Summary name conv2d_3/kernel:0 is illegal; using conv2d_3/kernel_0 instead.
2017-06-11 13:38:01,485 INFO Summary name conv2d_3/bias:0 is illegal; using conv2d_3/bias_0 instead.
2017-06-11 13:38:01,496 INFO Summary name dense_1/kernel:0 is illegal; using dense_1/kernel_0 instead.
2017-06-11 13:38:01,508 INFO Summary name dense_1/bias:0 is illegal; using dense_1/bias_0 instead.
2017-06-11 13:38:01,518 INFO Summary name dense_2/kernel:0 is illegal; using dense_2/kernel_0 instead.
2017-06-11 13:38:01,527 INFO Summary name dense_2/bias:0 is illegal; using dense_2/bias_0 instead.
2017-06-11 13:38:01,538 INFO Summary name dense_3/kernel:0 is illegal; using dense_3/kernel_0 instead.
2017-06-11 13:38:01,551 INFO Summary name dense_3/bias:0 is illegal; using dense_3/bias_0 instead.
/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:9: UserWarning: Update your `fit_generator` call to the Keras 2 API: `fit_generator(<generator..., callbacks=[<keras.ca..., epochs=2, steps_per_epoch=5, validation_steps=2, validation_data=<generator..., verbose=1)`
2017-06-11 13:38:02,182 INFO starting new flow...
2017-06-11 13:38:02,185 INFO starting new flow...
Epoch 1/2
4/5 [=======================>......] - ETA: 2s - loss: 0.7522 - acc: 0.5365 
2017-06-11 13:38:12,546 INFO starting new flow...
2017-06-11 13:38:12,547 INFO starting new flow...
Epoch 00000: val_acc improved from -inf to 0.43750, saving model to ../../output/kaggle-sea-lion/07/weights/weights-00-0.44.h5
5/5 [==============================] - 13s - loss: 0.7388 - acc: 0.5458 - val_loss: 0.7050 - val_acc: 0.4375
Epoch 2/2
4/5 [=======================>......] - ETA: 1s - loss: 0.7001 - acc: 0.4635
Epoch 00001: val_acc did not improve
5/5 [==============================] - 11s - loss: 0.6979 - acc: 0.4833 - val_loss: 0.7010 - val_acc: 0.4062
2017-06-11 13:38:28,150 INFO Saved last weights to ../../output/kaggle-sea-lion/07/last-weights-medium1-class0.h5
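
The "val_acc improved ... saving model to .../weights-00-0.44.h5" messages come from the callbacks built by cnn.get_callbacks_keras, whose code is not shown here. A typical pair that produces this behaviour (checkpointing on best val_acc into WEIGHTS_DIR plus TensorBoard logging into TF_LOGS_DIR) would be something like:

from keras.callbacks import ModelCheckpoint, TensorBoard

def get_callbacks_sketch(weights_dir, tf_logs_dir):
    #save weights whenever val_acc improves; the filename pattern matches weights-00-0.44.h5 above
    checkpoint = ModelCheckpoint(weights_dir + 'weights-{epoch:02d}-{val_acc:.2f}.h5',
                                 monitor='val_acc', save_best_only=True, verbose=1)
    #write training scalars for TensorBoard into the tf-logs/ dir created earlier
    tensorboard = TensorBoard(log_dir=tf_logs_dir)
    return [checkpoint, tensorboard]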

Epoch accuracy/loss


In [8]:
if(RUN_TRAINING):
    logger.info('Training info')
    cnn.show_training_info_keras(history)


2017-06-11 13:38:28,163 INFO Training info
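
The accuracy/loss chart produced by cnn.show_training_info_keras is not preserved in this export. A minimal, hypothetical equivalent that plots the same Keras History object (this Keras version stores the curves under 'acc'/'val_acc' and 'loss'/'val_loss') is:

def plot_history(history):
    fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))
    ax_acc.plot(history.history['acc'], label='train')
    ax_acc.plot(history.history['val_acc'], label='validation')
    ax_acc.set_title('accuracy')
    ax_acc.legend()
    ax_loss.plot(history.history['loss'], label='train')
    ax_loss.plot(history.history['val_loss'], label='validation')
    ax_loss.set_title('loss')
    ax_loss.legend()
    plt.show()

# plot_history(history)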

Confusion matrix


In [9]:
cnn.evaluate_dataset_keras(test_generator.flow(), 
                       test_generator.nr_batches, 
                       test_generator.size, 
                       model, 
                       class_labels=['any', 'male'])


2017-06-11 13:38:28,430 INFO Evaluating model performance (32 samples)...
2017-06-11 13:38:28,434 INFO starting new flow...
2017-06-11 13:38:28,437 INFO starting new flow...
2017-06-11 13:38:29,325 INFO Accuracy: 0.75 - Loss: 0.654538691044
2017-06-11 13:38:29,328 INFO Predicting Y for detailed analysis...
/notebooks/datascience-snippets/kaggle-sea-lion/modules/cnn.py:59: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  if(self.y_ds==None):
2017-06-11 13:38:30,901 INFO Accuracy: 0.75
2017-06-11 13:38:30,903 INFO Number of test samples: 32
2017-06-11 13:38:30,905 INFO Kappa score: 0.0 (-1 bad; 0 just luck; 1 great)
/usr/local/lib/python3.4/dist-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
2017-06-11 13:38:30,908 INFO 
             precision    recall  f1-score   support

        any       0.00      0.00      0.00         8
       male       0.75      1.00      0.86        24

avg / total       0.56      0.75      0.64        32

/notebooks/datascience-snippets/kaggle-sea-lion/modules/utils.py:828: RuntimeWarning: invalid value encountered in true_divide
  acc_class = cm.diagonal()/np.sum(cm, axis=0)
2017-06-11 13:38:30,914 INFO Accuracy per class:
2017-06-11 13:38:30,915 INFO any: nan%
2017-06-11 13:38:30,916 INFO male: 75.0%
2017-06-11 13:38:30,917 INFO Confusion matrix:
2017-06-11 13:38:30,919 INFO 
[[ 0  8]
 [ 0 24]]
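
The nan accuracy for 'any' is a side effect of how the per-class figure is computed (cm.diagonal()/np.sum(cm, axis=0), per the warning above): the model never predicted 'any', so that column of the confusion matrix sums to zero. The headline numbers themselves are standard scikit-learn metrics; a sketch of reproducing them, assuming y_true and y_pred hold the integer labels (0='any', 1='male') collected from the test flow:

from sklearn.metrics import (accuracy_score, cohen_kappa_score,
                             classification_report, confusion_matrix)

def report(y_true, y_pred, labels=('any', 'male')):
    print('Accuracy:', accuracy_score(y_true, y_pred))
    print('Kappa score:', cohen_kappa_score(y_true, y_pred))  #0.0 here: predicting 'male' for everything is no better than chance
    print(classification_report(y_true, y_pred, target_names=list(labels)))
    cm = confusion_matrix(y_true, y_pred)
    print(cm)
    print('Accuracy per class:', cm.diagonal() / cm.sum(axis=0))  #nan for any class that is never predicted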

In [10]:
if(DEBUG):
    a = test_generator.flow()
    cnn.show_predictions(a, 50, model)
