Train sea lion classifier with a convnet


In [1]:
INPUT_DIR = '../../output/kaggle-sea-lion/02/'
OUTPUT_DIR = '../../output/kaggle-sea-lion/03/'
IMAGE_DIMS = (148,148,3)

MODEL_FILE = OUTPUT_DIR + 'sea-lion-classifier'

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.lions as lions


Using TensorFlow backend.

Training

Prepare output dir


In [3]:
utils.mkdirs(OUTPUT_DIR, recreate=False)
modules.logging.setup_file_logger(OUTPUT_DIR + 'out.log')
logger.info('Dir ' + OUTPUT_DIR + ' created')

load_model_file = None
if(os.path.isfile(MODEL_FILE + '.index')):
    load_model_file = MODEL_FILE
    logger.info('Found previous model file. It will be loaded on cnn network as its initial state. ' + load_model_file)


2017-04-03 01:07:18,618 INFO Dir ../../output/kaggle-sea-lion/03/ created
2017-04-03 01:07:18,620 INFO Found previous model file. It will be loaded on cnn network as its initial state. ../../output/kaggle-sea-lion/03/sea-lion-classifier

Prepare CNN model


In [4]:
logger.info('Prepare CNN for training')
network = lions.convnet_alexnet_lion(IMAGE_DIMS)
model = cnn.prepare_cnn_model(network, OUTPUT_DIR, model_file=load_model_file)


2017-04-03 01:07:21,282 INFO Prepare CNN for training
2017-04-03 01:07:21,463 INFO Prepare CNN
2017-04-03 01:07:21,464 INFO Preparing output dir
2017-04-03 01:07:21,465 INFO Initializing network...
2017-04-03 01:07:23,967 INFO Network initialized
2017-04-03 01:07:23,969 INFO Load previous training...
2017-04-03 01:07:26,518 INFO Model loaded

Train model


In [5]:
dataset_path = INPUT_DIR + utils.dataset_name('lion-patches', IMAGE_DIMS)

with h5py.File(dataset_path, 'r') as hdf5:
    X,Y = utils.dataset_xy_range(hdf5, 0, 0.9)
    logger.info('X shape ' + str(X.shape))
    logger.info('Y shape ' + str(Y.shape))

    logger.info('Starting CNN training...')
    model.fit(X, Y, 
        validation_set=0.2,
        shuffle=True, 
        batch_size=96, 
        n_epoch=10,
        show_metric=True,
        snapshot_epoch=False,
        run_id='sea_lion_classifier')

model.save(MODEL_FILE)
logger.info("Network trained and saved as " + MODEL_FILE)


Training Step: 29  | total loss: 1.60029 | time: 32.292s
| Momentum | epoch: 010 | loss: 1.60029 - acc: 0.3534 -- iter: 192/216
Training Step: 30  | total loss: 1.54722 | time: 58.554s
| Momentum | epoch: 010 | loss: 1.54722 - acc: 0.3659 | val_loss: 1.44306 - val_acc: 0.5370 -- iter: 216/216
--
INFO:tensorflow:/notebooks/output/kaggle-sea-lion/03/tf-checkpoint-best5370 is not in all_model_checkpoint_paths. Manually adding it.
2017-04-03 00:54:07,168 INFO /notebooks/output/kaggle-sea-lion/03/tf-checkpoint-best5370 is not in all_model_checkpoint_paths. Manually adding it.
INFO:tensorflow:/notebooks/output/kaggle-sea-lion/03/sea-lion-classifier is not in all_model_checkpoint_paths. Manually adding it.
2017-04-03 00:54:14,543 INFO /notebooks/output/kaggle-sea-lion/03/sea-lion-classifier is not in all_model_checkpoint_paths. Manually adding it.
2017-04-03 00:54:14,670 INFO Network trained and saved as ../../output/kaggle-sea-lion/03/sea-lion-classifier

Evaluate results


In [5]:
logger.info('Evaluate dataset')
dataset_path = INPUT_DIR + utils.dataset_name('lion-patches', IMAGE_DIMS)

with h5py.File(dataset_path, 'r') as hdf5:
    X,Y = utils.dataset_xy_range(hdf5, 0.9, 1)
    cnn.evaluate_dataset(X, Y, model, batch_size=24, detailed=True)


2017-04-03 01:07:35,262 INFO Evaluate dataset
2017-04-03 01:07:37,248 INFO Accuracy: [0.70000001192092898]
2017-04-03 01:07:39,117 INFO Kappa score (was this luck?): -0.125
2017-04-03 01:07:39,119 INFO Confusion matrix:
2017-04-03 01:07:39,120 INFO [[ 0  2  0  0]
 [ 4 18  0  0]
 [ 0  3  0  0]
 [ 0  3  0  0]]

In [ ]:


In [ ]:


In [ ]:


In [ ]: