In [1]:
# ---------------------------------------------------------------------------
# Run configuration: everything a reader might want to tune lives in this cell.
# ---------------------------------------------------------------------------

# Locations
INPUT_DIR = '../../input/kaggle-sea-lion/'
OUTPUT_DIR = '../../output/kaggle-sea-lion/15/'
RECREATE_OUTPUT_DIR = True

# Datasets
INPUT_DATASET_NAME = 'lion-patches-0px'
INPUT_DATASET_NAME_FALSE_POSITIVES = 'lion-patches-false-positives'
IMAGE_DIMS = (42, 42, 3)
LABEL_DIMS = (6,)
# Passed as end_ratio to every HDF5 batch generator.
INPUT_RANGE = 1
# Passed as change_y to the class balancers; set to None to disable.
# presumably (from_label, to_label) pairs -- confirm against utils.
Y_CHANGE = ((1, 5), (2, 5), (3, 5), (4, 5))

# Model / weights persistence
# Warm start is disabled. To enable it, point this at a weights file, e.g.
# INPUT_DIR + '05/weights-medium1-42x42-0.94.h5'
LOAD_WEIGHTS_FILE = None
LOAD_MODEL_FILE = None
SAVE_WEIGHTS_FILE = OUTPUT_DIR + 'last-weights-medium1-42x42.h5'
SAVE_MODEL_FILE = None

# Training
RUN_TRAINING = True
TRAIN_EPOCHS = 10
BATCH_SIZE = 48
# One entry per label class; passed as output_weight to the class balancers.
TRAIN_WEIGHT = (1, 1, 1, 1, 1, 1)
TEST_WEIGHT = (1, 1, 1, 1, 1, 1)
In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob
import cv2
from keras.preprocessing.image import ImageDataGenerator
from keras import models
from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.lions as lions
import modules.objectdetect as objectdetect
In [3]:
# Sub-directories of the run's output folder, handed to the Keras callbacks later on.
TF_LOGS_DIR = OUTPUT_DIR + 'tf-logs/'
WEIGHTS_DIR = OUTPUT_DIR + 'weights/'

# mkdirs runs before the file logger is attached because out.log lives inside OUTPUT_DIR.
utils.mkdirs(OUTPUT_DIR,
             dirs=['tf-logs', 'weights'],
             recreate=RECREATE_OUTPUT_DIR)
modules.logging.setup_file_logger(OUTPUT_DIR + 'out.log')

# Input datasets: the main patches come from step '02/', the false positives from step '10/'.
input_dataset_path = (
    INPUT_DIR + '02/' + utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)
)
input_dataset_path_false_positives = (
    INPUT_DIR + '10/' + utils.dataset_name(INPUT_DATASET_NAME_FALSE_POSITIVES, IMAGE_DIMS)
)

logger.info('Dir ' + OUTPUT_DIR + ' created')
In [4]:
logger.info('Using dataset ' + input_dataset_path + ' as input')

# Both HDF5 files are opened read-only. The handles are passed to the batch
# generators built further down in this cell.
# NOTE(review): nothing visible in this notebook closes these handles.
h5file = h5py.File(input_dataset_path, mode='r')
h5file_false_positives = h5py.File(input_dataset_path_false_positives, mode='r')
# Both generators share one configuration: no centering, normalisation or
# whitening, no shifts or shear -- only random rotation plus horizontal and
# vertical flips.
_rotate_and_flip_only = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=359,
    width_shift_range=0,
    height_shift_range=0,
    shear_range=0,
    horizontal_flip=True,
    vertical_flip=True,
)

# Used for image augmentation (creating new images for class balancing).
image_augmentation_generator = ImageDataGenerator(**_rotate_and_flip_only)

# Applied to all images during training.
image_randomize_generator = ImageDataGenerator(**_rotate_and_flip_only)
def _balanced_split(batch_generator, output_weight, start_ratio, end_ratio):
    """Wrap ``batch_generator`` in a ``utils.ClassBalancerGeneratorXY`` for one split.

    Only the source generator, the class weights and the [start_ratio, end_ratio]
    window differ between the train/valid/test balancers; every other option is
    shared and fixed here.

    Args:
        batch_generator: a ``utils.BatchGeneratorXYH5`` instance to draw from.
        output_weight: per-class weights (TRAIN_WEIGHT or TEST_WEIGHT).
        start_ratio: lower bound of the window handed to the balancer.
        end_ratio: upper bound of the window handed to the balancer.
            presumably both are fractions of the dataset -- confirm against utils.

    Returns:
        The configured ``utils.ClassBalancerGeneratorXY``.
    """
    return utils.ClassBalancerGeneratorXY(batch_generator,
                                          image_augmentation=image_augmentation_generator,
                                          output_weight=output_weight,
                                          max_augmentation_ratio=0,
                                          max_undersampling_ratio=1,
                                          enforce_max_ratios=False,
                                          batch_size=BATCH_SIZE,
                                          start_ratio=start_ratio, end_ratio=end_ratio,
                                          change_y=Y_CHANGE)


# ---- train: window 0 - 0.7 ------------------------------------------------
logger.info('preparing train data')
train_batch_generator1 = utils.BatchGeneratorXYH5(h5file, start_ratio=0, end_ratio=INPUT_RANGE)
# false positives samples
train_batch_generator2 = utils.BatchGeneratorXYH5(h5file_false_positives, start_ratio=0, end_ratio=INPUT_RANGE)
# NOTE(review): only the false-positives generator feeds the balancer below;
# train_batch_generator1 is built but unused while this join stays disabled.
# Confirm that this is the intended experiment.
# train_batch_generator = utils.JoinGeneratorsXY([train_batch_generator1,train_batch_generator2])
train_balance_generator = _balanced_split(train_batch_generator2, TRAIN_WEIGHT, 0, 0.7)
# Training batches are additionally passed through image_randomize_generator.
train_generator = utils.image_augmentation_xy(train_balance_generator.flow(), image_randomize_generator)
logger.info('train size=' + str(train_balance_generator.size) + ' batches=' + str(train_balance_generator.nr_batches))

# ---- valid: window 0.7 - 0.85 of the main dataset -------------------------
logger.info('preparing valid data')
valid_batch_generator = utils.BatchGeneratorXYH5(h5file, start_ratio=0, end_ratio=INPUT_RANGE)
valid_balance_generator = _balanced_split(valid_batch_generator, TEST_WEIGHT, 0.7, 0.85)
logger.info('valid size=' + str(valid_balance_generator.size) + ' batches=' + str(valid_balance_generator.nr_batches))

# ---- test: window 0.85 - 1 of the main dataset ----------------------------
logger.info('preparing test data')
test_batch_generator = utils.BatchGeneratorXYH5(h5file, start_ratio=0, end_ratio=INPUT_RANGE)
test_balance_generator = _balanced_split(test_batch_generator, TEST_WEIGHT, 0.85, 1)
logger.info('test size=' + str(test_balance_generator.size) + ' batches=' + str(test_balance_generator.nr_batches))

# FIXME: with end_ratio=1, size and nr_batches come out negative
# (in the h5 batch generator, not in the balancer).
In [5]:
logger.info('Load CNN model')

# Alternative architecture kept for reference:
#model = lions.convnet_alexnet2_lion_keras(IMAGE_DIMS)

# Reset first so a failed load cannot leave a model from a previous run in scope.
model = None

if LOAD_MODEL_FILE is not None and os.path.isfile(LOAD_MODEL_FILE):
    # Architecture comes from a YAML description on disk.
    with open(LOAD_MODEL_FILE, 'r') as model_file:
        model_yaml = model_file.read()
    # NOTE(review): model_from_yaml deserialises the file contents; only point
    # LOAD_MODEL_FILE at files you trust.
    model = models.model_from_yaml(model_yaml)
    logger.info('loaded model from file ' + LOAD_MODEL_FILE)
else:
    if LOAD_MODEL_FILE is not None:
        # A model file was requested but is missing: say so instead of silently
        # training a different architecture.
        logger.warning('Model file ' + LOAD_MODEL_FILE + ' not found; building model from function instead')
    model = lions.convnet_medium1_lion_keras(IMAGE_DIMS)
    logger.info('loaded model from function convnet_medium1_lion_keras')

if LOAD_WEIGHTS_FILE is not None:
    if os.path.isfile(LOAD_WEIGHTS_FILE):
        model.load_weights(LOAD_WEIGHTS_FILE)
        logger.info('Loaded model weights from ' + LOAD_WEIGHTS_FILE)
    else:
        # Same reasoning as above: a configured warm start that does not happen
        # must be visible in the log.
        logger.warning('Weights file ' + LOAD_WEIGHTS_FILE + ' not found; model weights were not loaded')

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
logger.info('Model prepared')
In [6]:
# Train the model and optionally persist the final architecture and weights.
# `history` is only defined when RUN_TRAINING is true.
if RUN_TRAINING:
    logger.info('Starting CNN training...')
    # `epochs` is the Keras 2 keyword and matches steps_per_epoch/validation_steps
    # used in this same call; `nb_epoch` is the Keras 1 spelling.
    history = model.fit_generator(train_generator,
                                  steps_per_epoch=train_balance_generator.nr_batches,
                                  epochs=TRAIN_EPOCHS,
                                  callbacks=cnn.get_callbacks_keras(model, WEIGHTS_DIR, TF_LOGS_DIR),
                                  validation_data=valid_balance_generator.flow(),
                                  validation_steps=valid_balance_generator.nr_batches,
                                  verbose=1)

    if SAVE_MODEL_FILE is not None:
        # Architecture only, serialised as YAML.
        with open(SAVE_MODEL_FILE, 'w') as model_file:
            model_file.write(model.to_yaml())
        logger.info('Saved last model to ' + SAVE_MODEL_FILE)

    if SAVE_WEIGHTS_FILE is not None:
        model.save_weights(SAVE_WEIGHTS_FILE)
        logger.info('Saved last weights to ' + SAVE_WEIGHTS_FILE)
In [7]:
# `history` is only assigned by the training cell when RUN_TRAINING is set,
# so the report is guarded by the same flag.
if RUN_TRAINING:
    logger.info('Training info')
    cnn.show_training_info_keras(history)
In [8]:
# Evaluate the model on the test balancer (window 0.85 - 1 of the main dataset).
cnn.evaluate_dataset_keras(
    test_balance_generator.flow(),
    test_balance_generator.nr_batches,
    test_balance_generator.size,
    model,
    class_labels=lions.CLASS_LABELS,
)
In [9]:
# Fresh flow over the test balancer, handed to show_predictions together with
# the value 50 and the model.
# presumably 50 is the number of samples to display -- confirm against modules.cnn.
test_samples_flow = test_balance_generator.flow()
cnn.show_predictions(test_samples_flow, 50, model)
In [ ]: