In [1]:
# ---------------------------------------------------------------------------
# Notebook configuration: every value a reader may want to tune lives here.
# ---------------------------------------------------------------------------

# Where the input dataset is read from and where logs/weights/models are written.
INPUT_DIR = '../../input/kaggle-sea-lion/02/'
OUTPUT_DIR = '../../output/kaggle-sea-lion/05/'
INPUT_DATASET_NAME = 'lion-patches-0px'

# Patch dimensions; used both to resolve the dataset file name and to build the
# model. One alternative per class is kept below - enable exactly one.
IMAGE_DIMS = (84,84,3)#class0
#IMAGE_DIMS = (84,84,3)#class1
#IMAGE_DIMS = (56,56,3)#class2
#IMAGE_DIMS = (42,42,3)#class3
#IMAGE_DIMS = (26,26,3)#class4

# Files written after training. A value of None disables the corresponding save.
SAVE_WEIGHTS_FILE = OUTPUT_DIR + 'last-weights-simple-84-84.h5'
SAVE_MODEL_FILE = OUTPUT_DIR + 'last-model-simple-84-84.yml'
#SAVE_MODEL_FILE = None  # enable this line instead to skip saving the model

# Files read before training. Weights are reloaded from the last saved run when
# that file exists; with LOAD_MODEL_FILE = None the model is built from code.
LOAD_WEIGHTS_FILE = SAVE_WEIGHTS_FILE
LOAD_MODEL_FILE = None

# Run switches.
RECREATE_OUTPUT_DIR = False
RUN_TRAINING = True
DEBUG = False

# Training parameters.
# TRAIN_WEIGHT / TEST_WEIGHT are handed to the class balancer as output_weight
# (TEST_WEIGHT is used for both the validation and the test split).
# NOTE(review): presumably one weight per class - confirm against the balancer.
TRAIN_WEIGHT = (1,1,1,1,1,1)
TEST_WEIGHT = (1,1,1,1,1,1)
TRAIN_EPOCHS = 10
# Passed as end_ratio to the H5 batch generators.
INPUT_RANGE = 1
BATCH_SIZE=48
In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras import models
from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.lions as lions
In [3]:
# Create the output tree before anything else: the file logger configured right
# after is handed a path inside OUTPUT_DIR.
output_subdirs = ['tf-logs','weights']
utils.mkdirs(OUTPUT_DIR, dirs=output_subdirs, recreate=RECREATE_OUTPUT_DIR)

log_file_path = OUTPUT_DIR + 'out.log'
modules.logging.setup_file_logger(log_file_path)

# Sub-directories handed to the training callbacks later on.
TF_LOGS_DIR = OUTPUT_DIR + 'tf-logs/'
WEIGHTS_DIR = OUTPUT_DIR + 'weights/'

# The dataset file name is derived from the dataset name and the patch dimensions.
dataset_file_name = utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)
input_dataset_path = INPUT_DIR + dataset_file_name

logger.info('Output dirs created')
In [4]:
logger.info('Using dataset ' + input_dataset_path + ' as input')
# Opened read-only and deliberately not closed here: the batch generators built
# afterwards are constructed from this handle.
h5file = h5py.File(input_dataset_path, 'r')


def _rotate_flip_image_generator():
    """Return a new ImageDataGenerator doing only rotations (up to 359) and flips.

    All centering, std-normalization, whitening, shifting and shearing options
    are switched off. Each call returns a separate instance.
    """
    return ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=359,
        width_shift_range=0,
        height_shift_range=0,
        shear_range=0,
        horizontal_flip=True,
        vertical_flip=True)


#used for image augmentation (creating new images for balancing)
image_augmentation_generator = _rotate_flip_image_generator()

#applied to all images during training
image_randomize_generator = _rotate_flip_image_generator()
def _split_generators(output_weight, start_ratio, end_ratio):
    """Build the generators for one slice of the dataset.

    output_weight -- forwarded to the class balancer as output_weight
    start_ratio, end_ratio -- slice boundaries forwarded to the class balancer

    Returns a tuple (h5 batch generator, class balancer wrapping it). The batch
    generator always spans ratio 0 to INPUT_RANGE of the open h5 file; the slice
    itself is selected by the balancer.
    """
    batch_generator = utils.BatchGeneratorXYH5(h5file, start_ratio=0, end_ratio=INPUT_RANGE)
    balanced_generator = utils.ClassBalancerGeneratorXY(
        batch_generator,
        image_augmentation=image_augmentation_generator,
        output_weight=output_weight,
        max_augmentation_ratio=1,
        max_undersampling_ratio=1,
        enforce_max_ratios=False,
        batch_size=BATCH_SIZE,
        start_ratio=start_ratio, end_ratio=end_ratio)
    return batch_generator, balanced_generator


# Train split: ratio 0 to 0.7, additionally wrapped so every batch is randomized.
logger.info('preparing train data')
train_batch_generator, train_balance_generator = _split_generators(TRAIN_WEIGHT, 0, 0.7)
train_generator = utils.ImageAugmentationXYGenerator(train_balance_generator, image_randomize_generator)
logger.info('train size=' + str(train_generator.size) + ' batches=' + str(train_generator.nr_batches))

# Validation split: ratio 0.7 to 0.85.
logger.info('preparing valid data')
valid_batch_generator, valid_generator = _split_generators(TEST_WEIGHT, 0.7, 0.85)
logger.info('valid size=' + str(valid_generator.size) + ' batches=' + str(valid_generator.nr_batches))

# Test split: ratio 0.85 to 1.
logger.info('preparing test data')
test_batch_generator, test_generator = _split_generators(TEST_WEIGHT, 0.85, 1)
logger.info('test size=' + str(test_generator.size) + ' batches=' + str(test_generator.nr_batches))

#FIXME when using 1 as end ratio, size and nr_batches get negative (h5 batch generator, not balancer...)
In [5]:
# Disabled snippet: dump and display a small slice of the raw input dataset.
#logger.info('INPUT DATASET DATA')
#dataset_path = INPUT_DIR + utils.dataset_name(INPUT_DATASET_NAME, IMAGE_DIMS)
#with h5py.File(input_dataset_path, 'r') as h5file:
#    logger.info('generator')
#    input_generator = utils.BatchGeneratorXYH5(h5file, start_ratio=0.001, end_ratio=0.0012, batch_size=64)
#    X, Y = utils.dump_xy_to_array(input_generator.flow(), input_generator.size, x=True, y=True)
#    utils.show_images(X, image_labels=utils.onehot_to_label(Y), group_by_label=False, cols=10, is_bgr=True, size=2)
#
#    logger.info('x ' + str(np.shape(X)))
#    logger.info('y ' + str(np.shape(Y)))
#    logger.info(str(utils.class_distribution(Y)))

# Debug only: log the label shape and class distribution produced by the train
# generator, then display the images of its first batch.
if DEBUG:
    logger.info('BALANCE GENERATOR DATA')
    # NOTE(review): the generator object itself is passed here, while other call
    # sites iterate over generator.flow() - confirm dump_xy_to_array accepts it.
    X_train, Y_train = utils.dump_xy_to_array(train_generator, train_generator.size, x=False, y=True)

    label_shape = np.shape(Y_train)
    logger.info('y ' + str(label_shape))
    label_distribution = utils.class_distribution(Y_train)
    logger.info(str(label_distribution))

    # Only the first batch is shown; nothing is displayed if the flow is empty.
    first_batch = next(iter(train_generator.flow()), None)
    if first_batch is not None:
        xs, ys = first_batch
        utils.show_images(xs, image_labels=utils.onehot_to_label(ys), cols=10, is_bgr=True, size=2)
In [6]:
# Build the model - from a saved YAML description when one is configured and
# present, otherwise from code - then optionally restore previously saved weights.
logger.info('Load CNN model')
#model = lions.convnet_alexnet2_lion_keras(IMAGE_DIMS)
model = None
if LOAD_MODEL_FILE is not None and os.path.isfile(LOAD_MODEL_FILE):
    with open(LOAD_MODEL_FILE, 'r') as model_file:
        model_yaml = model_file.read()
    # NOTE(security): this deserializes YAML read from disk; only point
    # LOAD_MODEL_FILE at files you produced yourself or otherwise trust.
    model = models.model_from_yaml(model_yaml)
    logger.info('loaded model from file ' + LOAD_MODEL_FILE)
else:
    if LOAD_MODEL_FILE is not None:
        # A model file was configured but is missing: say so instead of
        # silently falling back to the code-defined model.
        logger.info('model file ' + LOAD_MODEL_FILE + ' not found, building model from function instead')
    model = lions.convnet_medium1_lion_keras(IMAGE_DIMS)
    logger.info('loaded model from function convnet_medium1_lion_keras')

# Weights are restored regardless of where the model definition came from.
if LOAD_WEIGHTS_FILE is not None:
    if os.path.isfile(LOAD_WEIGHTS_FILE):
        model.load_weights(LOAD_WEIGHTS_FILE)
        logger.info('Loaded model weights from ' + LOAD_WEIGHTS_FILE)
    else:
        # Expected on a first run, when no weights have been saved yet.
        logger.info('Weights file ' + LOAD_WEIGHTS_FILE + ' not found, keeping initial model weights')

logger.info('Model prepared')
In [7]:
# Train the model on the train generator, validating on the validation
# generator after each epoch, then persist the resulting model and weights.
if RUN_TRAINING:
    logger.info('Starting CNN training...')
    # `epochs` is the Keras 2 keyword; it replaces the Keras 1 `nb_epoch`, which
    # was mixed here with the Keras 2 `steps_per_epoch` / `validation_steps`.
    history = model.fit_generator(
        train_generator.flow(),
        steps_per_epoch = train_generator.nr_batches,
        epochs = TRAIN_EPOCHS,
        callbacks = cnn.get_callbacks_keras(model, WEIGHTS_DIR, TF_LOGS_DIR),
        validation_data = valid_generator.flow(),
        validation_steps = valid_generator.nr_batches,
        verbose = 1)

    # Saving happens only after a training run, so an evaluation-only run
    # (RUN_TRAINING = False) never overwrites the previously saved files.
    if SAVE_MODEL_FILE is not None:
        with open(SAVE_MODEL_FILE, 'w') as model_file:
            model_file.write(model.to_yaml())
        logger.info('Saved last model to ' + SAVE_MODEL_FILE)

    if SAVE_WEIGHTS_FILE is not None:
        model.save_weights(SAVE_WEIGHTS_FILE)
        logger.info('Saved last weights to ' + SAVE_WEIGHTS_FILE)
In [8]:
# `history` is only assigned when the training cell actually trained, so the
# report is guarded by the same switch.
if RUN_TRAINING:
    logger.info('Training info')
    cnn.show_training_info_keras(history)
In [9]:
# Evaluate the model on the held-out test split (ratio 0.85 to 1 of the dataset).
eval_batches = test_generator.flow()
eval_nr_batches = test_generator.nr_batches
eval_size = test_generator.size
cnn.evaluate_dataset_keras(eval_batches, eval_nr_batches, eval_size, model,
                           class_labels=lions.CLASS_LABELS)
In [10]:
# Debug only: show model predictions for batches drawn from the test split.
if DEBUG:
    # NOTE(review): 50 is presumably the number of predictions to display -
    # confirm against cnn.show_predictions.
    a = test_generator.flow()
    cnn.show_predictions(a, 50, model)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: