In [3]:
from __future__ import print_function

import sys
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

from model import get_model
from utils import crps, real_to_cdf, preprocess

In [8]:
PREPROCDATA = '/storage/hpc_dmytro/Kaggle/SDSB/images/keras_size64/'
MODELS = '/storage/hpc_dmytro/Kaggle/SDSB/models/keras/size64/'

def load_train_data():
    """
    Load training data from .npy files.
    """
    X = np.load(PREPROCDATA + '/X_train.npy')
    y = np.load(PREPROCDATA + '/y_train.npy')

    X = X.astype(np.float32)
    X /= 255

    # shuffle X and y in unison: reseed with the same seed before each shuffle
    seed = np.random.randint(1, 10**7)
    np.random.seed(seed)
    np.random.shuffle(X)
    np.random.seed(seed)
    np.random.shuffle(y)

    return X, y


def split_data(X, y, split_ratio=0.2):
    """
    Split data into training and testing sets.
    :param X: images
    :param y: target volumes (systole, diastole)
    :param split_ratio: fraction of samples held out for testing
    """
    split = int(X.shape[0] * split_ratio)
    X_test = X[:split, :, :, :]
    y_test = y[:split, :]
    X_train = X[split:, :, :, :]
    y_train = y[split:, :]

    return X_train, y_train, X_test, y_test


def train():
    """
    Training systole and diastole models.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # define image generator for random rotations
    datagen = ImageDataGenerator(featurewise_center=False,
                                 featurewise_std_normalization=False,
                                 rotation_range=15)

    nb_iter = 1
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-'*50)
    print('Training...')
    print('-'*50)

    for i in range(nb_iter):
        print('-'*50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-'*50)

        print('Fitting systole model...')
        hist_systole = model_systole.fit_generator(
                                         datagen.flow(X_train, y_train[:, 0], batch_size=batch_size, shuffle=True),
                                         samples_per_epoch=X_train.shape[0],
                                         nb_epoch=epochs_per_iter, verbose=1,
                                         validation_data=(X_test, y_test[:, 0]),
                                         nb_worker=1)

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit_generator(
                                         datagen.flow(X_train, y_train[:, 1], batch_size=batch_size, shuffle=True),
                                         samples_per_epoch=X_train.shape[0],
                                         nb_epoch=epochs_per_iter, verbose=1,
                                         validation_data=(X_test, y_test[:, 1]),
                                         nb_worker=1)

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(MODELS + 'weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(MODELS + 'weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))
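
The CRPS bookkeeping above relies on `crps` and `real_to_cdf` from `utils.py`, which is not shown in this notebook. Below is a minimal sketch of what they plausibly compute, assuming the standard competition formulation (600 one-ml volume bins, a step-like CDF for the true volumes, and a Gaussian CDF centred on the prediction with the loss value used as sigma). Only the call signatures are taken from the cell above; the bodies are assumptions.

In [ ]:
# Hypothetical sketch of utils.real_to_cdf and utils.crps -- not the actual utils.py
import numpy as np
from scipy.stats import norm

def real_to_cdf(y, sigma=1e-10):
    """CDF over volumes 0..599 ml; a tiny sigma makes this effectively a step function."""
    cdf = np.zeros((y.shape[0], 600))
    for i in range(y.shape[0]):
        cdf[i] = norm.cdf(np.arange(600), loc=y[i], scale=sigma)
    return cdf

def crps(true_cdf, pred_cdf):
    """Continuous Ranked Probability Score: mean squared difference between CDFs."""
    return np.mean((true_cdf - pred_cdf) ** 2)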

In [9]:
train()


INFO (theano.gof.compilelock): Refreshing lock /home/hpc_dmytro/.theano/compiledir_Linux-3.4.90-x86_64-with-redhat-6.7-Carbon-x86_64-2.7.3-64/lock_dir/lock
Loading and compiling models...
Loading training data...
Pre-processing images...
  38/5331 [..............................] - ETA: 1221s
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-9-93fd337a0d5c> in <module>()
----> 1 train()

<ipython-input-8-b62dcee1347f> in train()
     49 
     50     print('Pre-processing images...')
---> 51     X = preprocess(X)
     52 
     53     # split to training and test

/home/hpc_dmytro/Kaggle/SecondDataScienceBowl/Code/Forum_code/Keras/utils.py in preprocess(X)
     38     for i in range(X.shape[0]):
     39         for j in range(X.shape[1]):
---> 40             X[i, j] = denoise_tv_chambolle(X[i, j], weight=0.1, multichannel=False)
     41         progbar.add(1)
     42     return X

/home/hpc_dmytro/python/lib/python2.7/site-packages/skimage/restoration/_denoise.pyc in denoise_tv_chambolle(im, weight, eps, n_iter_max, multichannel)
    332 
    333     if im.ndim == 2:
--> 334         out = _denoise_tv_chambolle_2d(im, weight, eps, n_iter_max)
    335     elif im.ndim == 3:
    336         if multichannel:

/home/hpc_dmytro/python/lib/python2.7/site-packages/skimage/restoration/_denoise.pyc in _denoise_tv_chambolle_2d(im, weight, eps, n_iter_max)
    235         gy[:, :-1] = np.diff(out, axis=1)
    236         norm = np.sqrt(gx ** 2 + gy ** 2)
--> 237         E += weight * norm.sum()
    238         norm *= 0.5 / weight
    239         norm += 1

KeyboardInterrupt: 
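
The interrupt happened inside `utils.preprocess`, which, per the frames above, runs total-variation (Chambolle) denoising over every slice of every study and therefore dominates start-up time (ETA roughly 20 minutes here). A sketch of that function, reconstructed from the traceback; the `Progbar` import is an assumption:

In [ ]:
# Sketch of utils.preprocess, reconstructed from the traceback above -- not verbatim
import numpy as np
from skimage.restoration import denoise_tv_chambolle
from keras.utils.generic_utils import Progbar  # assumed source of `progbar`

def preprocess(X):
    """Denoise each 2-D slice with total-variation (Chambolle) denoising."""
    progbar = Progbar(X.shape[0])  # one progress-bar tick per study
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            X[i, j] = denoise_tv_chambolle(X[i, j], weight=0.1, multichannel=False)
        progbar.add(1)
    return X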

In [ ]: