Train nodule detector with LUNA16 dataset


In [1]:
# --- Notebook configuration -------------------------------------------------
# Directory handed to utils.dataset_path() when locating dataset files.
INPUT_DIR = '../../input/nodules/'

# Directory that receives the log file and the saved model for this run.
OUTPUT_DIR = '../../output/lung-cancer/02/'

# Dimensions passed to the network builder, which reads four entries from it
# to declare its input layer.
# NOTE(review): presumably (depth, height, width, channels) -- confirm.
IMAGE_DIMS = (50, 50, 50, 1)

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.ctscan as ctscan

Training

Prepare output dir


In [3]:
# Set up the output directory for this run and send log records to a file
# inside it.
# NOTE(review): recreate=True presumably wipes any existing directory --
# confirm in modules.utils before pointing OUTPUT_DIR at data worth keeping.
log_file_path = OUTPUT_DIR + 'out.log'

utils.mkdirs(OUTPUT_DIR, recreate=True)
modules.logging.setup_file_logger(log_file_path)
logger.info('Dir {} created'.format(OUTPUT_DIR))


2017-03-25 01:27:29,109 INFO Dir ../../output/lung-cancer/02/ created

Prepare CNN model


In [4]:
# Build the network for IMAGE_DIMS and wrap it in a model object that the
# training and evaluation cells use as `model`.
# NOTE(review): the recorded output of this cell is an AssertionError
# ("Incoming Tensor shape must be 4-D") raised inside tflearn's conv_2d.
# The traceback shows the builder declaring its input as
# [None, dims[0], dims[1], dims[2], dims[3]] (5-D) and then applying conv_2d,
# so the 4-element IMAGE_DIMS cannot pass through this builder as written.
# Presumably either a 3-D builder or a 3-element IMAGE_DIMS is intended, and
# the fix belongs in modules/cnn.py -- confirm. Until then `model` is never
# assigned and the cells below cannot run.
logger.info('Prepare CNN for training')
network = cnn.net_nodule2d_swethasubramanian(IMAGE_DIMS)
# model_file=None: presumably "start from scratch, do not restore weights" --
# confirm against cnn.prepare_cnn_model.
model = cnn.prepare_cnn_model(network, OUTPUT_DIR, model_file=None)


2017-03-25 01:27:32,524 INFO Prepare CNN for training
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-4-29974134b16e> in <module>()
      1 logger.info('Prepare CNN for training')
----> 2 network = cnn.net_nodule2d_swethasubramanian(IMAGE_DIMS)
      3 model = cnn.prepare_cnn_model(network, OUTPUT_DIR, model_file=None)

/notebooks/datascience-snippets/kaggle-lung-cancer/modules/cnn.py in net_nodule2d_swethasubramanian(image_dims)
     33     net = layers.core.input_data(shape=[None, image_dims[0], image_dims[1], image_dims[2], image_dims[3]], dtype=tf.float32, data_preprocessing=img_prep, data_augmentation=img_aug)
     34 
---> 35     net = layers.conv.conv_2d(net, 50, 3, activation='relu')
     36     net = layers.conv.max_pool_2d(net, 2)
     37     net = layers.conv.conv_2d(net, 64, 3, activation='relu')

/usr/local/lib/python3.4/dist-packages/tflearn/layers/conv.py in conv_2d(incoming, nb_filter, filter_size, strides, padding, activation, bias, weights_init, bias_init, regularizer, weight_decay, trainable, restore, reuse, scope, name)
     63     """
     64     input_shape = utils.get_incoming_shape(incoming)
---> 65     assert len(input_shape) == 4, "Incoming Tensor shape must be 4-D"
     66     filter_size = utils.autoformat_filter_conv2d(filter_size,
     67                                                  input_shape[-1],

AssertionError: Incoming Tensor shape must be 4-D

Train model


In [ ]:
# Train the nodule classifier on the HDF5 train split, validating against the
# HDF5 validate split, then save the trained model into OUTPUT_DIR.
#
# Fix: the validate path was built from `input_dir` / `image_dims`, which are
# never defined in this notebook (NameError on a fresh kernel). The config
# cell defines INPUT_DIR / IMAGE_DIMS, so those are used instead.
#
# NOTE(review): the train split is read from OUTPUT_DIR, which the
# "Prepare output dir" cell passes to utils.mkdirs(..., recreate=True). If
# that call wipes the directory, this file will not exist at this point --
# confirm whether it should be resolved from INPUT_DIR like the validate split.
train_dataset_path = OUTPUT_DIR + 'nodules-train.h5'
validate_dataset_path = utils.dataset_path(INPUT_DIR, 'validate', IMAGE_DIMS)

# Both files stay open for the whole fit() call: X/Y are h5py dataset handles,
# not in-memory copies, so they are only readable while the files are open.
with h5py.File(train_dataset_path, 'r') as train_hdf5:
    X = train_hdf5['X']
    Y = train_hdf5['Y']
    logger.info('X shape ' + str(X.shape))
    logger.info('Y shape ' + str(Y.shape))

    with h5py.File(validate_dataset_path, 'r') as validate_hdf5:
        X_validate = validate_hdf5['X']
        Y_validate = validate_hdf5['Y']
        logger.info('X_validate shape ' + str(X_validate.shape))
        logger.info('Y_validate shape ' + str(Y_validate.shape))

        logger.info('Starting CNN training...')
        model.fit(X, Y,
                  validation_set=(X_validate, Y_validate),
                  shuffle=True,
                  batch_size=96,
                  n_epoch=100,
                  show_metric=True,
                  snapshot_epoch=True,
                  run_id='nodule_classifier')

# Save only after both dataset files are closed.
model.save(OUTPUT_DIR + "nodule-classifier.tfl")
logger.info("Network trained and saved as nodule-classifier.tfl!")

Evaluate results


In [ ]:
# Evaluate the trained model against the test split, requesting a confusion
# matrix.
# NOTE(review): `evaluate_dataset` is not imported or defined in any visible
# cell; presumably it lives in one of the project modules (e.g. modules.cnn)
# and needs a qualified call or an import -- confirm.
test_dataset_path = OUTPUT_DIR + 'nodules-test.h5'

logger.info('Evaluate dataset')
evaluate_dataset(
    test_dataset_path,
    model,
    batch_size=12,
    confusion_matrix=True,
)