Train 3d nodule detector with LUNA16 dataset


In [1]:
INPUT_DIR = '../../input/nodules/'
OUTPUT_DIR = '../../output/lung-cancer/03/'
IMAGE_DIMS = (50,50,50,1)

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
import sklearn
import os
import glob

from modules.logging import logger
import modules.utils as utils
from modules.utils import Timer
import modules.logging
import modules.cnn as cnn
import modules.ctscan as ctscan

Training

Prepare output dir


In [7]:
utils.mkdirs(OUTPUT_DIR, recreate=False)
modules.logging.setup_file_logger(OUTPUT_DIR + 'out.log')
logger.info('Dir ' + OUTPUT_DIR + ' created')


2017-03-27 10:58:58,438 INFO Dir ../../output/lung-cancer/03/ created

Prepare CNN model


In [8]:
logger.info('Prepare CNN for training')
network = cnn.net_nodule3d_swethasubramanian(IMAGE_DIMS)
model = cnn.prepare_cnn_model(network, OUTPUT_DIR, model_file=None)


2017-03-27 10:59:01,501 INFO Prepare CNN for training
2017-03-27 10:59:01,581 INFO CNN model already loaded. Reusing it.

Train model


In [ ]:
dataset_path = INPUT_DIR + 'nodules-train.h5'

with h5py.File(dataset_path, 'r') as train_hdf5:
    X = train_hdf5['X']
    Y = train_hdf5['Y']
    logger.info('X shape ' + str(X.shape))
    logger.info('Y shape ' + str(Y.shape))

    dataset_path = INPUT_DIR + 'nodules-validate.h5'
    with h5py.File(dataset_path, 'r') as validate_hdf5:
        X_validate = validate_hdf5['X']
        Y_validate = validate_hdf5['Y']
        logger.info('X_validate shape ' + str(X_validate.shape))
        logger.info('Y_validate shape ' + str(Y_validate.shape))

        logger.info('Starting CNN training...')
        model.fit(X, Y, 
            validation_set=(X_validate, Y_validate), 
            shuffle=True, 
            batch_size=96, 
            n_epoch=100,
            show_metric=True,
            snapshot_epoch=True,
            run_id='nodule_classifier')

model.save(OUTPUT_DIR + "nodule-classifier.tfl")
logger.info("Network trained and saved as nodule-classifier.tfl!")


Training Step: 6  | total loss: 3.00226 | time: 87.080s
| Adam | epoch: 001 | loss: 3.00226 - acc: 0.6251 -- iter: 0576/6616

Evaluate results


In [6]:
logger.info('Evaluate dataset')
cnn.evaluate_dataset(OUTPUT_DIR + 'nodules-test.h5', model, batch_size=12, confusion_matrix=True)


2017-03-27 10:56:56,376 INFO Evaluate dataset
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-6-ebaae1719138> in <module>()
      1 logger.info('Evaluate dataset')
----> 2 cnn.evaluate_dataset(OUTPUT_DIR + 'nodules-test.h5', model, batch_size=12, confusion_matrix=True)

/notebooks/datascience-snippets/kaggle-lung-cancer/modules/cnn.py in evaluate_dataset(dataset_path, model, batch_size, confusion_matrix)
     88 
     89 def evaluate_dataset(dataset_path, model, batch_size=12, confusion_matrix=False):
---> 90     with h5py.File(dataset_path, 'r') as hdf5:
     91         X = hdf5['X']
     92         Y = hdf5['Y']

/usr/local/lib/python3.5/dist-packages/h5py/_hl/files.py in __init__(self, name, mode, driver, libver, userblock_size, swmr, **kwds)
    270 
    271                 fapl = make_fapl(driver, libver, **kwds)
--> 272                 fid = make_fid(name, mode, userblock_size, fapl, swmr=swmr)
    273 
    274                 if swmr_support:

/usr/local/lib/python3.5/dist-packages/h5py/_hl/files.py in make_fid(name, mode, userblock_size, fapl, fcpl, swmr)
     90         if swmr and swmr_support:
     91             flags |= h5f.ACC_SWMR_READ
---> 92         fid = h5f.open(name, flags, fapl=fapl)
     93     elif mode == 'r+':
     94         fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)

h5py/_objects.pyx in h5py._objects.with_phil.wrapper (/tmp/pip-at6d2npe-build/h5py/_objects.c:2684)()

h5py/_objects.pyx in h5py._objects.with_phil.wrapper (/tmp/pip-at6d2npe-build/h5py/_objects.c:2642)()

h5py/h5f.pyx in h5py.h5f.open (/tmp/pip-at6d2npe-build/h5py/h5f.c:1930)()

OSError: Unable to open file (Unable to open file: name = '../../output/lung-cancer/03/nodules-test.h5', errno = 2, error message = 'no such file or directory', flags = 0, o_flags = 0)

In [ ]: