In [ ]:


In [1]:
# Thanks to Jonathan Mulholland and Aaron Sander from Booz Allen Hamilton
# who made this code public as a part of the Kaggle Data Science Bowl 2017 contest.
# https://www.kaggle.com/c/data-science-bowl-2017/details/tutorial

import SimpleITK as sitk
import numpy as np
import os
from glob import glob
import pandas as pd

from tqdm import tqdm 

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def make_mask(center,diam,z,
              width,height, depth, 
              spacing, origin,  
              mask_width=32, mask_height=32, mask_depth=32):
    '''
Center : centers of circles px -- list of coordinates x,y,z
diam : diameters of circles px -- diameter
widthXheight : pixel dim of image
spacing = mm/px conversion rate np array x,y,z
origin = x,y,z mm np.array
z = z position of slice in world coordinates mm
    '''
    mask = np.zeros([height,width]) # 0's everywhere except nodule swapping x,y to match img
    #convert to nodule space from world coordinates

    padMask = 5
    
    # Defining the voxel range in which the nodule falls
    v_center = (center-origin)/spacing
    v_diam = int(diam/spacing[0]+padMask)
    v_xmin = np.max([0,int(v_center[0]-v_diam)-padMask])
    v_xmax = np.min([width-1,int(v_center[0]+v_diam)+padMask])
    v_ymin = np.max([0,int(v_center[1]-v_diam)-padMask]) 
    v_ymax = np.min([height-1,int(v_center[1]+v_diam)+padMask])

    v_xrange = range(v_xmin,v_xmax+1)
    v_yrange = range(v_ymin,v_ymax+1)

    # Convert back to world coordinates for distance calculation
    x_data = [x*spacing[0]+origin[0] for x in range(width)]
    y_data = [x*spacing[1]+origin[1] for x in range(height)]

    # SPHERICAL MASK
    # Fill in 1 within sphere around nodule 
#     for v_x in v_xrange:
#         for v_y in v_yrange:
#             p_x = spacing[0]*v_x + origin[0]
#             p_y = spacing[1]*v_y + origin[1]
#             if np.linalg.norm(center-np.array([p_x,p_y,z]))<=diam:
#                 mask[int((p_y-origin[1])/spacing[1]),int((p_x-origin[0])/spacing[0])] = 1.0
               
    # RECTANGULAR MASK
    for v_x in v_xrange:
        for v_y in v_yrange:
            
            p_x = spacing[0]*v_x + origin[0]
            p_y = spacing[1]*v_y + origin[1]
            
            if ((p_x >= (center[0] - mask_width)) &
                (p_x <= (center[0] + mask_width)) & 
                (p_y >= (center[1] - mask_height)) &
                (p_y <= (center[1] + mask_height))):
                
                mask[int((np.abs(p_y-origin[1]))/spacing[1]),int((np.abs(p_x-origin[0]))/spacing[0])] = 1.0
            
    
    # TODO:  The height and width seemed to be switched. 
    # This works but needs to be simplified. It's probably due to SimpleITK versus Numpy transposed indicies.
    left = np.max([0, np.abs(center[0] - origin[0]) - mask_width]).astype(int)
    right = np.min([width, np.abs(center[0] - origin[0]) + mask_width]).astype(int)
    down = np.max([0, np.abs(center[1] - origin[1]) - mask_height]).astype(int)
    up = np.min([height, np.abs(center[1] - origin[1]) + mask_height]).astype(int)
    
    top = np.min([depth, np.abs(center[2] - origin[2]) + mask_depth]).astype(int)
    bottom = np.max([0, np.abs(center[2] - origin[2]) - mask_depth]).astype(int)
    
    bbox = [[down, up], [left, right], [bottom, top]]
    
    return mask, bbox

In [3]:
############
#
# Getting list of image files
luna_path = "luna16_test_data/"
luna_subset_path = luna_path
output_path = "./"
file_list=glob(luna_subset_path+"*.mhd")

In [4]:
#####################
#
# Helper function to get rows in data frame associated 
# with each file
def get_filename(file_list, case):
    for f in file_list:
        if case in f:
            return(f)
#
# The locations of the nodes
#df_node = pd.read_csv(luna_path+"annotations.csv")
df_node = pd.read_csv(luna_path + "candidates_with_annotations.csv")
df_node["file"] = df_node["seriesuid"].map(lambda file_name: get_filename(file_list, file_name))
df_node = df_node.dropna()  # Drops the false positives

In [5]:
"""
Normalize pixel depth into Hounsfield units (HU)
This tries to get all pixels between -1000 and 400 HU.
All other HU will be masked.
Then we normalize pixel values between 0 and 1.
"""
def normalizePlanes(npzarray):
     
    maxHU = 400.
    minHU = -1000.
 
    npzarray = (npzarray - minHU) / (maxHU - minHU)
    npzarray[npzarray>1] = 1.
    npzarray[npzarray<0] = 0.
    return npzarray

In [6]:
def normalize_img(img):
    
    '''
    Sets the MHD image to be approximately 1.0 mm voxel size
    
    https://itk.org/ITKExamples/src/Filtering/ImageGrid/ResampleAnImage/Documentation.html
    '''
    new_x_size = int(img.GetSpacing()[0]*img.GetWidth())  # Number of pixels you want for x dimension
    new_y_size = int(img.GetSpacing()[1]*img.GetHeight()) # Number of pixels you want for y dimension
    new_z_size = int(img.GetSpacing()[2]*img.GetDepth())  # Number of pixels you want for z dimesion
    new_size = [new_x_size, new_y_size, new_z_size]
    
#     new_spacing = [old_sz*old_spc/new_sz  for old_sz, old_spc, new_sz in zip(img.GetSize(), img.GetSpacing(), new_size)]

    new_spacing = [1,1,1]  # New spacing to be 1.0 x 1.0 x 1.0 mm voxel size
    interpolator_type = sitk.sitkLinear

    return sitk.Resample(img, new_size, sitk.Transform(), interpolator_type, img.GetOrigin(), new_spacing, img.GetDirection(), 0.0, img.GetPixelIDValue())

In [8]:
#####
#
# Looping over the image files
#
for fcount, img_file in enumerate(tqdm(file_list)):
    
    mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
    
    if mini_df.shape[0]>0: # some files may not have a nodule--skipping those 
        
        '''
        Extracts 2D patches from the 3 planes (transverse, coronal, and sagittal).
        
        The sticky point here is the order of the axes. Numpy is z,y,x and SimpleITK is x,y,z.
        I've found it very difficult to keep the order correct when going back and forth,
        but this code seems to pass the sanity checks.
        '''
        # Load the CT scan (3D .mhd file)
        itk_img = sitk.ReadImage(img_file)  # indices are x,y,z (note the ordering of dimesions)
        
        # Normalize the image spacing so that a voxel is 1x1x1 mm in dimension
        itk_img = normalize_img(itk_img)
       
        # SimpleITK keeps the origin and spacing information for the 3D image volume
        img_array = sitk.GetArrayFromImage(itk_img) # indices are z,y,x (note the ordering of dimesions)
        slice_z, height, width = img_array.shape        
        origin = np.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm) - Not same as img_array
        spacing = np.array(itk_img.GetSpacing())    # spacing of voxels in world coordinates (mm)
        
        for candidate_idx, cur_row in mini_df.iterrows(): # Iterate through all candidates
            
            # This is the real world x,y,z coordinates of possible nodule (in mm)
            candidate_x = cur_row['coordX']
            candidate_y = cur_row['coordY']
            candidate_z = cur_row['coordZ']
            diam = cur_row['diameter_mm']  # Only defined for true positives
            
            mask_width = 32 # This is really the half width so window will be double this width
            mask_height = 32 # This is really the half height so window will be double this height
            mask_depth = 32 # This is really the half depth so window will be double this depth
            
            center = np.array([candidate_x, candidate_y, candidate_z])   # candidate center
            voxel_center = np.rint((center-origin)/spacing).astype(int)  # candidate center in voxel space (still x,y,z ordering)
            
            # Calculates the bounding box (and ROI mask) for desired position
            mask, bbox = make_mask(center, diam, voxel_center[2]*spacing[2]+origin[2],
                                   width, height, slice_z, spacing, origin, 
                                   mask_width, mask_height, mask_depth)
                
            # Transverse slice 2D view - Y-X plane
            # Confer with https://en.wikipedia.org/wiki/Anatomical_terms_of_location#Planes
            img_transverse = normalizePlanes(img_array[voxel_center[2], 
                                                       bbox[0][0]:bbox[0][1], 
                                                       bbox[1][0]:bbox[1][1]])
            
            # Sagittal slice 2D view - Z-Y plane
            img_sagittal = normalizePlanes(img_array[bbox[2][0]:bbox[2][1], 
                                                     bbox[0][0]:bbox[0][1], 
                                                     voxel_center[0]])
            
            # Coronal slice 2D view - Z-X plane
            img_coronal = normalizePlanes(img_array[bbox[2][0]:bbox[2][1], 
                                                    voxel_center[1], 
                                                    bbox[1][0]:bbox[1][1]])
               
            print(img_file)
            plt.figure(figsize=(15,15))
            plt.subplot(1,3,1)
            plt.imshow(img_transverse, cmap='bone')
            plt.title('Transverse view')
            plt.subplot(1,3,2)
            plt.imshow(img_sagittal, cmap='bone')
            plt.title('Sagittal view')
            plt.subplot(1,3,3)
            plt.imshow(img_coronal, cmap='bone')
            plt.title('Coronal view')
            plt.show()


  0%|          | 0/6 [00:00<?, ?it/s]
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405.mhd
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405.mhd
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405.mhd
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016233746780170740405.mhd
 17%|█▋        | 1/6 [00:03<00:15,  3.15s/it]
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408431413578140925249.mhd
 33%|███▎      | 2/6 [00:04<00:10,  2.73s/it]
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.148447286464082095534651426689.mhd
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.148447286464082095534651426689.mhd
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.148447286464082095534651426689.mhd
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.148447286464082095534651426689.mhd
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.148447286464082095534651426689.mhd
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.148447286464082095534651426689.mhd
 50%|█████     | 3/6 [00:09<00:10,  3.40s/it]
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.154677396354641150280013275227.mhd
 67%|██████▋   | 4/6 [00:11<00:05,  2.78s/it]
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.194440094986948071643661798326.mhd
 83%|████████▎ | 5/6 [00:12<00:02,  2.37s/it]
luna16_test_data/1.3.6.1.4.1.14519.5.2.1.6279.6001.227962600322799211676960828223.mhd
100%|██████████| 6/6 [00:13<00:00,  2.04s/it]

In [ ]: