In [1]:
%matplotlib inline
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import dicom
import os
import scipy.ndimage
import matplotlib.pyplot as plt
from skimage import measure, morphology
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
# Some constants
INPUT_FOLDER = '/media/ivanvladimir/musica/stage1/'
patients = os.listdir(INPUT_FOLDER)
patients.sort()
print (patients)
IMG_SIZE_PX = 40
SLICE_COUNT = 20
In [2]:
# Load the scans in given folder path
def load_scan(path):
slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
slices.sort(key = lambda x: float(x.ImagePositionPatient[2]))
try:
slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
except:
slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
for s in slices:
s.SliceThickness = slice_thickness
return slices
In [3]:
MIN_BOUND = -1000.0
MAX_BOUND = 400.0
def normalize(image):
image = (image - MIN_BOUND) / (MAX_BOUND - MIN_BOUND)
image[image>1] = 1.
image[image<0] = 0.
return image
PIXEL_MEAN = 0.25
def zero_center(image):
image = image - PIXEL_MEAN
return image
def get_pixels_hu(slices):
image = np.stack([s.pixel_array for s in slices])
# Convert to int16 (from sometimes int16),
# should be possible as values should always be low enough (<32k)
image = image.astype(np.int16)
# Set outside-of-scan pixels to 0
# The intercept is usually -1024, so air is approximately 0
image[image == -2000] = 0
# Convert to Hounsfield units (HU)
for slice_number in range(len(slices)):
intercept = slices[slice_number].RescaleIntercept
slope = slices[slice_number].RescaleSlope
if slope != 1:
image[slice_number] = slope * image[slice_number].astype(np.float64)
image[slice_number] = image[slice_number].astype(np.int16)
image[slice_number] += np.int16(intercept)
#image[slice_number]=normalize(zero_center(image[slice_number]))
#Revisar esto para saber cómo funciona
return np.array(image, dtype=np.int16)
In [4]:
"""
Created on Tue Mar 7 12:33:50 2017
@author: juan
"""
import naturalCubicSpline as spline
import numericalIntegral
import csv
values_juan={}
with open('splineData.csv', 'r') as csvfile:
values_reader = csv.reader(csvfile)
for row in values_reader:
try:
values_juan[float(row[0])]=float(row[1])
except ValueError:
pass
def g_(i):
if i <-999:
i=-999
if i>0:
return i
return values_juan[i]
def get_pixels_juan(slices):
vfunc = np.vectorize(g_)
slices=vfunc(slices)
return slices
#b = np.array([[8,1,7], [4,3,9], [5,2,6]])
#vecfun = np.vectorize(g)
#result = vecfun(b)
#print (result)
In [5]:
juan_data=[]
for i in range(-1000,3000):
juan_data.append(g_(i))
plt.plot(juan_data)
plt.show()
In [46]:
first_patient = load_scan(INPUT_FOLDER + patients[0])
first_patient_pixels = get_pixels_hu(first_patient)
plt.hist(first_patient_pixels.flatten(), bins=80, color='c')
plt.xlabel("Hounsfield Units (HU)")
plt.ylabel("Frequency")
plt.show()
first_patient_pixels_juan = get_pixels_juan(first_patient_pixels)
plt.hist(first_patient_pixels_juan.flatten(), bins=80, color='r')
plt.xlabel("Hounsfield Units (HU)")
plt.ylabel("Frequency")
plt.show()
# Show some slice in the middle
plt.imshow(first_patient_pixels[80], cmap=plt.cm.gray)
plt.show()
# Show juan transformation
first_patient_pixels_juan = get_pixels_juan(first_patient_pixels)
plt.imshow(first_patient_pixels_juan[80], cmap=plt.cm.gray)
plt.show()
In [47]:
plt.imshow(first_patient_pixels_juan[80]-first_patient_pixels[80], cmap=plt.cm.gray)
plt.show()
In [6]:
def resample(image, scan, new_spacing=[1,1,1]):
# Determine current pixel spacing
spacing = np.array([scan[0].SliceThickness] + scan[0].PixelSpacing, dtype=np.float32)
resize_factor = spacing / new_spacing
new_real_shape = image.shape * resize_factor
new_shape = np.round([new_real_shape[0],52,52])
real_resize_factor = new_shape / image.shape
new_spacing = spacing / real_resize_factor
image = scipy.ndimage.interpolation.zoom(image, (real_resize_factor), mode='nearest')
#image.resize((SLICE_COUNT, IMG_SIZE_PX, IMG_SIZE_PX))
return image[-20:], new_spacing
In [7]:
pix_resampled, spacing = resample(first_patient_pixels, first_patient, [10,5,5])
print("Shape before resampling\t", first_patient_pixels.shape)
print("Shape after resampling\t", pix_resampled.shape)
In [91]:
def plot_3d(image, threshold=-400):
# Position the scan upright,
# so the head of the patient would be at the top facing the camera
p = image.transpose(2,1,0)
verts, faces = measure.marching_cubes(p, threshold)
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
# Fancy indexing: `verts[faces]` to generate a collection of triangles
mesh = Poly3DCollection(verts[faces], alpha=0.70)
face_color = [0.45, 0.45, 0.75]
mesh.set_facecolor(face_color)
ax.add_collection3d(mesh)
ax.set_xlim(0, p.shape[0])
ax.set_ylim(0, p.shape[1])
ax.set_zlim(0, p.shape[2])
plt.show()
In [92]:
plot_3d(pix_resampled, 400)
In [93]:
plt.imshow(pix_resampled[10], cmap=plt.cm.gray)
plt.show()
In [94]:
def largest_label_volume(im, bg=-1):
vals, counts = np.unique(im, return_counts=True)
counts = counts[vals != bg]
vals = vals[vals != bg]
if len(counts) > 0:
return vals[np.argmax(counts)]
else:
return None
def segment_lung_mask(image, fill_lung_structures=True):
# not actually binary, but 1 and 2.
# 0 is treated as background, which we do not want
binary_image = np.array(image > -300, dtype=np.int8)+1
labels = measure.label(binary_image)
# Pick the pixel in the very corner to determine which label is air.
# Improvement: Pick multiple background labels from around the patient
# More resistant to "trays" on which the patient lays cutting the air
# around the person in half
background_label = labels[0,0,0]
#Fill the air around the person
binary_image[background_label == labels] = 2
# Method of filling the lung structures (that is superior to something like
# morphological closing)
if fill_lung_structures:
# For every slice we determine the largest solid structure
for i, axial_slice in enumerate(binary_image):
axial_slice = axial_slice - 1
labeling = measure.label(axial_slice)
l_max = largest_label_volume(labeling, bg=0)
if l_max is not None: #This slice contains some lung
binary_image[i][labeling != l_max] = 1
binary_image -= 1 #Make the image actual binary
binary_image = 1-binary_image # Invert it, lungs are now 1
# Remove other air pockets insided body
labels = measure.label(binary_image, background=0)
l_max = largest_label_volume(labels, bg=0)
if l_max is not None: # There are air pockets
binary_image[labels != l_max] = 0
return binary_image
In [95]:
segmented_lungs = segment_lung_mask(pix_resampled, False)
segmented_lungs_fill = segment_lung_mask(pix_resampled, True)
In [96]:
plt.imshow(segmented_lungs[10], cmap=plt.cm.gray)
plt.show()
In [97]:
plot_3d(segmented_lungs, 0)
In [98]:
plot_3d(segmented_lungs_fill, 0)
In [99]:
plot_3d(segmented_lungs_fill - segmented_lungs, 0)
In [9]:
labels = pd.read_csv('/media/corpora/cancer/stage1_labels.csv', index_col=0)
much_data = []
for num,patient in enumerate(patients[:1100]):
if num % 1 == 0:
print(num,patient)
try:
label = labels.get_value(patient, 'cancer')
patient = load_scan(INPUT_FOLDER + patient)
patient_pixels = get_pixels_hu(patient)
patient_pixels = get_pixels_juan(patient_pixels)
pix_resampled, spacing = resample(patient_pixels, patient, [10,5,5])
#print(pix_resampled.shape)
#segmented_lungs = segment_lung_mask(pix_resampled, False)
#segmented_lungs_fill = segment_lung_mask(pix_resampled, True)
#img_data,label = process_data(patient,labels,img_px_size=IMG_SIZE_PX, hm_slices=SLICE_COUNT)
#print(img_data.shape,label)
if label == 1: label=np.array([0,1])
elif label == 0: label=np.array([1,0])
plt.imshow(pix_resampled[10], cmap=plt.cm.gray)
plt.show()
much_data.append([pix_resampled,label])
except KeyError as e:
print('This is unlabeled data!')
#np.save('muchdata-{}-{}-{}.npy'.format(IMG_SIZE_PX,IMG_SIZE_PX,SLICE_COUNT), much_data)
np.save('muchdata.npy', much_data)
In [2]:
import tensorflow as tf
import numpy as np
n_classes = 2
batch_size = 10
x = tf.placeholder('float')
y = tf.placeholder('float')
keep_rate = 0.8
In [3]:
def conv3d(x, W):
return tf.nn.conv3d(x, W, strides=[1,1,1,1,1], padding='SAME')
def maxpool3d(x):
# size of window movement of window as you slide about
return tf.nn.max_pool3d(x, ksize=[1,2,2,2,1], strides=[1,2,2,2,1], padding='SAME')
In [8]:
VAL_MAGIC=162000
def convolutional_neural_network(x):
# # 5 x 5 x 5 patches, 1 channel, 32 features to compute.
weights = {'W_conv1':tf.Variable(tf.random_normal([3,3,3,1,32])),
# 5 x 5 x 5 patches, 32 channels, 64 features to compute.
'W_conv2':tf.Variable(tf.random_normal([3,3,3,32,32])),
# 64 features
'W_fc':tf.Variable(tf.random_normal([VAL_MAGIC ,1024])),
'out':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
'b_conv2':tf.Variable(tf.random_normal([32])),
'b_fc':tf.Variable(tf.random_normal([1024])),
'out':tf.Variable(tf.random_normal([n_classes]))}
# image X image Y image Z
x = tf.reshape(x, shape=[-1, IMG_SIZE_PX, IMG_SIZE_PX, SLICE_COUNT, 1])
conv1 = tf.nn.relu(conv3d(x, weights['W_conv1']) + biases['b_conv1'])
conv1 = maxpool3d(conv1)
conv2 = tf.nn.relu(conv3d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool3d(conv2)
fc = tf.reshape(conv2,[-1, VAL_MAGIC ])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
fc = tf.nn.dropout(fc, keep_rate)
output = tf.matmul(fc, weights['out'])+biases['out']
return output
In [9]:
much_data = np.load('muchdata.npy')
# If you are working with the basic sample data, use maybe 2 instead of 100 here... you don't have enough data to really do this
train_data = much_data[:18]
validation_data = much_data[18:]
for data in much_data:
print(data[0].shape)
def train_neural_network(x):
prediction = convolutional_neural_network(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=prediction,labels=y) )
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)
hm_epochs = 100
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
successful_runs = 0
total_runs = 0
for epoch in range(hm_epochs):
epoch_loss = 0
#print (epoch)
#print (train_data.shape)
for data in train_data:
total_runs += 1
#try:
X = data[0]
Y = data[1]
_, c = sess.run([optimizer, cost], feed_dict={x: X, y: Y})
epoch_loss += c
successful_runs += 1
'''except Exception as e:
# I am passing for the sake of notebook space, but we are getting 1 shaping issue from one
# input tensor. Not sure why, will have to look into it. Guessing it's
# one of the depths that doesn't come to 20.
pass
#print(str(e))'''
print('Epoch', epoch+1, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
#print('Accuracy:',accuracy.eval({x:[i[0] for i in validation_data], y:[i[1] for i in validation_data]}))
print('Done. Finishing accuracy:')
#print('Accuracy:',accuracy.eval({x:[i[0] for i in validation_data], y:[i[1] for i in validation_data]}))
print('fitment percent:',successful_runs/total_runs)
# Run this locally:
train_neural_network(x)
In [ ]: