In [ ]:
import tensorflow as tf
#import tensorflow.contrib.learn.python.learn as learn
import tflearn
import scipy as sp
from scipy.signal import find_peaks_cwt as peakFinder
import numpy as np
import matplotlib.pyplot as plt
from random import shuffle, randint
import pandas as pd
import six
from sklearn.utils import shuffle as mutualShuf
from sklearn.preprocessing import normalize
from sklearn.metrics import roc_curve, roc_auc_score
import datetime
%matplotlib inline
In [ ]:
k = 10
In [ ]:
def importPickle(fileLocation): # File location is ./inData/6060DataFrame.pkl
"""
Imports Daniel Wilson's datafile and respectively returns a matrix of class
(whether patient is healthy or unhealthy) data, and a matrix of coil data.
"""
# Import dataframe
path60 = fileLocation
df60 = pd.read_pickle(path60)
# Separate pandas dataframe into classification and data arrays
classData = df60["Classification"].as_matrix()
coilData = df60["Coil Data"].as_matrix()
return classData, coilData
def splitData(coilData, classData):
"""
Split data into healthy and ill types.
"""
illData = []
healthData = []
for index, item in enumerate(classData):
if item == 1:
illData.append(coilData[index])
if item == 0:
healthData.append(coilData[index])
return illData, healthData
classData, coilData = importPickle("./inData/6060DataFrame.pkl")
# Normalise the data
for index, item in enumerate(coilData):
coilData[index] = normalize(item, axis=1)
illData, healthData = splitData(coilData, classData)
if k == 1:
illUnseen = np.vstack(np.array(illData[:20]))#[:,::40]
healthUnseen = np.vstack(np.array(healthData[:20]))#[:,::40]
illData = np.vstack(np.array(illData[20:]))#[:,::40]
healthData = np.vstack(np.array(healthData[20:]))#[:,::40]
print(illData.shape, healthData.shape,"\n", illUnseen.shape, healthUnseen.shape)
if k != 1:
illData = np.array_split(illData, k)
healthData = np.array_split(healthData, k)
In [ ]:
# Various plots...
r = randint(0,390)
print(r)
fig, ax = plt.subplots(3,3)
for i in np.arange(0,ax.shape[0],1):
for j in np.arange(0,ax.shape[1],1):
r = r+1
for l in np.arange(0,15):
ax[i,j].plot(coilData[r][l, 0:2000])
ax[i,j].set_xticks([])
In [ ]:
def processClassData(classData):
"""
Process classData.
Returns a one-hot array of shape [len(classData), 2].
"""
# Convert label data to one-hot array
classDataOH = np.zeros((len(classData),2))
classDataOH[np.arange(len(classData)), classData] = 1
return classDataOH
def visualiseData(ecgData, classData, gridSize, axis):
"""
Plot labelled example data in a gridSize*gridSize grid.
"""
fig, ax = plt.subplots(gridSize, gridSize)
plt.suptitle("Labelled example data")
r = randint(0,len(classData)-16)
l = 0
for i in np.arange(0,gridSize,1):
for j in np.arange(0,gridSize,1):
l = l + 1
ax[i,j].plot(ecgData[r+l])
if axis == False:
ax[i,j].axis("off")
ax[i,j].annotate(classData[r+l], xy=(0, 0), xycoords='axes points',\
size=10, ha='left', va='top')
def functionTown(illArr, healthArr, shuffle):
"""
Return the processed ecgData and the classData (one-hot). Also return arrays of ill and healthy ppts.
If shuffle is true, shuffle data.
"""
print("ill samples", len(illArr))
print("healthy samples", len(healthArr))
classData = []
for i in np.arange(0, len(illArr)*15, 1):
classData.append(1)
for i in np.arange(0, len(healthArr)*15, 1):
classData.append(0)
ecgData = np.reshape(np.append(illArr, healthArr), (-1,2000))
print(ecgData.shape)
if shuffle == True:
classData, ecgData = mutualShuf(np.array(classData), ecgData, random_state=0)
classDataOH = processClassData(classData)
ecgData = np.reshape(ecgData, (-1,2000,1))
return ecgData, classDataOH, classData
if k == 1:
ecgData, classDataOH, classData = functionTown(illData, healthData, True)
# Reintegrate the found values...
#ecgData = np.cumsum(ecgData, axis=1)
# Get the unseen data:
unseenData, unseenClassOH, unseenClass = functionTown(illUnseen, healthUnseen, True)
#unseenData = np.cumsum(unseenData, axis=1)
iUnseen, hUnseen = splitData(unseenData, unseenClass)
unseenHL = np.tile([1,0], (len(hUnseen), 1))
unseenIL = np.tile([0,1], (len(iUnseen), 1))
kfoldData = []
kfoldLabels = []
kfoldLabelsOH = []
if k != 1:
kfoldStuff = []
for i in np.arange(0,k,1):
kfoldStuff.append(functionTown(illData[i], healthData[i], True))
kfoldData.append(kfoldStuff[i][0])
kfoldLabelsOH.append(kfoldStuff[i][1])
kfoldLabels.append(kfoldStuff[i][2])
print(len(kfoldData), len(kfoldLabels), len(kfoldLabelsOH))
In [ ]:
visualiseData(kfoldData[0], kfoldLabels[0], 2, False)
#plt.savefig("../thesis/images/mcg1D.pdf")
In [ ]:
plt.plot(np.arange(0,1000,0.5), kfoldData[0][4])
plt.title("Example MCG output from single SQUID")
plt.xlabel("Time (ms)")
In [ ]:
if k == 1:
healthEval = []
illEval = []
unseenSpec = []
unseenSens = []
unseenAvg = []
unseenroc = []
aucu = []
spec = []
sens = []
roc = []
auc = []
In [ ]:
if k != 1:
for i in np.arange(0,k,1):
sess = tf.InteractiveSession()
tf.reset_default_graph()
tflearn.initializations.normal()
# Input layer:
net = tflearn.layers.core.input_data(shape=[None, 2000, 1])
# First layer:
net = tflearn.layers.conv.conv_1d(net, 32, 5, activation="leaky_relu")
net = tflearn.layers.conv.max_pool_1d(net, 2, strides=2)
# Second layer:
net = tflearn.layers.conv.conv_1d(net, 64, 5, activation="leaky_relu")
net = tflearn.layers.conv.max_pool_1d(net, 2, strides=2)
# Third layer (added)
#net = tflearn.layers.conv.conv_1d(net, 64, 5, activation="leaky_relu")
#net = tflearn.layers.conv.max_pool_1d(net, 2)
# Fully connected layer 1:
net = tflearn.layers.core.fully_connected(net, 1024, regularizer="L2", weight_decay=0.001, activation="leaky_relu")
net = tflearn.layers.core.fully_connected(net, 1024, regularizer="L2", weight_decay=0.001, activation="leaky_relu")
# Dropout layer:
net = tflearn.layers.core.dropout(net, keep_prob=0.5)
# Output layer:
net = tflearn.layers.core.fully_connected(net, 2, activation="softmax")
net = tflearn.layers.estimator.regression(net, optimizer='adam',\
loss='categorical_crossentropy', learning_rate=0.0001)
model = tflearn.DNN(net, tensorboard_verbose=0)
dummyData = np.reshape(np.concatenate(kfoldData[:i] + kfoldData[i+1:], axis=0), [-1, 2000, 1])
print(dummyData.shape)
dummyLabels = np.reshape(np.concatenate(kfoldLabelsOH[:i] + \
kfoldLabelsOH[i+1:], axis=0), [-1, 2])
model.fit(dummyData, dummyLabels, n_epoch=25, show_metric=True)
illTest = []
healthTest = []
for index, item in enumerate(kfoldLabels[i]):
if item == 1:
illTest.append(kfoldData[i][index])
if item == 0:
healthTest.append(kfoldData[i][index])
healthLabel = np.tile([1,0], (len(healthTest), 1))
illLabel = np.tile([0,1], (len(illTest), 1))
sens.append(model.evaluate(np.array(healthTest), healthLabel))
spec.append(model.evaluate(np.array(illTest), illLabel))
predicted = np.array(model.predict(np.array(kfoldData[i])))
fpr, tpr, th = roc_curve(kfoldLabels[i], predicted[:,1])
roc.append((fpr,tpr))
auc.append(roc_auc_score(kfoldLabels[i], predicted[:,1]))
if k == 1:
sess = tf.InteractiveSession()
tf.reset_default_graph()
tflearn.initializations.normal()
# Input layer:
net0 = tflearn.layers.core.input_data(shape=[None, 2000, 1])
# First layer:
net1 = tflearn.layers.conv.conv_1d(net0, 32, 10, activation="leaky_relu")
net2 = tflearn.layers.conv.max_pool_1d(net1, 2)
# Second layer:
net3 = tflearn.layers.conv.conv_1d(net2, 64, 5, activation="leaky_relu")
net4 = tflearn.layers.conv.max_pool_1d(net3, 2)
# Fully connected layer 1:
net5 = tflearn.layers.core.fully_connected(net4, 1024, regularizer="L2", weight_decay=0.001, activation="leaky_relu")
# Dropout layer:
net6 = tflearn.layers.core.dropout(net5, keep_prob=0.5)
# Output layer:
net7 = tflearn.layers.core.fully_connected(net6, 2, activation="softmax")
net8 = tflearn.layers.estimator.regression(net7, optimizer='adam', loss='categorical_crossentropy',\
learning_rate=0.0001)
model = tflearn.DNN(net0, tensorboard_verbose=3)
model.fit(ecgData, classDataOH, n_epoch=5, validation_set=0.1, show_metric=True)
In [ ]:
if k != 1:
print("Specificity:", spec, "\nAvg:", np.mean(spec), np.std(spec)/np.sqrt(k), \
"\nSensitivity:", sens, "\nAvg:", np.mean(sens), np.std(sens)/np.sqrt(k), \
"\nAUC", auc, "\nAvg:", np.mean(auc), np.std(auc)/np.sqrt(k))
if k == 1:
print(model.evaluate(unseenData, unseenClassOH),"\n",\
model.evaluate(np.array(iUnseen), unseenIL),"\n",\
model.evaluate(np.array(hUnseen), unseenHL))
In [ ]:
np.save("./outData/finalData/1d_results_10k_25epoch", (spec, sens, auc))
In [ ]:
plt.figure(figsize=(5, 5))
if k != 1:
for i in np.arange(k):
plt.plot(roc[i][0], roc[i][1], "--")
#plt.plot(unseenroc[i][0], unseenroc[i][1])
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.title("ROC curves for 1D MCG CNN after 25 epochs")
#plt.savefig("../thesis/images/roc_mcg1d.pdf")
np.save("./outData/finalData/1d25epoch10k.npy", roc)
print(auc)
if k == 1:
fpr, tpr, th = roc_curve(kfoldLabels[i], predicted[:,1])
plt.plot(fpr, tpr)
In [ ]:
netTst = tf.reshape(tf.tile(tf.reshape(net1, [-1]), [15]), [-1, 2000, 15, 30])
In [ ]:
w1 = net1.W
weights1 = model.get_weights(w1)
expandedWeights1 = np.reshape(np.tile(np.reshape(weights1, [10,32]), [15]), [10,15,1,32])
plt.imshow(np.transpose(expandedWeights1[:,:,0,2]), cmap='gray', interpolation='nearest')
In [ ]:
plt.imshow(weights[:,:,0,2].T, cmap='gray', interpolation='nearest')
print(data.shape)
In [ ]:
b1 = net1.b
biases1 = model.get_weights(var2)
b2 = net3
In [ ]:
var2 = net3.W
data1 = model.get_weights(var2)
print(data1[:,0,0,0])
plt.imshow(data1[:,0,0], cmap='gray', interpolation='nearest')
In [ ]: