A support vector classifier (SVC) with an RBF kernel (C=1, gamma=0.0001) is built here, and the model is trained and tested on two sets of image data: the images under "png" and the images under "DDSM_threshold".
In [1]:
import datetime
import gc
import numpy as np
import os
import random
from scipy import misc
import string
import time
import sys
import sklearn.metrics as skm
import collections
from sklearn.svm import SVC
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
from sklearn import metrics
import dwdii_bc_model_helper as bc
# Seed Python's built-in `random` module with a fixed value so anything drawn
# from it is repeatable. This does not seed NumPy's own generator.
random.seed(20275)
# Show NumPy floats with two digits after the decimal point when printed.
np.set_printoptions(precision=2)
In [2]:
# Configuration for the first experiment: images are read from the "png"
# directory for both training and testing.
normalVsAbnormal = True
imgResize = (150, 150)
categories = bc.bcNormVsAbnormNumerics()
trainDataPath, testDataPath = "data/ddsm_train.csv", "data/ddsm_test.csv"
imagePath = trainImagePath = "png"
In [3]:
# List the contents of the data directory; the bare expression is the cell output.
os.listdir('data')
Out[3]:
In [4]:
# Read the training metadata CSV through the project helper. The helper is
# asked to balance via removal, to be verbose, and to use the normal-vs-abnormal
# labelling; it hands back three values.
metaData, meta2, mCounts = bc.load_training_metadata(
    trainDataPath,
    normalVsAbnormal=True,
    verbose=True,
    balanceViaRemoval=True,
)
In [5]:
# Load the training images and labels listed in the training CSV.
# maxData is set to the number of entries in the training metadata loaded
# earlier (presumably an upper bound on rows read -- confirm in bc.load_data).
maxData = len(metaData)
X_data, Y_data = bc.load_data(trainDataPath, trainImagePath,
                              categories=categories,
                              maxData=maxData,
                              verboseFreq=50,
                              imgResize=imgResize,
                              normalVsAbnormal=True)
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print(X_data.shape)
print(Y_data.shape)
In [6]:
# Load the test images and labels listed in the test CSV.
# NOTE(review): maxData is derived from the *training* metadata length, not
# from the test set; presumably it only acts as an upper bound on rows read,
# so it must be at least as large as the test set -- confirm in bc.load_data.
maxData = len(metaData)
X_test, Y_test = bc.load_data(testDataPath, imagePath,
                              categories=categories,
                              maxData=maxData,
                              verboseFreq=50,
                              imgResize=imgResize,
                              normalVsAbnormal=True)
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print(X_test.shape)
print(Y_test.shape)
In [7]:
# Expose the loaded arrays under the train names. These are plain aliases;
# no copy is made.
X_train, Y_train = X_data, Y_data
In [8]:
# Report the shapes of the train/test features and labels.
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)
In [9]:
def yDist(y):
    """Count how often each label occurs.

    Args:
        y: Sequence of rows whose first element is the label, i.e. each
            item is read as ``row[0]`` (for example an array of shape (n, 1)).

    Returns:
        A ``collections.defaultdict(int)`` mapping each label to the number
        of rows carrying it. Empty input yields an empty mapping.
    """
    bcCounts = collections.defaultdict(int)
    # Iterate the rows directly instead of indexing by position; this gives
    # the same counts and also accepts plain sequences without ``.shape``.
    for row in y:
        bcCounts[row[0]] += 1
    return bcCounts
# Show the label distribution of each split.
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print("Y_train Dist: " + str(yDist(Y_train)))
print("Y_test Dist: " + str(yDist(Y_test)))
In [10]:
# Flatten every training sample into a single feature row for the SVM.
# The row count is read from the array itself rather than hard-coded, so the
# cell keeps working when the number of loaded samples changes.
X_train_s = X_train.reshape((X_train.shape[0], -1))
In [11]:
# Flatten every test sample into a single feature row for the SVM.
# The row count is read from the array itself rather than hard-coded, so the
# cell keeps working when the number of loaded samples changes.
X_test_s = X_test.reshape((X_test.shape[0], -1))
In [12]:
# Collapse the training labels to one dimension, as expected by SVC.fit.
Y_train_s = Y_train.reshape(-1)
In [13]:
# RBF-kernel support vector classifier with C=1.0 and gamma=0.0001.
model = SVC(kernel='rbf', gamma=0.0001, C=1.0)
In [14]:
# Train the classifier on the flattened training images and 1-D labels.
# The returned value is the cell output.
model.fit(X_train_s,Y_train_s)
Out[14]:
In [15]:
# Ground-truth test labels, and the model's predictions for the flattened
# test images.
expected = Y_test
predicted = model.predict(X_test_s)
In [16]:
# Confusion matrix of true test labels against predictions. `expected` is the
# same object as Y_test. The bare name on the last line is the cell output.
svm_matrix = skm.confusion_matrix(y_true=expected, y_pred=predicted)
svm_matrix
Out[16]:
In [17]:
# Fraction of test samples classified correctly.
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print(metrics.accuracy_score(expected, predicted))
In [18]:
# Build the reverse of the `categories` mapping via the project helper
# (presumably numeric label -> class name; confirm in dwdii_bc_model_helper).
numBC = bc.reverseDict(categories)
In [19]:
# Take the class names in ascending key order instead of relying on the
# dictionary's iteration order, and materialize them as a list (dict.values()
# is a view in Python 3). confusion_matrix orders its rows/columns by sorted
# label, so this keeps the axis tick names aligned with the matrix.
# NOTE(review): assumes numBC is keyed by the numeric labels -- confirm.
class_names = [numBC[label] for label in sorted(numBC)]
np.set_printoptions(precision=2)
In [20]:
# Plot the non-normalized confusion matrix (raw counts) on a fresh figure
# and save the current figure to disk. matplotlib runs with the 'Agg'
# backend, so nothing is shown interactively.
plt.figure()
bc.plot_confusion_matrix(svm_matrix, classes=class_names,
title='Confusion Matrix without normalization')
plt.savefig('raw_class2_o_norm.png')
In [21]:
# Display the PNG that was just written; the Image object on the last line
# is the cell output.
from IPython.display import Image
Image(filename='raw_class2_o_norm.png')
Out[21]:
In [22]:
# Plot the confusion matrix again on a fresh figure, this time asking the
# helper to normalize it, and save the current figure to disk.
plt.figure()
bc.plot_confusion_matrix(svm_matrix, classes=class_names, normalize=True,
title='Confusion Matrix with normalization')
plt.savefig('raw_class2_norm.png')
In [23]:
# Display the normalized confusion-matrix PNG that was just written; the
# Image object on the last line is the cell output.
from IPython.display import Image
Image(filename='raw_class2_norm.png')
Out[23]:
In [24]:
# Configuration for the second experiment: same CSVs and settings as before,
# but images are read from the "DDSM_threshold" directory.
normalVsAbnormal = True
imgResize = (150, 150)
categories = bc.bcNormVsAbnormNumerics()
trainDataPath, testDataPath = "data/ddsm_train.csv", "data/ddsm_test.csv"
imagePath = trainImagePath = "DDSM_threshold"
In [25]:
# List the contents of the data directory; the bare expression is the cell output.
os.listdir('data')
Out[25]:
In [26]:
# Re-read the training metadata CSV through the project helper, asking it to
# balance via removal, be verbose, and use the normal-vs-abnormal labelling.
metaData, meta2, mCounts = bc.load_training_metadata(
    trainDataPath,
    normalVsAbnormal=True,
    verbose=True,
    balanceViaRemoval=True,
)
In [27]:
# Load the training images and labels for the current image directory.
# maxData is set to the number of entries in the training metadata loaded
# earlier (presumably an upper bound on rows read -- confirm in bc.load_data).
maxData = len(metaData)
X_data, Y_data = bc.load_data(trainDataPath, trainImagePath,
                              categories=categories,
                              maxData=maxData,
                              verboseFreq=50,
                              imgResize=imgResize,
                              normalVsAbnormal=True)
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print(X_data.shape)
print(Y_data.shape)
In [28]:
# Load the test images and labels for the current image directory.
# NOTE(review): maxData is derived from the *training* metadata length, not
# from the test set; presumably it only acts as an upper bound on rows read,
# so it must be at least as large as the test set -- confirm in bc.load_data.
maxData = len(metaData)
X_test, Y_test = bc.load_data(testDataPath, imagePath,
                              categories=categories,
                              maxData=maxData,
                              verboseFreq=50,
                              imgResize=imgResize,
                              normalVsAbnormal=True)
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print(X_test.shape)
print(Y_test.shape)
In [29]:
# Expose the freshly loaded arrays under the train names. These are plain
# aliases; no copy is made.
X_train, Y_train = X_data, Y_data
In [30]:
# Report the shapes of the train/test features and labels.
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)
In [31]:
def yDist(y):
    """Count how often each label occurs.

    Args:
        y: Sequence of rows whose first element is the label, i.e. each
            item is read as ``row[0]`` (for example an array of shape (n, 1)).

    Returns:
        A ``collections.defaultdict(int)`` mapping each label to the number
        of rows carrying it. Empty input yields an empty mapping.
    """
    bcCounts = collections.defaultdict(int)
    # Iterate the rows directly instead of indexing by position; this gives
    # the same counts and also accepts plain sequences without ``.shape``.
    for row in y:
        bcCounts[row[0]] += 1
    return bcCounts
# Show the label distribution of each split.
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print("Y_train Dist: " + str(yDist(Y_train)))
print("Y_test Dist: " + str(yDist(Y_test)))
In [32]:
# Flatten every sample into a single feature row for the SVM. The row counts
# are read from the arrays themselves rather than hard-coded, so the cell
# keeps working when the number of loaded samples changes.
X_train_s = X_train.reshape((X_train.shape[0], -1))
X_test_s = X_test.reshape((X_test.shape[0], -1))
# SVC.fit expects the labels as a 1-D array.
Y_train_s = Y_train.ravel()
In [33]:
# Same RBF-kernel classifier settings as before (C=1.0, gamma=0.0001),
# trained on the flattened images loaded for this experiment.
# The value returned by fit is the cell output.
model = SVC(kernel='rbf', gamma=0.0001, C=1.0)
model.fit(X_train_s, Y_train_s)
Out[33]:
In [34]:
# Ground-truth test labels, and the model's predictions for the flattened
# test images.
expected = Y_test
predicted = model.predict(X_test_s)
In [35]:
# Confusion matrix of true test labels against predictions. `expected` is the
# same object as Y_test. The bare name on the last line is the cell output.
svm_matrix = skm.confusion_matrix(y_true=expected, y_pred=predicted)
svm_matrix
Out[35]:
In [36]:
# Fraction of test samples classified correctly.
# Parenthesized single-argument print behaves the same under Python 2 and 3.
print(metrics.accuracy_score(expected, predicted))
In [41]:
# Reverse of the `categories` mapping, built by the project helper
# (presumably numeric label -> class name; confirm in dwdii_bc_model_helper).
numBC = bc.reverseDict(categories)
# Take the class names in ascending key order instead of relying on the
# dictionary's iteration order, and materialize them as a list (dict.values()
# is a view in Python 3). confusion_matrix orders its rows/columns by sorted
# label, so this keeps the axis tick names aligned with the matrix.
class_names = [numBC[label] for label in sorted(numBC)]
np.set_printoptions(precision=2)
# Plot the non-normalized confusion matrix on a fresh figure and save the
# current figure to disk.
plt.figure()
bc.plot_confusion_matrix(svm_matrix, classes=class_names,
                         title='Confusion Matrix without normalization')
plt.savefig('threshold_class2_o_norm.png')
In [42]:
# Display the PNG that was just written; the Image object on the last line
# is the cell output.
from IPython.display import Image
Image(filename='threshold_class2_o_norm.png')
Out[42]:
In [43]:
# Plot the confusion matrix again on a fresh figure, this time asking the
# helper to normalize it, and save the current figure to disk.
plt.figure()
bc.plot_confusion_matrix(svm_matrix, classes=class_names, normalize=True,
title='Confusion Matrix with normalization')
plt.savefig('threshold_class2_norm.png')
In [44]:
# Display the normalized confusion-matrix PNG that was just written; the
# Image object on the last line is the cell output.
from IPython.display import Image
Image(filename='threshold_class2_norm.png')
Out[44]:
In [ ]: