In [3]:
import mnist_util as mu
import numpy as np


# importing training images and test images
X_train, Y_train, X_test, Y_test = mu.get_np_array()

In [4]:
print('Training data shape: ', X_train.shape)
print('Training labels shape: ', Y_train.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', Y_test.shape)


Training data shape:  (60000, 784)
Training labels shape:  (60000,)
Test data shape:  (10000, 784)
Test labels shape:  (10000,)
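The shapes show that mnist_util.get_np_array returns the 28x28 MNIST images already flattened into 784-dimensional rows, split into the conventional 60,000/10,000 train/test sets. The helper itself isn't shown here; a rough stand-in that produces arrays of the same shapes (using sklearn's OpenML mirror, purely an assumption about how the data could be fetched, not mnist_util's actual code) might look like:

import numpy as np
from sklearn.datasets import fetch_openml

def get_np_array_sketch():
    # fetch the 70,000 flattened 28x28 images from OpenML
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    X = mnist.data.astype(np.float64)      # shape (70000, 784)
    y = mnist.target.astype(np.int64)      # labels '0'..'9' as integers
    # conventional split: first 60,000 for training, last 10,000 for testing
    return X[:60000], y[:60000], X[60000:], y[60000:]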

In [5]:
# subsample the data so experiments run faster
num_training = 5000
mask = list(range(num_training))
X_train = X_train[mask]
Y_train = Y_train[mask]

num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
Y_test = Y_test[mask]

In [6]:
# Reshape the image data into rows (the arrays are already (N, 784), so this is a no-op here)
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)


(5000, 784) (500, 784)

In [7]:
from classifier.k_nearest_neighbor import KNearestNeighbor
classifier = KNearestNeighbor()
classifier.train(X_train, Y_train)
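For a k-NN classifier, train typically does nothing but memorize the training data; all the work happens at prediction time. A minimal sketch of what KNearestNeighbor presumably stores (the real implementation lives in classifier/k_nearest_neighbor.py and may differ):

class KNearestNeighborSketch:
    def train(self, X, y):
        # k-NN "training" just keeps references to the data for later lookup
        self.X_train = X
        self.y_train = y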

In [8]:
dists = classifier.compute_distances_no_loops(X_test)
print(dists.shape)


(500, 5000)
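compute_distances_no_loops builds the full (num_test, num_train) matrix of L2 distances without Python loops, presumably via the expansion ||a - b||^2 = ||a||^2 - 2 a·b + ||b||^2. A self-contained sketch of that idea (not the classifier's actual code):

import numpy as np

def pairwise_l2(X_test, X_train):
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, broadcast over all test/train pairs
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)    # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)                  # (num_train,)
    cross = X_test @ X_train.T                               # (num_test, num_train)
    # clip tiny negative values caused by floating-point error before the sqrt
    return np.sqrt(np.maximum(test_sq - 2.0 * cross + train_sq, 0.0))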

In [9]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# each row is a test image, each column a training image
plt.imshow(dists, interpolation='none')
plt.show()
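In the heatmap, bright rows mark test digits that are far from most of the training set. A quick way to pull those outliers out numerically (uses only the dists array computed above):

row_means = dists.mean(axis=1)            # average distance of each test image to all training images
farthest = np.argsort(row_means)[-5:]     # indices of the 5 most "unusual" test images
print('most distant test indices:', farthest)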



In [10]:
Y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(Y_test_pred == Y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))


Got 453 / 500 correct => accuracy: 0.906000
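predict_labels takes the distance matrix, finds the k nearest training images for each test image, and takes a majority vote over their labels. A minimal sketch of that procedure (assuming integer labels 0-9; the actual method may break ties differently):

def predict_labels_sketch(dists, y_train, k=1):
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test, dtype=np.int64)
    for i in range(num_test):
        # labels of the k training images closest to the i-th test image
        closest_y = y_train[np.argsort(dists[i])[:k]]
        # majority vote; np.bincount + argmax resolves ties toward the smaller digit
        y_pred[i] = np.argmax(np.bincount(closest_y))
    return y_pred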

In [66]:
# show the test images that were predicted incorrectly
# (this cell was executed after the k-sweep below, so Y_test_pred here holds
#  the predictions from the last k tried, not the k=1 predictions above)
wrong = []
for i in range(Y_test.size):
    if Y_test[i] != Y_test_pred[i]:
        wrong.append(X_test[i])
print(np.array(wrong).shape)
mu.show_nums(np.array(wrong))


(233, 784)
Out[66]:
<matplotlib.image.AxesImage at 0x7fec8e0b2b38>

In [49]:
from sklearn.metrics import confusion_matrix

k_choices = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 20, 50, 100, 200, 500, 1000]
# accuracy and a 10x10 confusion matrix for each choice of k
cm = np.empty([len(k_choices), 10, 10], dtype=np.int32)
for counter, k in enumerate(k_choices):
    Y_test_pred = classifier.predict_labels(dists, k=k)
    num_correct = np.sum(Y_test_pred == Y_test)
    accuracy = float(num_correct) / num_test
    print('for %d Got %d / %d correct => accuracy: %f' % (k, num_correct, num_test, accuracy))
    cm[counter] = confusion_matrix(Y_test, Y_test_pred)


for 1 Got 453 / 500 correct => accuracy: 0.906000
for 2 Got 447 / 500 correct => accuracy: 0.894000
for 3 Got 459 / 500 correct => accuracy: 0.918000
for 4 Got 456 / 500 correct => accuracy: 0.912000
for 5 Got 455 / 500 correct => accuracy: 0.910000
for 6 Got 452 / 500 correct => accuracy: 0.904000
for 7 Got 452 / 500 correct => accuracy: 0.904000
for 8 Got 453 / 500 correct => accuracy: 0.906000
for 9 Got 449 / 500 correct => accuracy: 0.898000
for 10 Got 445 / 500 correct => accuracy: 0.890000
for 12 Got 442 / 500 correct => accuracy: 0.884000
for 15 Got 440 / 500 correct => accuracy: 0.880000
for 20 Got 438 / 500 correct => accuracy: 0.876000
for 50 Got 425 / 500 correct => accuracy: 0.850000
for 100 Got 406 / 500 correct => accuracy: 0.812000
for 200 Got 383 / 500 correct => accuracy: 0.766000
for 500 Got 330 / 500 correct => accuracy: 0.660000
for 1000 Got 267 / 500 correct => accuracy: 0.534000
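Accuracy peaks around k = 3 and then degrades steadily; with k = 1000 each vote is dominated by whichever digits happen to be common among a fifth of the training set. Each cm[counter] is a 10x10 matrix in which entry (i, j) counts test digits of true class i predicted as class j; a minimal equivalent of sklearn's confusion_matrix for this digit case:

def confusion_matrix_sketch(y_true, y_pred, num_classes=10):
    cm = np.zeros((num_classes, num_classes), dtype=np.int64)
    for t, p in zip(y_true, y_pred):
        cm[t, p] += 1          # row = true digit, column = predicted digit
    return cm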

In [57]:
mu.plot_confusion_matrix(cm[0], title='Confusion Matrix', classes=np.arange(10))  # k = 1



In [55]:
mu.plot_confusion_matrix(cm[2], title='Confusion Matrix', classes=np.arange(10))  # k = 3



In [68]:
mu.plot_confusion_matrix(cm[15], title='Confusion Matrix', classes=np.arange(10))  # k = 200
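To read the matrices numerically rather than visually, the largest off-diagonal entry gives the most frequently confused digit pair. A small sketch, shown here for cm[2] (the k = 3 run):

off_diag = cm[2].copy()
np.fill_diagonal(off_diag, 0)              # ignore correct predictions on the diagonal
i, j = np.unravel_index(np.argmax(off_diag), off_diag.shape)
print('true digit %d most often predicted as %d (%d times)' % (i, j, off_diag[i, j]))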



In [ ]: