In [1]:
import numpy as np
In [2]:
class NearestNeighbor:
    """Brute-force k-nearest-neighbour classifier.

    `train` only memorises the data; all the work happens in `predict1`,
    which measures the distance from the query to every stored example and
    lets the k closest ones vote on the label.
    """

    def train(self, X, Y):
        """Memorise the training set.

        X is NxD array, N: training examples, D: flattened img.
        Y holds the N labels, Y[i] being the label of row X[i].
        """
        # Stored as float64 once, up front: subtracting unsigned-integer
        # arrays (e.g. uint8 pixels) wraps around instead of going negative,
        # which silently corrupts every distance.
        self.Xtrain = np.asarray(X, dtype=np.float64)
        # asarray so that a plain Python list of labels can be fancy-indexed.
        self.Ytrain = np.asarray(Y)

    def predict1(self, X, k=1, metric="l1"):
        """Predict the label of a single example.

        X is 1xD array, D: flattened img.
        k is the number of nearest neighbours that vote (must be >= 1; values
        larger than the training set simply use every training example).
        metric selects the distance: "l1" (sum |a - b|, the default) or
        "l2" (sqrt(sum (a - b)^2)).

        Returns the most frequent label among the k nearest neighbours. When
        several labels are tied, the smallest one (in np.unique's sort order)
        wins.

        Raises ValueError for k < 1 or an unknown metric.
        """
        if k < 1:
            raise ValueError("k must be at least 1, got %r" % (k,))

        # No-op when the data came through train(); guards against Xtrain
        # having been assigned directly with an unsigned dtype.
        train = np.asarray(self.Xtrain, dtype=np.float64)
        diff = train - np.asarray(X, dtype=np.float64)

        if metric == "l1":
            # L1 distance: d = sum |a - b|
            distances = np.abs(diff).sum(axis=1)
        elif metric == "l2":
            # L2 distance: d = sqrt(sum (a - b)^2), reduced per training row
            distances = np.sqrt((diff ** 2).sum(axis=1))
        else:
            raise ValueError("unknown metric %r, expected 'l1' or 'l2'" % (metric,))

        # Stable sort: equally distant examples keep their training order,
        # so the result is deterministic.
        nearest = np.argsort(distances, kind="stable")[:k]
        neighbour_labels = np.asarray(self.Ytrain)[nearest]

        # counts[j] belongs to labels[j] (the sorted unique labels), NOT to
        # neighbour_labels[j] -- so the winner must be looked up in `labels`.
        labels, counts = np.unique(neighbour_labels, return_counts=True)
        return labels[counts.argmax()]
In [3]:
from common import *
In [4]:
# Load one CIFAR-10 batch and split it into a training part and a held-out part.
# NOTE(review): cifar10() is not defined in this notebook (presumably it comes
# from `common`); the code below only relies on it returning a mapping with
# b'data' (2-D, one row per image) and b'labels' entries.
cifar = cifar10()

# Single source of truth for the split point, so the four slices below cannot
# drift apart: rows [0, N_TRAIN) are used for training, the rest for testing.
N_TRAIN = 8000

trainX = cifar[b'data'][:N_TRAIN, :]
trainY = cifar[b'labels'][:N_TRAIN]
testX = cifar[b'data'][N_TRAIN:, :]
testY = cifar[b'labels'][N_TRAIN:]

# Every image needs exactly one label, in both parts of the split.
assert len(trainX) == len(trainY), "training images and labels are misaligned"
assert len(testX) == len(testY), "test images and labels are misaligned"

# Dataset metadata; b'label_names' is used later to turn label ids into names.
meta = cifar10('meta')
meta
Out[4]:
In [5]:
%matplotlib inline
In [6]:
# Sanity check: render one training image and show its class name.
# NOTE(review): reshape((32, 32, 3)) assumes each flattened row is laid out
# pixel-by-pixel (height, width, channel). Raw CIFAR-10 batches are documented
# as channel-major (all red, then green, then blue) -- confirm that
# cifar10()/imshow already account for this, otherwise the picture is scrambled.
i = 3
imshow(trainX[i].reshape((32, 32, 3)))
meta[b'label_names'][trainY[i]]
Out[6]:
In [7]:
# Build the classifier and hand it the training split; "training" here just
# stores the arrays on the instance for use at prediction time.
nn = NearestNeighbor()
nn.train(X=trainX, Y=trainY)
In [8]:
# Classify one held-out image with 5 voting neighbours, show the image, and
# report the predicted class name next to the true one.
i = 1
pred = nn.predict1(testX[i], k=5)
imshow(testX[i].reshape((32, 32, 3)))
"got %s, should be %s" % tuple(
    meta[b'label_names'][label_id] for label_id in (pred, testY[i])
)
Out[8]:
In [9]:
# Compare several neighbourhood sizes on a small slice of the held-out data.
# Only the first C test images are used because every single prediction
# measures the distance to the whole training set.
C = 50
testX_ = testX[:C, :]
testY_ = np.asarray(testY[:C])
# Use the real slice length: it is smaller than C when fewer test images exist.
n_eval = len(testY_)

for k in [1, 3, 9, 20, 50]:
    pred = [nn.predict1(testX_[i], k) for i in range(n_eval)]
    # Compare labels directly instead of subtracting them: this also works for
    # non-numeric labels and does not depend on integer arithmetic.
    wrong_cnt = np.count_nonzero(testY_ != np.asarray(pred))
    # Fraction of correctly classified images, i.e. accuracy (not precision).
    print("Accuracy for k=%d: %.2f" % (k, 1 - (wrong_cnt / n_eval)))