In [1]:
import numpy as np
import pandas as pd
In [2]:
# from http://www.cs.toronto.edu/~kriz/cifar.html
def unpickle(filename):
    """Load and return the object stored in the pickle file `filename`.

    The file is opened in binary mode and is always closed, even when
    unpickling fails.

    NOTE(review): pickle can execute arbitrary code while loading; only use
    this on files from a trusted source.
    """
    try:
        import cPickle as pickle  # Python 2: C-accelerated implementation
        load_kwargs = {}
    except ImportError:
        import pickle  # Python 3: cPickle was merged into pickle
        # Pickles written by Python 2 hold 8-bit strings; 'latin1' lets
        # Python 3 decode them without raising UnicodeDecodeError.
        load_kwargs = {'encoding': 'latin1'}

    with open(filename, 'rb') as fo:
        return pickle.load(fo, **load_kwargs)
In [3]:
# Directory holding the extracted CIFAR-10 python batches; edit this one
# line to point at a local copy of the dataset.
CIFAR_DIR = '/Users/excalibur/Dropbox/1_Studies/1_STEM/ArtificialIntelligence/datasets/cifar-10-batches-py'

# Read the five training batches (data_batch_1 .. data_batch_5) first, then
# stack them once; growing the arrays with vstack/hstack inside the loop
# re-copies everything loaded so far on every iteration.
train_batches = [
    unpickle('{0}/data_batch_{1}'.format(CIFAR_DIR, i)) for i in range(1, 6)
]
X_train = np.vstack([batch['data'] for batch in train_batches])
y_train = np.hstack([np.array(batch['labels']) for batch in train_batches])
del train_batches  # the stacked arrays are all that is needed from here on

print(X_train.shape)
print(y_train.shape)
In [4]:
# Load the single test batch and split it into data and labels.
test_batch = unpickle(
    '/Users/excalibur/Dropbox/1_Studies/1_STEM/ArtificialIntelligence/datasets/cifar-10-batches-py/test_batch'
)
X_test, y_test = test_batch['data'], np.array(test_batch['labels'])

# Report the array shapes as a quick sanity check.
print(X_test.shape)
print(y_test.shape)
L1 (Manhattan) distance between two images $I_{1}$ and $I_{2}$, summed over all pixels $p$:

$d_{1}(I_{1},I_{2}) = \sum\limits_{p} \lvert I_{1}^{p}-I_{2}^{p}\rvert$
In [5]:
class NearestNeighbor(object):
    """Brute-force 1-nearest-neighbour classifier using the L1 distance.

    `train` only stores the data; `predict_SLOW` compares every query row
    against every stored row and copies the label of the closest one.
    """

    def __init__(self):
        self.X_train = []
        self.y_train = []
        self.y_hat = []

    def train(self, X, y):
        """Remember the training rows `X` and their labels `y`."""
        self.X_train = X
        self.y_train = y

    ### CRAZY SLOW
    def predict_SLOW(self, X):
        """Return the predicted label for each row of `X`.

        For every query row, the L1 distance to each training row is
        computed and the label of the nearest training row is used. The
        result is also stored in `self.y_hat` for `score`. Progress is
        printed every 100 rows.
        """
        X = np.asarray(X)
        train = np.asarray(self.X_train)
        labels = np.asarray(self.y_train)

        # Subtracting unsigned integers wraps around (e.g. uint8: 10 - 12
        # gives 254) and np.abs cannot undo that, which corrupts the
        # distances. Do the arithmetic in a signed type instead: int16 holds
        # every difference of 8-bit values; floats and wider integers keep
        # numpy's promoted type.
        work_dtype = np.promote_types(
            np.promote_types(train.dtype, X.dtype), np.int16
        )
        # Loop-invariant: convert the training matrix once, not per query.
        train = train.astype(work_dtype)

        self.y_hat = np.zeros(X.shape[0], dtype=labels.dtype)
        for i in range(X.shape[0]):
            query = X[i, :].astype(work_dtype)
            L1_row_distances = np.sum(np.abs(train - query), axis=1)
            argmin_index = np.argmin(L1_row_distances)
            self.y_hat[i] = labels[argmin_index]
            if i % 100 == 0:
                print(i)
        return self.y_hat

    def predict(self, X):
        """Alias for `predict_SLOW`, the only predictor implemented."""
        return self.predict_SLOW(X)

    def score(self, y):
        """Return the fraction of stored predictions that equal `y`."""
        return np.mean(self.y_hat == y)
In [6]:
# Disabled: the brute-force predictor is very slow on the full test set.
#nn = NearestNeighbor()
#nn.train(X_train, y_train)
#y_hat = nn.predict_SLOW(X_test)
#nn.score(y_test)
In [7]:
from sklearn import neighbors, datasets
In [14]:
# k-nearest-neighbours classifier that votes among the 10 closest samples.
clf = neighbors.KNeighborsClassifier(n_neighbors=10)
In [ ]:
# Fit on the training set, then predict the test set; fit() returns the
# estimator itself, so the two calls can be chained.
clf.fit(X_train, y_train).predict(X_test)
In [13]:
# Mean accuracy of the fitted classifier on the test set.
clf.score(X_test, y_test)
Out[13]: