notebook.community

Edit and run



In [1]:

    
import numpy as np
import pandas as pd



In [2]:

    
# from http://www.cs.toronto.edu/~kriz/cifar.html
def unpickle(filename):
    """Load and return the object stored in the pickle file ``filename``.

    The file is opened in binary mode inside a ``with`` block, so the handle
    is closed even when unpickling raises.

    Parameters
    ----------
    filename : str
        Path of the pickle file to read.

    Returns
    -------
    object
        The unpickled object.

    Notes
    -----
    Unpickling can execute arbitrary code; only call this on files from a
    trusted source.
    """
    try:
        import cPickle as pickle  # Python 2: C implementation
        load_kwargs = {}
    except ImportError:
        import pickle  # Python 3: cPickle was merged into pickle
        # 'latin1' lets Python 3 read pickles written by Python 2
        # (8-bit strings, NumPy arrays) and yields ``str`` keys.
        load_kwargs = {'encoding': 'latin1'}

    with open(filename, 'rb') as fo:
        return pickle.load(fo, **load_kwargs)



In [3]:

    
# Read the five training batches (data_batch_1 .. data_batch_5) and combine
# them into a single sample matrix and a single label vector.
# NOTE: the path below is absolute and machine-specific.
batch_path = '/Users/excalibur/Dropbox/1_Studies/1_STEM/ArtificialIntelligence/datasets/cifar-10-batches-py/data_batch_{0}'

# Collect the per-batch arrays first and concatenate once at the end; calling
# vstack/hstack inside the loop re-copies the growing arrays every iteration.
X_batches = []
y_batches = []
for i in range(1, 6):
    data_batch = unpickle(batch_path.format(i))
    X_batches.append(data_batch['data'])
    y_batches.append(np.array(data_batch['labels']))

X_train = np.vstack(X_batches)
y_train = np.hstack(y_batches)
del X_batches, y_batches  # drop the extra references to the per-batch arrays

# Single-argument print(...) produces the same output on Python 2 and 3.
print(X_train.shape)
print(y_train.shape)









    



(50000, 3072)
(50000,)



In [4]:

    
# Load the test batch, split it into samples and labels, and show both shapes.
test_batch = unpickle(
    '/Users/excalibur/Dropbox/1_Studies/1_STEM/ArtificialIntelligence/datasets/cifar-10-batches-py/test_batch'
)
X_test, y_test = test_batch['data'], np.array(test_batch['labels'])
print(X_test.shape)
print(y_test.shape)









    



(10000, 3072)
(10000,)

$d_{1}(I_{1},I_{2}) = \sum\limits_{p} \lvert I_{1}^{p}-I_{2}^{p}\rvert$



In [5]:

    
class NearestNeighbor(object):
    """1-nearest-neighbour classifier using the L1 distance.

    The distance between two samples is the sum over all features of the
    absolute differences. ``train`` only memorises the data; all the work
    happens at prediction time.

    Attributes
    ----------
    X_train : array
        Training samples, one per row (set by ``train``).
    y_train : array
        Training labels, one per row of ``X_train`` (set by ``train``).
    y_hat : array
        Labels produced by the most recent prediction call.
    """

    def __init__(self):
        self.X_train = []
        self.y_train = []
        self.y_hat = []

    def train(self, X, y):
        """Memorise training samples ``X`` (one per row) and their labels ``y``."""
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)

    ### CRAZY SLOW
    def predict_SLOW(self, X):
        """Predict a label for every row of ``X``.

        Each row is compared against the whole training set and receives the
        label of the training sample with the smallest L1 distance. The
        result is also stored in ``self.y_hat`` for use by ``score``.
        A progress counter is printed every 100 rows.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        array, shape (n_samples,)
            Predicted labels, with the dtype of the training labels.
        """
        X = np.asarray(X)
        self.y_hat = np.zeros(X.shape[0], dtype=self.y_train.dtype)
        for i in range(X.shape[0]):
            row = X[i, :]
            # |a - b| is computed as max(a, b) - min(a, b). A plain a - b
            # wraps around for unsigned dtypes (e.g. uint8 image data), which
            # silently corrupts the distances; max - min is never negative.
            abs_diff = np.maximum(self.X_train, row) - np.minimum(self.X_train, row)
            L1_row_distances = np.sum(abs_diff, axis=1)
            argmin_index = np.argmin(L1_row_distances)
            self.y_hat[i] = self.y_train[argmin_index]
            if i % 100 == 0:
                print(i)
        return self.y_hat

    def predict(self, X):
        """Conventional-name alias for ``predict_SLOW``."""
        return self.predict_SLOW(X)

    def score(self, y):
        """Return the fraction of the last predictions (``self.y_hat``) equal to ``y``."""
        return np.mean(self.y_hat == y)



In [6]:

    
#nn = NearestNeighbor()
#nn.train(X_train, y_train)
#y_hat = nn.predict(X_test)
#nn.score(y_test)



In [7]:

    
from sklearn import neighbors, datasets



In [14]:

    
# k-nearest-neighbours classifier that votes among the 10 closest training samples.
clf = neighbors.KNeighborsClassifier(n_neighbors=10)



In [ ]:

    
# Fit on the training set, then display the predicted labels for the test set
# (fit() returns the estimator itself, so the calls can be chained).
clf.fit(X_train, y_train).predict(X_test)



In [13]:

    
# Mean accuracy of the fitted classifier on the test set.
clf.score(X=X_test, y=y_test)









    Out[13]:





0.16320000000000001