Most of the code is copied from the binary logistic regression implementation to make this notebook self-contained.
In [1]:
# Setup code for this notebook
import random
import numpy as np
import matplotlib.pyplot as plt
# This is a bit of magic to make matplotlib figures appear inline
# in the notebook rather than in a new window
%matplotlib inline
# Default size (width, height) used for every figure created below
plt.rcParams['figure.figsize'] = (10.0, 8.0)
# Show image pixels as-is (no smoothing) and default to a grayscale colormap
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
I use the data-loading function from the Stanford University CS231n course code.
Run get_datasets.sh in a terminal to download the dataset, or download it directly from Alex Krizhevsky's website.
get_datasets.sh
# Get CIFAR10
wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar -xzvf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz
The result of the download is shown in the following figure.
In [2]:
# Write function to load the cifar-10 data
# The original code is from http://cs231n.github.io/assignment1/
# The function is in data_utils.py file for reusing.
import cPickle as pickle
import numpy as np
import os
def load_CIFAR_batch(filename):
    """Load a single pickled CIFAR batch file.

    Parameters
    ----------
    filename : str
        Path to a pickled dict with a 'data' entry (one flattened image per
        row, 3 * 32 * 32 values each) and a 'labels' entry.

    Returns
    -------
    X : ndarray of float, shape (N, 32, 32, 3)
        The images, with the channel axis moved last.
    Y : ndarray, shape (N,)
        The labels.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # obtained from a trusted source.
    # Pickles are binary data, so the file must be opened in binary mode;
    # text mode corrupts the stream on platforms that translate newlines.
    with open(filename, 'rb') as f:
        datadict = pickle.load(f)
    X = datadict['data']
    Y = datadict['labels']
    # -1 lets numpy infer the number of images instead of assuming 10000.
    # (N, 3, 32, 32) -> (N, 32, 32, 3)
    X = np.asarray(X).reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
    Y = np.array(Y)
    return X, Y
def load_CIFAR10(ROOT):
    """Load the five training batches and the test batch found under ROOT.

    The files 'data_batch_1' .. 'data_batch_5' are read with
    load_CIFAR_batch and concatenated in that order; 'test_batch' is read
    the same way.

    Returns
    -------
    (Xtr, Ytr, Xte, Yte) : training images/labels and test images/labels.
    """
    batch_paths = [os.path.join(ROOT, 'data_batch_%d' % (idx, )) for idx in range(1, 6)]
    image_parts = []
    label_parts = []
    for path in batch_paths:
        batch_images, batch_labels = load_CIFAR_batch(path)
        image_parts.append(batch_images)
        label_parts.append(batch_labels)
    Xtr = np.concatenate(image_parts)
    Ytr = np.concatenate(label_parts)
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte
In [3]:
from algorithms.data_utils import load_CIFAR10
# Human-readable class names; visualize_sample pairs the name at position k
# with the samples whose label equals k.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
def get_CIFAR10_data(num_training=49000, num_val=1000, num_test=10000, show_sample=True):
    """
    Load CIFAR-10 from disk and split it into training, validation and test sets.

    The first `num_training` loaded training samples form the training set,
    the next `num_val` form the validation set, and the first `num_test`
    loaded test samples form the test set.

    `show_sample` is accepted but not used by this function.

    Returns (X_train, y_train, X_val, y_val, X_test, y_test).
    """
    data_root = 'datasets/datasets-cifar-10/cifar-10-batches-py/'
    X_all, y_all, X_test_all, y_test_all = load_CIFAR10(data_root)

    # Index ranges for each split
    val_idx = xrange(num_training, num_training + num_val)
    train_idx = xrange(num_training)
    test_idx = xrange(num_test)

    X_val, y_val = X_all[val_idx], y_all[val_idx]
    X_train, y_train = X_all[train_idx], y_all[train_idx]
    X_test, y_test = X_test_all[test_idx], y_test_all[test_idx]
    return X_train, y_train, X_val, y_val, X_test, y_test
def visualize_sample(X_train, y_train, classes, samples_per_class=7):
    """Show a grid of randomly drawn training images, one column per class.

    For each class index, `samples_per_class` distinct samples whose label
    equals that index are drawn at random and stacked down that class's
    column; the class name is used as the title of the top image.
    """
    n_cols = len(classes)
    for col, name in enumerate(classes):
        candidates = np.flatnonzero(y_train == col)  # positions of every sample of this class
        chosen = np.random.choice(candidates, samples_per_class, replace=False)
        for row, sample_idx in enumerate(chosen):
            # subplot numbering is row-major and 1-based
            plt.subplot(samples_per_class, n_cols, row * n_cols + col + 1)
            plt.imshow(X_train[sample_idx].astype('uint8'))
            plt.axis('off')
            if row == 0:
                plt.title(name)
    plt.show()
def preprocessing_CIFAR10_data(X_train, y_train, X_val, y_val, X_test, y_test):
    """Flatten, mean-centre and bias-augment the three image sets.

    Each image set is reshaped to one row per sample, the per-feature mean of
    the *training* rows is subtracted from all three sets, a constant-1 bias
    feature is appended, and the result is transposed so that samples are
    columns: an input of N samples with D values each becomes (D + 1, N).

    The input arrays are left untouched. (The previous in-place `-=` wrote
    through the views returned by `np.reshape` and silently mean-subtracted
    the caller's raw arrays; it also failed on integer-typed inputs.)

    Returns (X_train, y_train, X_val, y_val, X_test, y_test); the label
    arrays are passed through unchanged.
    """
    def _as_rows(images):
        # One flattened row per sample.
        return np.reshape(images, (images.shape[0], -1))

    def _center_with_bias(rows, mean_row):
        # Out-of-place subtraction so the caller's data is never modified.
        centered = rows - mean_row
        # Append the bias feature, then switch to samples-as-columns layout.
        return np.hstack([centered, np.ones((centered.shape[0], 1))]).T

    train_rows = _as_rows(X_train)
    val_rows = _as_rows(X_val)
    test_rows = _as_rows(X_test)

    # The mean image is estimated from the training set only.
    mean_image = np.mean(train_rows, axis=0)

    X_train = _center_with_bias(train_rows, mean_image)
    X_val = _center_with_bias(val_rows, mean_image)
    X_test = _center_with_bias(test_rows, mean_image)
    return X_train, y_train, X_val, y_val, X_test, y_test
# Load the raw splits, display a few samples, then build the preprocessed matrices
X_train_raw, y_train_raw, X_val_raw, y_val_raw, X_test_raw, y_test_raw = get_CIFAR10_data()
visualize_sample(X_train_raw, y_train_raw, classes)
X_train, y_train, X_val, y_val, X_test, y_test = preprocessing_CIFAR10_data(
    X_train_raw, y_train_raw, X_val_raw, y_val_raw, X_test_raw, y_test_raw)
# Sanity check: print the dimensions of every data and label array
print('%s %s' % ('Train data shape: ', X_train.shape))
print('%s %s' % ('Train labels shape: ', y_train.shape))
print('%s %s' % ('Validation data shape: ', X_val.shape))
print('%s %s' % ('Validation labels shape: ', y_val.shape))
print('%s %s' % ('Test data shape: ', X_test.shape))
print('%s %s' % ('Test labels shape: ', y_test.shape))
The code runs in the backend; you can find it here or on GitHub.
The basic idea is to reduce the multi-class problem to a set of two-class problems and to build one logistic classifier per class (one-vs-all). For each classifier, we set the label y to 1 for its own class and to 0 for all other classes.
In [4]:
# Build binary labels for the first class: 1 where the label is 0, and 0 elsewhere.
# (Negating a boolean mask with unary `-` is rejected by current NumPy, so the
# labels are derived directly from the mask instead.)
from copy import deepcopy
idxs_zero = y_train == 0
y_train_test_loss = idxs_zero.astype(y_train.dtype)
# Test the loss and gradient and compare between two implementations
from algorithms.classifiers import loss_grad_logistic_naive, loss_grad_logistic_vectorized
import time
# Generate small random weights W: a single row with one entry per row of X_train
W = np.random.randn(1, X_train.shape[0]) * 0.001
tic = time.time()
loss_naive, grad_naive = loss_grad_logistic_naive(W, X_train, y_train_test_loss, 0)
toc = time.time()
print('Naive loss: %f, and gradient: computed in %fs' % (loss_naive, toc - tic))
tic = time.time()
loss_vec, grad_vect = loss_grad_logistic_vectorized(W, X_train, y_train_test_loss, 0)
toc = time.time()
print('Vectorized loss: %f, and gradient: computed in %fs' % (loss_vec, toc - tic))
# Compare the gradients. Because the gradient is a matrix, we use the Frobenius norm:
# the square root of the sum of squared element-wise differences.
diff = np.linalg.norm(grad_naive - grad_vect, ord='fro')
# Randomly choose some gradient entries to inspect side by side
idxs = np.random.choice(X_train.shape[0], 10, replace=False)
print(idxs)
print(grad_naive[0, idxs])
print(grad_vect[0, idxs])
print('Gradient difference between naive and vectorized version is: %f' % diff)
# W and grad_vect are kept on purpose: later cells reuse them
del loss_naive, loss_vec, grad_naive, y_train_test_loss
In [5]:
# file: algorithms/gradient_check.py
def grad_check_sparse(f, x, analytic_grad, num_checks):
    """Spot-check an analytic gradient against centred finite differences.

    `num_checks` random positions of `x` are sampled. At each one the
    numerical derivative (f(x + h) - f(x - h)) / (2h) is compared with the
    matching entry of `analytic_grad`, and both values plus their relative
    error are printed.

    Parameters
    ----------
    f : callable
        Maps an array shaped like `x` to a scalar.
    x : ndarray
        Point at which to check the gradient. It is perturbed in place during
        each check and restored to its exact original value afterwards.
    analytic_grad : ndarray
        Gradient to verify; indexed with the same positions as `x`.
    num_checks : int
        Number of random positions to test.

    Returns
    -------
    None. Results are printed only.
    """
    # Imported locally so the function does not depend on the caller's namespace.
    from random import randrange

    h = 1e-5
    print(x.shape)
    for _ in range(num_checks):
        ix = tuple([randrange(m) for m in x.shape])
        print(ix)
        original_value = x[ix]
        x[ix] = original_value + h  # step forward by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = original_value - h  # step backward by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = original_value  # restore exactly (no floating-point drift)
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        denominator = abs(grad_numerical) + abs(grad_analytic)
        # Both gradients exactly zero means perfect agreement, not 0/0 = nan.
        if denominator == 0:
            rel_error = 0.0
        else:
            rel_error = abs(grad_numerical - grad_analytic) / denominator
        print('numerical: %f analytic: %f, relative error: %e' % (grad_numerical, grad_analytic, rel_error))
# Check the analytic gradient against a numerical gradient along several randomly chosen dimensions
from algorithms.gradient_check import grad_check_sparse
# grad_vect was computed from the binary "class 0 vs. rest" labels, so the loss that is
# differentiated numerically must use the same labels. The raw multi-class y_train would
# compare gradients of two different objectives.
y_train_grad_check = (y_train == 0).astype(y_train.dtype)
f = lambda w: loss_grad_logistic_vectorized(w, X_train, y_train_grad_check, 0)[0]
grad_numerical = grad_check_sparse(f, W, grad_vect, 10)
In [6]:
# Train one binary (one-vs-all) logistic classifier per class
from algorithms.classifiers import Logistic
logistic_classifiers = []
num_classes = np.max(y_train) + 1
losses = []
for i in range(int(num_classes)):
    print('\nThe %d/%dth logistic classifier training...' % (i+1, num_classes))
    # Binary targets for class i: 1 for samples of class i, 0 for everything else.
    # Built from the mask directly; unary `-` on a boolean mask is rejected by current NumPy.
    idxs_i = y_train == i
    y_train_logistic = idxs_i.astype(y_train.dtype)
    logistic = Logistic()
    loss = logistic.train(X_train, y_train_logistic, method='sgd', batch_size=200, learning_rate=1e-6,
                          reg = 1e3, num_iters=1000, verbose=True, vectorized=True)
    # Keep the value returned by train() and the trained classifier, in class order
    losses.append(loss)
    logistic_classifiers.append(logistic)
In [7]:
from ggplot import *
# Plot the loss of the last trained logistic classifier.
# losses[-1] picks the last entry however many classifiers were trained
# (the previous hard-coded index 9 assumed exactly ten).
qplot(list(range(len(losses[-1]))), losses[-1]) + labs(x='Iteration number', y='SGD Loss for last trained classifier')
Out[7]:
In [8]:
# Compute the accuracy of training data and validation data
def predict_one_vs_all(logistic_classifiers, X, num_classes):
    """Predict a class for every sample by picking the highest-scoring classifier.

    Parameters
    ----------
    logistic_classifiers : sequence
        One trained classifier per class, in class order. Each must provide
        `predict(X)` whose return value has, at index 1, one score per column
        of X (presumably the positive-class probability; verify against
        Logistic.predict).
    X : ndarray
        Data with one sample per column (X.shape[1] samples).
    num_classes : int
        Number of classifiers / classes to evaluate.

    Returns
    -------
    ndarray of int, shape (X.shape[1],)
        For each sample, the index of the classifier with the largest score
        (the lowest index wins ties).
    """
    scores = np.zeros((num_classes, X.shape[1]))
    # `range` works on both Python 2 and 3 (`xrange` exists only on Python 2)
    for class_idx in range(int(num_classes)):
        classifier = logistic_classifiers[class_idx]
        scores[class_idx, :] = classifier.predict(X)[1]
    return np.argmax(scores, axis=0)
# Predict on every split, then report the fraction of correctly classified samples
pred_train_one_vs_all = predict_one_vs_all(logistic_classifiers, X_train, num_classes)
pred_val_one_vs_all = predict_one_vs_all(logistic_classifiers, X_val, num_classes)
pred_test_one_vs_all = predict_one_vs_all(logistic_classifiers, X_test, num_classes)
print('Training dataset accuracy: %f' % np.mean(y_train == pred_train_one_vs_all))
print('Validation dataset accuracy: %f' % np.mean(y_val == pred_val_one_vs_all))
print('Test datast accuracy: %f' % np.mean(y_test == pred_test_one_vs_all))
In [8]: