In [1]:
import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
In [2]:
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for the linear classifier. These are the same steps as we used for the
    SVM, but condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    # subsample the data
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    # Normalize the data: subtract the mean image
    mean_image = np.mean(X_train, axis=0)
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image
    # add bias dimension and transform into columns
    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T
    return X_train, y_train, X_val, y_val, X_test, y_test
# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print 'Train data shape: ', X_train.shape
print 'Train labels shape: ', y_train.shape
print 'Validation data shape: ', X_val.shape
print 'Validation labels shape: ', y_val.shape
print 'Test data shape: ', X_test.shape
print 'Test labels shape: ', y_test.shape
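As a sanity check on the bias trick used above, here is a minimal standalone numpy sketch on a hypothetical toy 2x3 array (not the CIFAR-10 data): appending a column of ones and transposing yields one (D+1)-dimensional column per example.
# toy example, purely illustrative
toy = np.array([[1., 2., 3.],
                [4., 5., 6.]])
toy_with_bias = np.hstack([toy, np.ones((toy.shape[0], 1))]).T
print toy_with_bias.shape   # (4, 2): each column is one example with a trailing 1
print toy_with_bias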
In [8]:
def one_hot(x, n):
    if type(x) == list: x = np.array(x)
    x = x.flatten()
    o_h = np.zeros((len(x), n))
    o_h[np.arange(len(x)), x] = 1
    return o_h
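A quick check of one_hot on a small label vector (illustrative only, not part of the CIFAR-10 pipeline):
print one_hot([1, 0, 2], 3)   # each row is the one-hot encoding of the corresponding label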
In [9]:
y_train_o_h = one_hot(y_train,10)
y_val_o_h = one_hot(y_val,10)
y_test_o_h = one_hot(y_test,10)
print 'Train labels shape: ', y_train_o_h.shape
print 'Validation labels shape: ', y_val_o_h.shape
print 'Test labels shape: ', y_test_o_h.shape
X_train_t = np.transpose(X_train)
X_val_t = np.transpose(X_val)
X_test_t = np.transpose(X_test)
print 'Train data shape: ', X_train_t.shape
print 'Validation data shape: ', X_val_t.shape
print 'Test data shape: ', X_test_t.shape
In [3]:
import theano
from theano import tensor as T
In [4]:
def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.001))

def model(X, w):
    return T.nnet.softmax(T.dot(X, w))
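For reference, T.nnet.softmax(T.dot(X, w)) turns each row of class scores into a probability distribution. A minimal numpy sketch of the same computation (the helper name softmax_np is hypothetical; the Theano op does this symbolically):
def softmax_np(scores):
    # subtract the row-wise max for numerical stability, then exponentiate and normalize
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=1, keepdims=True)

print softmax_np(np.array([[1.0, 2.0, 3.0]]))   # each row sums to 1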
In [15]:
X = T.fmatrix()
Y = T.fmatrix()
w = init_weights((3073, 10))
py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)
# mean cross-entropy loss plus 0.5 * reg * ||w||^2 L2 regularization (reg = 26.1)
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) + 0.5 * 2.610000e+01 * (w * w).sum()
gradient = T.grad(cost=cost, wrt=w)
# vanilla gradient descent update with learning rate 8.91e-07
update = [[w, w - gradient * 8.910000e-07]]
#update = [[w, w - gradient * 8.910000e-04]]
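The regularization strength (26.1) and learning rate (8.91e-7) look like the result of a hyperparameter search. A minimal sketch of how such a search over the validation set might be set up, reusing model, init_weights, and the symbolic X, Y from above (the helper name val_accuracy and the candidate grid are hypothetical):
def val_accuracy(lr, reg, num_epochs=5, batch=128):
    # build a fresh softmax classifier for this (lr, reg) pair
    w_s = init_weights((3073, 10))
    p = model(X, w_s)
    loss = T.mean(T.nnet.categorical_crossentropy(p, Y)) + 0.5 * reg * (w_s * w_s).sum()
    g = T.grad(cost=loss, wrt=w_s)
    step = theano.function(inputs=[X, Y], outputs=loss,
                           updates=[[w_s, w_s - g * lr]], allow_input_downcast=True)
    guess = theano.function(inputs=[X], outputs=T.argmax(p, axis=1), allow_input_downcast=True)
    for _ in range(num_epochs):
        for s, e in zip(range(0, len(X_train_t), batch), range(batch, len(X_train_t), batch)):
            step(X_train_t[s:e], y_train_o_h[s:e])
    return np.mean(np.argmax(y_val_o_h, axis=1) == guess(X_val_t))

# hypothetical candidate grid; pick the pair with the best validation accuracy
for lr in [1e-7, 8.91e-7, 1e-6]:
    for reg in [1e1, 2.61e1, 5e1]:
        print lr, reg, val_accuracy(lr, reg)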
In [16]:
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)
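Once compiled, train and predict behave like ordinary Python functions. For example (the weights are still at their small random initialization, so these predictions are essentially arbitrary):
print predict(X_test_t[:5])   # predicted class indices for the first five test images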
In [17]:
print "Initial Error:", np.mean(np.argmax(y_test_o_h, axis=1) == predict(X_test_t))
for i in range(35):
for start, end in zip(range(0, len(X_train_t), 128), range(128, len(X_train_t), 128)):
cost = train(X_train_t[start:end], y_train_o_h[start:end])
print i, np.mean(np.argmax(y_test_o_h, axis=1) == predict(X_test_t))
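matplotlib is imported above but unused in this excerpt; a common follow-up is to report validation accuracy and visualize the learned weight templates. A sketch, assuming the (32, 32, 3) row layout produced by load_CIFAR10 and the standard CIFAR-10 class order:
print 'Validation accuracy:', np.mean(np.argmax(y_val_o_h, axis=1) == predict(X_val_t))

w_np = w.get_value()[:-1, :]                 # drop the bias row, shape (3072, 10)
w_min, w_max = w_np.min(), w_np.max()
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for j in range(10):
    plt.subplot(2, 5, j + 1)
    # rescale to 0..255 for display and restore the 32x32x3 image layout
    img = 255.0 * (w_np[:, j].reshape(32, 32, 3) - w_min) / (w_max - w_min)
    plt.imshow(img.astype('uint8'))
    plt.axis('off')
    plt.title(classes[j])
plt.show()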