In [1]:
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
In [2]:
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
In [3]:
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
"""
Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
it for the linear classifier. These are the same steps as we used for the
SVM, but condensed to a single function.
"""
# Load the raw CIFAR-10 data
cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
# subsample the data
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]
y_val = y_train[mask]
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]
# Preprocessing: reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_val = np.reshape(X_val, (X_val.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
# Normalize the data: subtract the mean image
mean_image = np.mean(X_train, axis = 0)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
# add bias dimension and transform into columns
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T
return X_train, y_train, X_val, y_val, X_test, y_test
# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print 'Train data shape: ', X_train.shape
print 'Train labels shape: ', y_train.shape
print 'Validation data shape: ', X_val.shape
print 'Validation labels shape: ', y_val.shape
print 'Test data shape: ', X_test.shape
print 'Test labels shape: ', y_test.shape
In [4]:
def one_hot(x,n):
if type(x) == list: x = np.array(x)
x = x.flatten()
o_h = np.zeros((len(x),n))
o_h[np.arange(len(x)),x] = 1
return o_h
In [5]:
y_train_o_h = one_hot(y_train,10)
y_val_o_h = one_hot(y_val,10)
y_test_o_h = one_hot(y_test,10)
print 'Train labels shape: ', y_train_o_h.shape
print 'Train labels shape: ', y_val_o_h.shape
print 'Train labels shape: ', y_test_o_h.shape
X_train_t = np.transpose(X_train)
X_val_t = np.transpose(X_val)
X_test_t = np.transpose(X_test)
print 'Train data shape: ', X_train_t.shape
print 'Validation data shape: ', X_val_t.shape
print 'Test data shape: ', X_test_t.shape
In [6]:
srng = RandomStreams()
In [85]:
def floatX(X):
return np.asarray(X, dtype=theano.config.floatX)
def init_weights(shape, factor=0.00005):
return theano.shared(floatX(np.random.randn(*shape) * factor))
def rectify(X):
return T.maximum(X, 0.0)
def softmax(X):
e_x = T.exp(X - X.max(axis=1).dimshuffle(0, 'x'))
return e_x / e_x.sum(axis=1).dimshuffle(0, 'x')
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
grads = T.grad(cost=cost, wrt=params)
updates = []
for p, g in zip(params, grads):
acc = theano.shared(p.get_value() * 0.0)
acc_new = rho * acc + (1 - rho) * g ** 2
gradient_scaling = T.sqrt(acc_new + epsilon)
g = g / gradient_scaling
updates.append((acc, acc_new))
updates.append((p, p - lr * g))
return updates
def dropout(X, p=0.0):
if p > 0:
retain_prob = 1 - p
X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
X /= retain_prob
return X
def model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
X = dropout(X, p_drop_input)
h = rectify(T.dot(X, w_h))
h = dropout(h, p_drop_hidden)
h2 = rectify(T.dot(h, w_h2))
h2 = dropout(h2, p_drop_hidden)
py_x = softmax(T.dot(h2, w_o))
return h, h2, py_x
In [86]:
X = T.fmatrix()
Y = T.fmatrix()
w_h = init_weights((3073, 500))
w_h2 = init_weights((500, 500))
w_o = init_weights((500, 10))
noise_h, noise_h2, noise_py_x = model(X, w_h, w_h2, w_o, 0.2, 0.5)
h, h2, py_x = model(X, w_h, w_h2, w_o, 0.0, 0.0)
y_pred = T.argmax(py_x, axis=1)
In [87]:
cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
params = [w_h, w_h2, w_o]
updates = RMSprop(cost, params, lr=0.00001)
In [88]:
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)
# This may take a minute or two.
In [95]:
print "Initial Error:", np.mean(np.argmax(y_val_o_h, axis=1) == predict(X_val_t))
for i in range(100):
for start, end in zip(range(0, len(X_train_t), 128), range(128, len(X_train_t), 128)):
cost = train(X_train_t[start:end], y_train_o_h[start:end])
print i, np.mean(np.argmax(y_val_o_h, axis=1) == predict(X_val_t))
In [96]:
np.mean(np.argmax(y_test_o_h, axis=1) == predict(X_test_t))
Out[96]:
In [ ]: