In [1]:
import random
import numpy as np
from cs231n.data_utils import load_CIFAR10
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
  """
  Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
  it for the linear classifier. These are the same steps as we used for the
  SVM, but condensed to a single function.  
  """
  # Load the raw CIFAR-10 data
  cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
  X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
  
  # subsample the data
  mask = range(num_training, num_training + num_validation)
  X_val = X_train[mask]
  y_val = y_train[mask]
  mask = range(num_training)
  X_train = X_train[mask]
  y_train = y_train[mask]
  mask = range(num_test)
  X_test = X_test[mask]
  y_test = y_test[mask]
  
  # Preprocessing: reshape the image data into rows
  X_train = np.reshape(X_train, (X_train.shape[0], -1))
  X_val = np.reshape(X_val, (X_val.shape[0], -1))
  X_test = np.reshape(X_test, (X_test.shape[0], -1))
  
  # Normalize the data: subtract the mean image
  mean_image = np.mean(X_train, axis = 0)
  X_train -= mean_image
  X_val -= mean_image
  X_test -= mean_image
  
  # add bias dimension and transform into columns
  X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
  X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
  X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T
  
  return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print 'Train data shape: ', X_train.shape
print 'Train labels shape: ', y_train.shape
print 'Validation data shape: ', X_val.shape
print 'Validation labels shape: ', y_val.shape
print 'Test data shape: ', X_test.shape
print 'Test labels shape: ', y_test.shape


Train data shape:  (3073, 49000)
Train labels shape:  (49000,)
Validation data shape:  (3073, 1000)
Validation labels shape:  (1000,)
Test data shape:  (3073, 1000)
Test labels shape:  (1000,)
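
The 3073 rows come from flattening each 32x32x3 image to 3072 values and appending a constant 1, so the bias folds into the weight matrix. A minimal NumPy sketch of this bias trick (all shapes and arrays here are hypothetical, just to show the identity):

import numpy as np

# Bias trick sketch: with a trailing 1 appended to x, a single matrix
# multiply W_ext.dot(x_ext) computes W.dot(x) + b.
W = np.random.randn(10, 3072)          # hypothetical weights
b = np.random.randn(10)                # hypothetical biases
x = np.random.randn(3072)              # one flattened image

W_ext = np.hstack([W, b[:, None]])     # shape (10, 3073)
x_ext = np.append(x, 1.0)              # shape (3073,)

assert np.allclose(W_ext.dot(x_ext), W.dot(x) + b)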

In [8]:
def one_hot(x, n):
    """Convert a vector of integer class labels into an (N, n) one-hot matrix."""
    if isinstance(x, list):
        x = np.array(x)
    x = x.flatten()
    o_h = np.zeros((len(x), n))
    o_h[np.arange(len(x)), x] = 1
    return o_h
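
A quick sanity check of one_hot on a tiny hand-made example (expected output shown in the comment):

# Labels 0, 2, 1 with n=3 should give the identity rows in that order.
print one_hot([0, 2, 1], 3)
# expected:
# [[ 1.  0.  0.]
#  [ 0.  0.  1.]
#  [ 0.  1.  0.]]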




In [9]:
y_train_o_h = one_hot(y_train,10)
y_val_o_h = one_hot(y_val,10)
y_test_o_h = one_hot(y_test,10)
print 'Train labels shape: ', y_train_o_h.shape
print 'Validation labels shape: ', y_val_o_h.shape
print 'Test labels shape: ', y_test_o_h.shape

X_train_t = np.transpose(X_train)
X_val_t = np.transpose(X_val)
X_test_t = np.transpose(X_test)
print 'Train data shape: ', X_train_t.shape
print 'Validation data shape: ', X_val_t.shape
print 'Test data shape: ', X_test_t.shape


Train labels shape:  (49000, 10)
Validation labels shape:  (1000, 10)
Test labels shape:  (1000, 10)
Train data shape:  (49000, 3073)
Validation data shape:  (1000, 3073)
Test data shape:  (1000, 3073)

In [3]:
import theano
from theano import tensor as T


Using gpu device 0: GRID K520

In [4]:
def floatX(X):
    # Cast to Theano's configured float type (float32 on the GPU).
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    # Small random initialization, stored as a shared variable on the device.
    return theano.shared(floatX(np.random.randn(*shape) * 0.001))

def model(X, w):
    # Linear classifier: class scores X.dot(w), mapped to probabilities
    # with a row-wise softmax.
    return T.nnet.softmax(T.dot(X, w))
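
For reference, a NumPy sketch of what model computes; the max-subtraction is the usual trick for numerical stability and does not change the result:

def softmax_np(scores):
    # scores: (N, C) class scores; subtract each row's max before exp
    # so the exponentials cannot overflow.
    shifted = scores - scores.max(axis=1, keepdims=True)
    exp_s = np.exp(shifted)
    return exp_s / exp_s.sum(axis=1, keepdims=True)

def model_np(X, w):
    # NumPy mirror of model(X, w): probabilities softmax(X.dot(w)).
    return softmax_np(X.dot(w))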

In [15]:
X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((3073, 10))

py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)

# Mean softmax cross-entropy plus L2 regularization on the weights.
reg = 2.610000e+01
learning_rate = 8.910000e-07
cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y)) + 0.5 * reg * (w * w).sum()
gradient = T.grad(cost=cost, wrt=w)
update = [[w, w - gradient * learning_rate]]
# update = [[w, w - gradient * 8.910000e-04]]  # alternative, larger learning rate
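
The gradient that T.grad derives for this cost has a simple closed form; a hedged NumPy sketch (assuming one-hot targets Y and the model_np helper sketched earlier):

def softmax_grad_np(X, Y, w, reg):
    # d/dw [ mean cross-entropy + 0.5 * reg * ||w||^2 ]
    #   = X^T (probs - Y) / N + reg * w
    N = X.shape[0]
    probs = model_np(X, w)
    return X.T.dot(probs - Y) / N + reg * w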

In [16]:
train = theano.function(inputs=[X, Y], outputs=cost, updates=update, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred, allow_input_downcast=True)

In [17]:
print "Initial Error:", np.mean(np.argmax(y_test_o_h, axis=1) == predict(X_test_t))
for i in range(35):
    for start, end in zip(range(0, len(X_train_t), 128), range(128, len(X_train_t), 128)):
        cost = train(X_train_t[start:end], y_train_o_h[start:end])
    print i, np.mean(np.argmax(y_test_o_h, axis=1) == predict(X_test_t))


Initial accuracy: 0.107
0 0.26
1 0.294
2 0.323
3 0.328
4 0.334
5 0.336
6 0.342
7 0.35
8 0.356
9 0.36
10 0.366
11 0.366
12 0.369
13 0.369
14 0.373
15 0.379
16 0.38
17 0.38
18 0.377
19 0.375
20 0.377
21 0.379
22 0.378
23 0.382
24 0.384
25 0.386
26 0.388
27 0.39
28 0.392
29 0.393
30 0.391
31 0.39
32 0.39
33 0.387
34 0.387
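
The learning rate and regularization strength above appear hand-picked, and the validation split loaded earlier is the natural place to tune them rather than the test set. A minimal sketch with hypothetical candidate grids, recompiling the Theano functions for each setting:

# Hypothetical grid search over learning rate and regularization,
# scored on the validation split.
best_val, best_params = -1, None
for lr in [8.91e-07, 8.91e-06, 8.91e-05]:        # hypothetical grid
    for reg_i in [2.61e+00, 2.61e+01, 2.61e+02]: # hypothetical grid
        w = init_weights((3073, 10))
        cost_i = T.mean(T.nnet.categorical_crossentropy(model(X, w), Y)) \
                 + 0.5 * reg_i * (w * w).sum()
        upd = [[w, w - T.grad(cost_i, w) * lr]]
        train_i = theano.function([X, Y], cost_i, updates=upd,
                                  allow_input_downcast=True)
        pred_i = theano.function([X], T.argmax(model(X, w), axis=1),
                                 allow_input_downcast=True)
        for epoch in range(10):
            for start, end in zip(range(0, len(X_train_t), 128),
                                  range(128, len(X_train_t), 128)):
                train_i(X_train_t[start:end], y_train_o_h[start:end])
        val_acc = np.mean(np.argmax(y_val_o_h, axis=1) == pred_i(X_val_t))
        if val_acc > best_val:
            best_val, best_params = val_acc, (lr, reg_i)
print 'best validation accuracy:', best_val, 'with (lr, reg):', best_params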

In [ ]: