Convolutional Networks

So far we have worked with deep fully-connected networks, using them to explore different optimization strategies and network architectures. Fully-connected networks are a good testbed for experimentation because they are very computationally efficient, but in practice all state-of-the-art results use convolutional networks instead.

First you will implement several layer types that are used in convolutional networks. You will then use these layers to train a convolutional network on the CIFAR-10 dataset.


In [1]:
# As usual, a bit of setup

import numpy as np
import matplotlib.pyplot as plt
from cs231n.classifiers.cnn import *
from cs231n.data_utils import get_CIFAR10_data
from cs231n.gradient_check import eval_numerical_gradient_array, eval_numerical_gradient
from cs231n.layers import *
from cs231n.fast_layers import *
from cs231n.solver import Solver

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

def rel_error(x, y):
  """ returns relative error """
  return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))

In [2]:
import csv
import numpy as np

csv_file = open('cs231n/datasets/fer2013/fer2013.csv')

reader_file = csv.reader(csv_file)

def read_faces_csv(reader_file, center=True):
	"""
	Function that takes as input file a csv.reader() instance and assumes the following formatting:
	emotion, pixels (2034 of them), usage (train, test, val)
	Returns the following numpy arrays:
	- X_train, y_train (respectively (N, 48, 48), (N,) representing raw grayscale pixels and emotion labels)
	- X_test, y_test
	- X_val, y_val
	"""

	# Discard header
	row = next(reader_file)

	X_train_list, y_train_list = [], []
	X_test_list, y_test_list = [], []
	X_val_list, y_val_list = [], []

	N_train, N_test, N_val = 0, 0, 0

	for row in reader_file:
		y_str, X_row_str, data_type = row
		y = int(y_str)

		X_row_strs = X_row_str.split(' ')
		X_row = [float(x) for x in X_row_strs]
		
		if data_type == 'PublicTest':
			y_test_list.append(y)
			X_test_list.append(X_row)
			N_test += 1
		elif data_type == 'PrivateTest':
			y_val_list.append(y)
			X_val_list.append(X_row)
			N_val += 1
		else:
			y_train_list.append(y)
			X_train_list.append(X_row)
			N_train += 1

	X_train = np.asarray(X_train_list).astype('float64').reshape((N_train, 48, 48))
	y_train = np.asarray(y_train_list)

	X_test = np.asarray(X_test_list).astype('float64').reshape((N_test, 48, 48))
	y_test = np.asarray(y_test_list)

	X_val = np.asarray(X_val_list).astype('float64').reshape((N_val, 48, 48))
	y_val = np.asarray(y_val_list)

	# decide to mean-center or not
	if center:
		train_mean = X_train.mean(axis = 0)
		X_train -= train_mean
		X_test -= train_mean
		X_val -= train_mean

	#########
	return X_train, y_train, X_test, y_test, X_val, y_val

X_train, y_train, X_test, y_test, X_val, y_val = read_faces_csv(reader_file)

#(28709, 48, 48) (28709,) (3589, 48, 48) (3589,)

In [3]:
N, D, D = X_train.shape
N_val = X_val.shape[0]
N_test = X_test.shape[0]

X_train2 = np.zeros((N,3,D,D))
X_val2 = np.zeros((N_val,3,D,D))
X_test2 = np.zeros((N_test,3,D,D))

#convert graysclae to rgb
for i in xrange(N):
    for j in xrange(3):
        X_train2[i,j,:,:] = X_train[i,:,:]
for i in xrange(N_val):
    for j in xrange(3):
        X_val2[i,j,:,:] = X_val[i,:,:]
for i in xrange(N_test):
    for j in xrange(3):
        X_test2[i,j,:,:] = X_test[i,:,:]
        
num_train = 100
small_data = {
  'X_train': X_train2[:num_train],
  'y_train': y_train[:num_train],
  'X_val': X_val2,
  'y_val': y_val,
}

print X_train.shape, y_train.shape, X_val.shape, y_test.shape
model = ThreeLayerConvNet(weight_scale=5e-3)

solver = Solver(model, small_data,
                num_epochs=15, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 5e-4,
                },
                verbose=True, print_every=1)
solver.train()


(28709, 48, 48) (28709,) (3589, 48, 48) (3589,)
(Iteration 1 / 30) loss: 2.294224
(Epoch 0 / 15) train acc: 0.160000; val_acc: 0.165506
(Iteration 2 / 30) loss: 2.409355
(Epoch 1 / 15) train acc: 0.170000; val_acc: 0.147116
(Iteration 3 / 30) loss: 1.947595
(Iteration 4 / 30) loss: 1.726817
(Epoch 2 / 15) train acc: 0.320000; val_acc: 0.168013
(Iteration 5 / 30) loss: 1.753057
(Iteration 6 / 30) loss: 1.849576
(Epoch 3 / 15) train acc: 0.390000; val_acc: 0.193926
(Iteration 7 / 30) loss: 1.514765
(Iteration 8 / 30) loss: 1.539288
(Epoch 4 / 15) train acc: 0.460000; val_acc: 0.179159
(Iteration 9 / 30) loss: 1.428239
(Iteration 10 / 30) loss: 1.299156
(Epoch 5 / 15) train acc: 0.470000; val_acc: 0.218445
(Iteration 11 / 30) loss: 1.526907
(Iteration 12 / 30) loss: 1.278815
(Epoch 6 / 15) train acc: 0.650000; val_acc: 0.209808
(Iteration 13 / 30) loss: 1.157935
(Iteration 14 / 30) loss: 1.128510
(Epoch 7 / 15) train acc: 0.710000; val_acc: 0.236835
(Iteration 15 / 30) loss: 0.886735
(Iteration 16 / 30) loss: 0.943601
(Epoch 8 / 15) train acc: 0.750000; val_acc: 0.209529
(Iteration 17 / 30) loss: 0.820637
(Iteration 18 / 30) loss: 0.822632
(Epoch 9 / 15) train acc: 0.780000; val_acc: 0.194205
(Iteration 19 / 30) loss: 0.701598
(Iteration 20 / 30) loss: 0.604578
(Epoch 10 / 15) train acc: 0.860000; val_acc: 0.215938
(Iteration 21 / 30) loss: 0.295466
(Iteration 22 / 30) loss: 0.419594
(Epoch 11 / 15) train acc: 0.920000; val_acc: 0.207021
(Iteration 23 / 30) loss: 0.233339
(Iteration 24 / 30) loss: 0.299576
(Epoch 12 / 15) train acc: 0.950000; val_acc: 0.222903
(Iteration 25 / 30) loss: 0.187016
(Iteration 26 / 30) loss: 0.177279
(Epoch 13 / 15) train acc: 0.980000; val_acc: 0.207857
(Iteration 27 / 30) loss: 0.253298
(Iteration 28 / 30) loss: 0.080520
(Epoch 14 / 15) train acc: 0.970000; val_acc: 0.197548
(Iteration 29 / 30) loss: 0.125507
(Iteration 30 / 30) loss: 0.153396
(Epoch 15 / 15) train acc: 0.990000; val_acc: 0.226804

In [ ]:
data = {
  'X_train': X_train2,
  'y_train': y_train,
  'X_val': X_val2,
  'y_val': y_val,
  'X_test': X_test2,
  'y_test': y_test,
}

model = ThreeLayerConvNet(weight_scale=5e-3)

solver = Solver(model, small_data,
                num_epochs=10, batch_size=50,
                update_rule='adam',
                optim_config={
                  'learning_rate': 5e-4,
                },
                verbose=True, print_every=1)
solver.train()


(Iteration 1 / 20) loss: 2.294817
(Epoch 0 / 10) train acc: 0.170000; val_acc: 0.147116
(Iteration 2 / 20) loss: 2.236982
(Epoch 1 / 10) train acc: 0.270000; val_acc: 0.149067
(Iteration 3 / 20) loss: 1.855226
(Iteration 4 / 20) loss: 1.931032
(Epoch 2 / 10) train acc: 0.310000; val_acc: 0.225968
(Iteration 5 / 20) loss: 1.770074
(Iteration 6 / 20) loss: 1.674149
(Epoch 3 / 10) train acc: 0.420000; val_acc: 0.214544
(Iteration 7 / 20) loss: 1.692839
(Iteration 8 / 20) loss: 1.502859
(Epoch 4 / 10) train acc: 0.510000; val_acc: 0.197827
(Iteration 9 / 20) loss: 1.522390
(Iteration 10 / 20) loss: 1.405718
(Epoch 5 / 10) train acc: 0.430000; val_acc: 0.181109
(Iteration 11 / 20) loss: 1.192431
(Iteration 12 / 20) loss: 1.401796
(Epoch 6 / 10) train acc: 0.530000; val_acc: 0.186403
(Iteration 13 / 20) loss: 1.157968
(Iteration 14 / 20) loss: 0.933619
(Epoch 7 / 10) train acc: 0.640000; val_acc: 0.209250
(Iteration 15 / 20) loss: 1.039076
(Iteration 16 / 20) loss: 0.871343
(Epoch 8 / 10) train acc: 0.660000; val_acc: 0.207579
(Iteration 17 / 20) loss: 0.766481
(Iteration 18 / 20) loss: 1.016072
(Epoch 9 / 10) train acc: 0.750000; val_acc: 0.247144
(Iteration 19 / 20) loss: 0.618025
(Iteration 20 / 20) loss: 0.769494
(Epoch 10 / 10) train acc: 0.780000; val_acc: 0.207857

In [ ]:
y_test_pred = np.argmax(model.loss(data['X_test']), axis=1)
y_val_pred = np.argmax(model.loss(data['X_val']), axis=1)
print 'Validation set accuracy: ', (y_val_pred == data['y_val']).mean()
print 'Test set accuracy: ', (y_test_pred == data['y_test']).mean()