Complete this worksheet and hand it in (including its outputs and any supporting code outside of the worksheet) with your assignment submission. For more details, see the assignments page on the course website.
This exercise is analogous to the SVM exercise. You will:

- implement a vectorized loss function and its analytic gradient for the Softmax classifier
- check the implementation with a numerical sanity check
- use a validation set to tune the learning rate and regularization strength
- optimize the loss function with minibatch gradient descent
- evaluate the best model on the test set and visualize the learned weights
In [1]:
# Run some setup code
import numpy as np
import matplotlib.pyplot as plt
# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
# Boolean flags controlling debug output and image display in the helper code.
debug = True
show_img = True
In [2]:
import cifar10
# Load the raw CIFAR-10 data
X, y, X_test, y_test = cifar10.load('../cifar-10-batches-py', debug = debug)
m = 49000       # number of training examples
m_val = 1000    # number of validation examples
m_test = 1000   # number of test examples
m_dev = 500     # small development subset for quick experiments
X, y, X_test, y_test, X_dev, y_dev, X_val, y_val = cifar10.split_vec(
    X, y, X_test, y_test, m, m_test, m_val, m_dev, debug = debug, show_img = show_img)
In [33]:
n = X_dev.shape[1]   # number of features per example
K = 10               # number of CIFAR-10 classes
from softmax import Softmax
model = Softmax(n, K)
# Sanity-check the loss and gradient, first without and then with regularization.
lamda = 0.0
model.train_check(X, y, lamda)
lamda = 3.3
model.train_check(X, y, lamda)
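For reference, the quantity that `train_check` exercises is presumably the standard vectorized softmax loss and gradient. A minimal sketch, assuming a weight matrix of shape (n, K) and L2 regularization weighted by `lamda` (the function name `softmax_loss` and the exact regularization convention are assumptions, not the actual `softmax.py` API):

```python
import numpy as np

def softmax_loss(W, X, y, lamda):
    """Vectorized softmax loss and gradient (sketch).

    W: (n, K) weights, X: (m, n) examples, y: (m,) integer labels in 0..K-1.
    Returns (loss, dW), with dW the same shape as W.
    """
    m = X.shape[0]
    scores = X.dot(W)                              # (m, K) class scores
    scores -= scores.max(axis=1, keepdims=True)    # shift for numerical stability
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)

    # Mean cross-entropy plus L2 regularization on the weights.
    loss = -np.log(probs[np.arange(m), y]).mean() + 0.5 * lamda * np.sum(W * W)

    # Gradient of the scores is (probs - one_hot(y)) / m, pushed back through X.
    dscores = probs.copy()
    dscores[np.arange(m), y] -= 1.0
    dW = X.T.dot(dscores) / m + lamda * W
    return loss, dW
```

With small random weights and `lamda = 0`, the predicted probabilities are nearly uniform, so this loss comes out close to log(10) ≈ 2.302, which is what the sanity check above should report.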
**Inline Question 1:** Why do we expect our loss to be close to -log(0.1)? Explain briefly.

**Your answer:** At initialization the weights are small and random, so all K = 10 class scores are roughly equal and the softmax assigns each class a probability of about 1/K = 0.1. The cross-entropy loss is then -log(0.1) = log(K) = log(10) ≈ 2.302.
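A quick numerical illustration of that argument (an illustrative snippet with made-up sizes, not part of the required worksheet code):

```python
import numpy as np

np.random.seed(0)
K, n, m = 10, 3072, 500              # classes, features, batch size (illustrative)
W = 1e-4 * np.random.randn(n, K)     # small random weights, as at initialization
X = np.random.randn(m, n)
y = np.random.randint(0, K, m)

scores = X.dot(W)
scores -= scores.max(axis=1, keepdims=True)                      # numerical stability
probs = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)
loss = -np.log(probs[np.arange(m), y]).mean()

print(loss, -np.log(1.0 / K))        # both values are close to 2.3026
```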
In [40]:
# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
results = {}
best_val = -1
best_model = None
best_hpara = None
T = 1500      # number of training iterations
B = 256       # mini-batch size
alpha = 1e-6  # learning rate
# Suggested search ranges from the assignment:
# learning_rates = [1e-7, 5e-7]
# regularization_strengths = [5e4, 1e8]
for lamda in [1e0, 1e1, 1e3]:
    model = Softmax(n, K)
    hpara = (alpha, lamda, T, B)
    model.train(X, y, hpara, show_img = False, debug = False)
    y_hat = model.predict(X)
    train_acc = np.mean(y == y_hat)
    y_val_hat = model.predict(X_val)
    val_acc = np.mean(y_val == y_val_hat)
    results[(alpha, lamda)] = (train_acc, val_acc)
    print('alpha =', alpha, 'lamda =', lamda, 'train_acc =', train_acc, 'val_acc =', val_acc)
    if val_acc > best_val:
        best_model = model
        best_val = val_acc
        best_hpara = hpara
In [41]:
print('best val. acc.:', best_val, 'best hpara:', best_hpara)
In [42]:
# Evaluate the best softmax model on the test set
y_test_hat = best_model.predict(X_test)
print('test acc.:', np.mean(y_test_hat == y_test))
In [43]:
# Visualize the learned weights for each class
best_model.visualize_W()
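`visualize_W` presumably renders each class's weight vector as a 32x32x3 template image. A rough sketch of that idea, assuming a (3072, K) weight matrix with no bias row (the attribute name `best_model.W` and the exact memory layout are assumptions about `softmax.py`):

```python
import numpy as np
import matplotlib.pyplot as plt

cifar10_classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck']

def visualize_weights(W, class_names=cifar10_classes):
    """Show each column of a (3072, K) weight matrix as a 32x32x3 image."""
    W_img = W.reshape(32, 32, 3, -1)
    w_min, w_max = W_img.min(), W_img.max()
    for k in range(W_img.shape[-1]):
        plt.subplot(2, 5, k + 1)
        # Rescale the weights into [0, 255] so they can be displayed as an image.
        img = 255.0 * (W_img[:, :, :, k] - w_min) / (w_max - w_min)
        plt.imshow(img.astype('uint8'))
        plt.axis('off')
        plt.title(class_names[k])
    plt.show()

# Hypothetical usage, if the weights were exposed as best_model.W with shape (3072, 10):
# visualize_weights(best_model.W)
```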