# Softmax exercise

Complete and hand in this completed worksheet (including its outputs and any supporting code outside of the worksheet) with your assignment submission. For more details see the assignments page on the course website.

This exercise is analogous to the SVM exercise. You will:

• implement a fully-vectorized loss function for the Softmax classifier
• implement the fully-vectorized expression for its analytic gradient
• use a validation set to tune the learning rate and regularization strength
• optimize the loss function with SGD
• visualize the final learned weights
``````

In [1]:

import os
os.chdir(os.getcwd() + '/..')

# Run some setup code for this notebook
import random
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;

``````
``````

In [4]:

# Load the raw CIFAR-10 data
from data_utils import load_CIFAR10  # NOTE(review): module path assumed -- confirm against project layout

cifar10_dir = 'datasets/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

# Split the data into train / validation / test sets, plus a small
# development set for quick experiments.
num_training = 49000
num_validation = 1000
num_test = 1000
num_dev = 500

# Validation set: the num_validation examples after the first num_training.
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]
y_val = y_train[mask]

# Training set: the first num_training examples.
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]

# Dev set: a small random sample of the training set.
mask = np.random.choice(num_training, num_dev, replace=False)
X_dev = X_train[mask]
y_dev = y_train[mask]

# Test set: the first num_test examples of the original test set.
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Preprocessing: reshape each image into a single row vector
X_train = X_train.reshape(X_train.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
X_dev = X_dev.reshape(X_dev.shape[0], -1)

# Normalize the data: subtract the mean image (computed on train only,
# then applied to every split so they share the same preprocessing)
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image

# Append the bias dimension of ones so the classifier only has to learn a
# single weight matrix W (bias folded into the last column of W).
X_train = np.hstack((X_train, np.ones((X_train.shape[0], 1))))
X_val = np.hstack((X_val, np.ones((X_val.shape[0], 1))))
X_test = np.hstack((X_test, np.ones((X_test.shape[0], 1))))
X_dev = np.hstack((X_dev, np.ones((X_dev.shape[0], 1))))

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

``````
``````

((49000, 3073), (1000, 3073), (1000, 3073), (500, 3073))

``````

## Softmax Classifier

``````

In [17]:

# softmax_loss_vectorized function
from classifiers.linear_classifier import softmax_loss_vectorized
import time

# Seed the RNG so the random weight matrix -- and therefore the printed
# loss -- is reproducible across Restart & Run All.
np.random.seed(0)

# Small random weights: (3073, 10) = (D + bias, num_classes).
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1):
# with near-zero W every class gets probability ~1/10.
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

``````
``````

loss: 2.386956
sanity check: 2.302585

``````

## Inline Question 1:

**Why do we expect our loss to be close to -log(0.1)? Explain briefly.**

``````

In [19]:

# Numerically gradient-check the analytic gradient of the softmax loss,
# first without and then with regularization. grad_check_sparse samples a
# few random dimensions and compares numeric vs. analytic gradients.
from gradient_check import grad_check_sparse  # NOTE(review): module path assumed -- confirm

loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0.0)
f = lambda w: softmax_loss_vectorized(w, X_dev, y_dev, 0.0)[0]
grad_check_sparse(f, W, grad, 10)

# Repeat with regularization turned on (reg = 5e1).
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 5e1)
f = lambda w: softmax_loss_vectorized(w, X_dev, y_dev, 5e1)[0]
grad_check_sparse(f, W, grad, 10)

``````
``````

numerical: 0.736112, analytic: 0.736112, relative error: 4.939844e-08
numerical: 1.359312, analytic: 1.359312, relative error: 2.223424e-08
numerical: 3.672828, analytic: 3.672827, relative error: 2.815123e-08
numerical: 5.150041, analytic: 5.150041, relative error: 1.513723e-08
numerical: 0.439351, analytic: 0.439351, relative error: 5.945770e-08
numerical: 3.621422, analytic: 3.621422, relative error: 2.221157e-08
numerical: 1.943753, analytic: 1.943752, relative error: 5.199429e-08
numerical: 0.220881, analytic: 0.220881, relative error: 4.691754e-07
numerical: -9.092706, analytic: -9.092706, relative error: 4.104540e-09
numerical: 0.378179, analytic: 0.378179, relative error: 3.649779e-08
numerical: -1.764118, analytic: -1.764119, relative error: 2.513974e-08
numerical: 3.271042, analytic: 3.271042, relative error: 1.854753e-08
numerical: -1.781404, analytic: -1.781404, relative error: 3.165128e-08
numerical: -2.152419, analytic: -2.152419, relative error: 5.668960e-09
numerical: 0.350985, analytic: 0.350985, relative error: 2.059407e-07
numerical: -0.787027, analytic: -0.787027, relative error: 5.968350e-08
numerical: -1.181319, analytic: -1.181319, relative error: 1.497601e-09
numerical: -0.426054, analytic: -0.426054, relative error: 6.143153e-08
numerical: 4.932407, analytic: 4.932407, relative error: 2.360830e-08
numerical: -6.181916, analytic: -6.181916, relative error: 4.386489e-09

``````
``````

In [32]:

# Use the validation set to tune hyperparameters (learning rate and
# regularization strength); target accuracy over 0.35 on the validation set.

from classifiers.linear_classifier import Softmax

results = {}          # (lr, reg) -> (train_accuracy, val_accuracy)
best_val = -1         # best validation accuracy seen so far
best_softmax = None   # the trained Softmax that achieved best_val
learning_rates = [9e-7, 1e-6, 3e-6, 9e-6, 1e-5]
regularization_strengths = [0, 1e1, 2e1, 3e1]

for lr in learning_rates:
    for reg in regularization_strengths:
        model = Softmax()
        model.train(X_train, y_train, learning_rate=lr, reg=reg,
                    num_iters=5000, batch_size=200, verbose=True)
        y_train_pred = model.predict(X_train)
        train_accuracy = np.mean(y_train == y_train_pred)
        y_val_pred = model.predict(X_val)
        val_accuracy = np.mean(y_val == y_val_pred)

        results[(lr, reg)] = (train_accuracy, val_accuracy)
        # Keep the model with the highest validation accuracy.
        if val_accuracy > best_val:
            best_val = val_accuracy
            best_softmax = model

        print('lr %e reg %e train_accuracy: %f val_accuracy: %f' % (lr, reg, train_accuracy, val_accuracy))
    # Blank separator line between learning rates. NOTE: a bare `print`
    # (Python 2 statement) is a no-op expression in Python 3.
    print()

for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train_accuracy: %f val_accuracy: %f' % (lr, reg, train_accuracy, val_accuracy))

print()
print('best validation accuracy achieved during cross-validation: %f' % best_val)

``````
``````

iteration 0 / 5000: loss 5.890208
iteration 100 / 5000: loss 3.194921
iteration 200 / 5000: loss 2.332129
iteration 300 / 5000: loss 2.300826
iteration 400 / 5000: loss 2.217096
iteration 500 / 5000: loss 2.161462
iteration 600 / 5000: loss 2.104302
iteration 700 / 5000: loss 2.125867
iteration 800 / 5000: loss 2.038327
iteration 900 / 5000: loss 2.169287
iteration 1000 / 5000: loss 2.121470
iteration 1100 / 5000: loss 1.999580
iteration 1200 / 5000: loss 2.106824
iteration 1300 / 5000: loss 1.826786
iteration 1400 / 5000: loss 2.143686
iteration 1500 / 5000: loss 2.039867
iteration 1600 / 5000: loss 1.956495
iteration 1700 / 5000: loss 1.888666
iteration 1800 / 5000: loss 1.908971
iteration 1900 / 5000: loss 1.861310
iteration 2000 / 5000: loss 1.938574
iteration 2100 / 5000: loss 1.838085
iteration 2200 / 5000: loss 1.940508
iteration 2300 / 5000: loss 2.019789
iteration 2400 / 5000: loss 1.789948
iteration 2500 / 5000: loss 1.925156
iteration 2600 / 5000: loss 1.902407
iteration 2700 / 5000: loss 1.909397
iteration 2800 / 5000: loss 1.825632
iteration 2900 / 5000: loss 1.824641
iteration 3000 / 5000: loss 1.918740
iteration 3100 / 5000: loss 1.795331
iteration 3200 / 5000: loss 1.759469
iteration 3300 / 5000: loss 1.543473
iteration 3400 / 5000: loss 1.920789
iteration 3500 / 5000: loss 1.774508
iteration 3600 / 5000: loss 1.837029
iteration 3700 / 5000: loss 1.755523
iteration 3800 / 5000: loss 1.779667
iteration 3900 / 5000: loss 1.855261
iteration 4000 / 5000: loss 1.892036
iteration 4100 / 5000: loss 1.710696
iteration 4200 / 5000: loss 1.833561
iteration 4300 / 5000: loss 1.887569
iteration 4400 / 5000: loss 1.957419
iteration 4500 / 5000: loss 1.837813
iteration 4600 / 5000: loss 1.849479
iteration 4700 / 5000: loss 1.850004
iteration 4800 / 5000: loss 1.685684
iteration 4900 / 5000: loss 1.766273
lr 9.000000e-07 reg 0.000000e+00 train_accuracy: 0.395347 val_accuracy: 0.354000
iteration 0 / 5000: loss 5.553815
iteration 100 / 5000: loss 3.221160
iteration 200 / 5000: loss 2.668618
iteration 300 / 5000: loss 2.629958
iteration 400 / 5000: loss 2.563990
iteration 500 / 5000: loss 2.665957
iteration 600 / 5000: loss 2.423917
iteration 700 / 5000: loss 2.546606
iteration 800 / 5000: loss 2.348642
iteration 900 / 5000: loss 2.167254
iteration 1000 / 5000: loss 2.351205
iteration 1100 / 5000: loss 2.267056
iteration 1200 / 5000: loss 2.321990
iteration 1300 / 5000: loss 2.576225
iteration 1400 / 5000: loss 2.256163
iteration 1500 / 5000: loss 2.128683
iteration 1600 / 5000: loss 2.284987
iteration 1700 / 5000: loss 2.328405
iteration 1800 / 5000: loss 2.172751
iteration 1900 / 5000: loss 2.018442
iteration 2000 / 5000: loss 2.195081
iteration 2100 / 5000: loss 2.128819
iteration 2200 / 5000: loss 2.259494
iteration 2300 / 5000: loss 2.092245
iteration 2400 / 5000: loss 2.162120
iteration 2500 / 5000: loss 2.129752
iteration 2600 / 5000: loss 2.122928
iteration 2700 / 5000: loss 1.953368
iteration 2800 / 5000: loss 2.069894
iteration 2900 / 5000: loss 2.004992
iteration 3000 / 5000: loss 2.100304
iteration 3100 / 5000: loss 2.252609
iteration 3200 / 5000: loss 1.952120
iteration 3300 / 5000: loss 2.008417
iteration 3400 / 5000: loss 1.922647
iteration 3500 / 5000: loss 2.000243
iteration 3600 / 5000: loss 1.998615
iteration 3700 / 5000: loss 2.134564
iteration 3800 / 5000: loss 1.965767
iteration 3900 / 5000: loss 2.208995
iteration 4000 / 5000: loss 1.961832
iteration 4100 / 5000: loss 1.910969
iteration 4200 / 5000: loss 2.143085
iteration 4300 / 5000: loss 2.004921
iteration 4400 / 5000: loss 1.953204
iteration 4500 / 5000: loss 2.026110
iteration 4600 / 5000: loss 1.734592
iteration 4700 / 5000: loss 1.899200
iteration 4800 / 5000: loss 1.876720
iteration 4900 / 5000: loss 1.875525
lr 9.000000e-07 reg 1.000000e+01 train_accuracy: 0.394551 val_accuracy: 0.355000
iteration 0 / 5000: loss 5.869720
iteration 100 / 5000: loss 3.382915
iteration 200 / 5000: loss 3.116546
iteration 300 / 5000: loss 2.858616
iteration 400 / 5000: loss 2.839334
iteration 500 / 5000: loss 2.961940
iteration 600 / 5000: loss 2.865022
iteration 700 / 5000: loss 2.712839
iteration 800 / 5000: loss 2.559554
iteration 900 / 5000: loss 2.548663
iteration 1000 / 5000: loss 2.483321
iteration 1100 / 5000: loss 2.422072
iteration 1200 / 5000: loss 2.391160
iteration 1300 / 5000: loss 2.348792
iteration 1400 / 5000: loss 2.642693
iteration 1500 / 5000: loss 2.357631
iteration 1600 / 5000: loss 2.317158
iteration 1700 / 5000: loss 2.586897
iteration 1800 / 5000: loss 2.315577
iteration 1900 / 5000: loss 2.321095
iteration 2000 / 5000: loss 2.434012
iteration 2100 / 5000: loss 2.309223
iteration 2200 / 5000: loss 2.451501
iteration 2300 / 5000: loss 2.388473
iteration 2400 / 5000: loss 2.446214
iteration 2500 / 5000: loss 2.350929
iteration 2600 / 5000: loss 2.384842
iteration 2700 / 5000: loss 2.159958
iteration 2800 / 5000: loss 2.231560
iteration 2900 / 5000: loss 2.258436
iteration 3000 / 5000: loss 2.217510
iteration 3100 / 5000: loss 2.047609
iteration 3200 / 5000: loss 2.270681
iteration 3300 / 5000: loss 2.204050
iteration 3400 / 5000: loss 2.258115
iteration 3500 / 5000: loss 2.330980
iteration 3600 / 5000: loss 2.298748
iteration 3700 / 5000: loss 2.146381
iteration 3800 / 5000: loss 2.090754
iteration 3900 / 5000: loss 2.260914
iteration 4000 / 5000: loss 2.259010
iteration 4100 / 5000: loss 2.358499
iteration 4200 / 5000: loss 2.152777
iteration 4300 / 5000: loss 2.115279
iteration 4400 / 5000: loss 2.148883
iteration 4500 / 5000: loss 2.126283
iteration 4600 / 5000: loss 2.079936
iteration 4700 / 5000: loss 2.195484
iteration 4800 / 5000: loss 2.104593
iteration 4900 / 5000: loss 2.087083
lr 9.000000e-07 reg 2.000000e+01 train_accuracy: 0.402776 val_accuracy: 0.390000
iteration 0 / 5000: loss 7.182758
iteration 100 / 5000: loss 3.792502
iteration 200 / 5000: loss 3.553387
iteration 300 / 5000: loss 3.110542
iteration 400 / 5000: loss 2.934089
iteration 500 / 5000: loss 2.977249
iteration 600 / 5000: loss 3.008682
iteration 700 / 5000: loss 2.863187
iteration 800 / 5000: loss 2.876661
iteration 900 / 5000: loss 2.933876
iteration 1000 / 5000: loss 2.904450
iteration 1100 / 5000: loss 2.609439
iteration 1200 / 5000: loss 2.538429
iteration 1300 / 5000: loss 2.870303
iteration 1400 / 5000: loss 2.607766
iteration 1500 / 5000: loss 2.615765
iteration 1600 / 5000: loss 2.631439
iteration 1700 / 5000: loss 2.754391
iteration 1800 / 5000: loss 2.667276
iteration 1900 / 5000: loss 2.637717
iteration 2000 / 5000: loss 2.528657
iteration 2100 / 5000: loss 2.528899
iteration 2200 / 5000: loss 2.613694
iteration 2300 / 5000: loss 2.651468
iteration 2400 / 5000: loss 2.475072
iteration 2500 / 5000: loss 2.542516
iteration 2600 / 5000: loss 2.429088
iteration 2700 / 5000: loss 2.560790
iteration 2800 / 5000: loss 2.374145
iteration 2900 / 5000: loss 2.577092
iteration 3000 / 5000: loss 2.317013
iteration 3100 / 5000: loss 2.377235
iteration 3200 / 5000: loss 2.409451
iteration 3300 / 5000: loss 2.394383
iteration 3400 / 5000: loss 2.250793
iteration 3500 / 5000: loss 2.302346
iteration 3600 / 5000: loss 2.218330
iteration 3700 / 5000: loss 2.354754
iteration 3800 / 5000: loss 2.281644
iteration 3900 / 5000: loss 2.311036
iteration 4000 / 5000: loss 2.326238
iteration 4100 / 5000: loss 2.336458
iteration 4200 / 5000: loss 2.213371
iteration 4300 / 5000: loss 2.353094
iteration 4400 / 5000: loss 2.230778
iteration 4500 / 5000: loss 2.324690
iteration 4600 / 5000: loss 2.123628
iteration 4700 / 5000: loss 2.235051
iteration 4800 / 5000: loss 2.325550
iteration 4900 / 5000: loss 2.154892
lr 9.000000e-07 reg 3.000000e+01 train_accuracy: 0.406041 val_accuracy: 0.384000

iteration 0 / 5000: loss 6.360572
iteration 100 / 5000: loss 2.801455
iteration 200 / 5000: loss 2.378760
iteration 300 / 5000: loss 2.486292
iteration 400 / 5000: loss 2.175007
iteration 500 / 5000: loss 2.324057
iteration 600 / 5000: loss 2.100465
iteration 700 / 5000: loss 2.070962
iteration 800 / 5000: loss 2.385727
iteration 900 / 5000: loss 1.999519
iteration 1000 / 5000: loss 1.928099
iteration 1100 / 5000: loss 2.198480
iteration 1200 / 5000: loss 1.895071
iteration 1300 / 5000: loss 1.880732
iteration 1400 / 5000: loss 1.919072
iteration 1500 / 5000: loss 2.078927
iteration 1600 / 5000: loss 1.869207
iteration 1700 / 5000: loss 1.918191
iteration 1800 / 5000: loss 1.880230
iteration 1900 / 5000: loss 2.038415
iteration 2000 / 5000: loss 1.828915
iteration 2100 / 5000: loss 1.827664
iteration 2200 / 5000: loss 1.904409
iteration 2300 / 5000: loss 1.965851
iteration 2400 / 5000: loss 1.817534
iteration 2500 / 5000: loss 1.861780
iteration 2600 / 5000: loss 1.921353
iteration 2700 / 5000: loss 1.876054
iteration 2800 / 5000: loss 1.762683
iteration 2900 / 5000: loss 1.915407
iteration 3000 / 5000: loss 1.739207
iteration 3100 / 5000: loss 1.745664
iteration 3200 / 5000: loss 1.890640
iteration 3300 / 5000: loss 1.755473
iteration 3400 / 5000: loss 1.692820
iteration 3500 / 5000: loss 1.742119
iteration 3600 / 5000: loss 1.762766
iteration 3700 / 5000: loss 1.685488
iteration 3800 / 5000: loss 1.933448
iteration 3900 / 5000: loss 1.739378
iteration 4000 / 5000: loss 1.752439
iteration 4100 / 5000: loss 1.809102
iteration 4200 / 5000: loss 1.618554
iteration 4300 / 5000: loss 1.810125
iteration 4400 / 5000: loss 1.732595
iteration 4500 / 5000: loss 1.689327
iteration 4600 / 5000: loss 1.836759
iteration 4700 / 5000: loss 1.744632
iteration 4800 / 5000: loss 1.751405
iteration 4900 / 5000: loss 1.865202
lr 1.000000e-06 reg 0.000000e+00 train_accuracy: 0.398714 val_accuracy: 0.391000
iteration 0 / 5000: loss 5.794066
iteration 100 / 5000: loss 3.302954
iteration 200 / 5000: loss 2.967713
iteration 300 / 5000: loss 2.603114
iteration 400 / 5000: loss 2.774242
iteration 500 / 5000: loss 2.542235
iteration 600 / 5000: loss 2.354923
iteration 700 / 5000: loss 2.428202
iteration 800 / 5000: loss 2.389617
iteration 900 / 5000: loss 2.237266
iteration 1000 / 5000: loss 2.197362
iteration 1100 / 5000: loss 2.253081
iteration 1200 / 5000: loss 2.435489
iteration 1300 / 5000: loss 2.222054
iteration 1400 / 5000: loss 2.211893
iteration 1500 / 5000: loss 2.255543
iteration 1600 / 5000: loss 2.168218
iteration 1700 / 5000: loss 2.103852
iteration 1800 / 5000: loss 2.079239
iteration 1900 / 5000: loss 2.191366
iteration 2000 / 5000: loss 2.178138
iteration 2100 / 5000: loss 2.247185
iteration 2200 / 5000: loss 2.116843
iteration 2300 / 5000: loss 2.173474
iteration 2400 / 5000: loss 2.084446
iteration 2500 / 5000: loss 1.931832
iteration 2600 / 5000: loss 2.061922
iteration 2700 / 5000: loss 2.125090
iteration 2800 / 5000: loss 2.018508
iteration 2900 / 5000: loss 2.162242
iteration 3000 / 5000: loss 2.150096
iteration 3100 / 5000: loss 2.006336
iteration 3200 / 5000: loss 1.961450
iteration 3300 / 5000: loss 2.120278
iteration 3400 / 5000: loss 1.883099
iteration 3500 / 5000: loss 2.016540
iteration 3600 / 5000: loss 1.991573
iteration 3700 / 5000: loss 1.963749
iteration 3800 / 5000: loss 2.012203
iteration 3900 / 5000: loss 2.026721
iteration 4000 / 5000: loss 1.987842
iteration 4100 / 5000: loss 1.993377
iteration 4200 / 5000: loss 2.040095
iteration 4300 / 5000: loss 1.947811
iteration 4400 / 5000: loss 1.907277
iteration 4500 / 5000: loss 1.960246
iteration 4600 / 5000: loss 1.922833
iteration 4700 / 5000: loss 1.870092
iteration 4800 / 5000: loss 2.078417
iteration 4900 / 5000: loss 1.959109
lr 1.000000e-06 reg 1.000000e+01 train_accuracy: 0.404694 val_accuracy: 0.376000
iteration 0 / 5000: loss 6.867621
iteration 100 / 5000: loss 3.336973
iteration 200 / 5000: loss 2.812020
iteration 300 / 5000: loss 2.953249
iteration 400 / 5000: loss 3.052467
iteration 500 / 5000: loss 2.586129
iteration 600 / 5000: loss 2.736371
iteration 700 / 5000: loss 2.583538
iteration 800 / 5000: loss 2.610087
iteration 900 / 5000: loss 2.632078
iteration 1000 / 5000: loss 2.328755
iteration 1100 / 5000: loss 2.592812
iteration 1200 / 5000: loss 2.247854
iteration 1300 / 5000: loss 2.459121
iteration 1400 / 5000: loss 2.477791
iteration 1500 / 5000: loss 2.479667
iteration 1600 / 5000: loss 2.546541
iteration 1700 / 5000: loss 2.314790
iteration 1800 / 5000: loss 2.289411
iteration 1900 / 5000: loss 2.391152
iteration 2000 / 5000: loss 2.288120
iteration 2100 / 5000: loss 2.425690
iteration 2200 / 5000: loss 2.226877
iteration 2300 / 5000: loss 2.419409
iteration 2400 / 5000: loss 2.296957
iteration 2500 / 5000: loss 2.227254
iteration 2600 / 5000: loss 2.188369
iteration 2700 / 5000: loss 2.286898
iteration 2800 / 5000: loss 2.169696
iteration 2900 / 5000: loss 2.310491
iteration 3000 / 5000: loss 2.301592
iteration 3100 / 5000: loss 2.205675
iteration 3200 / 5000: loss 2.186299
iteration 3300 / 5000: loss 2.077411
iteration 3400 / 5000: loss 2.383817
iteration 3500 / 5000: loss 2.342136
iteration 3600 / 5000: loss 2.007667
iteration 3700 / 5000: loss 2.134019
iteration 3800 / 5000: loss 2.183024
iteration 3900 / 5000: loss 2.196769
iteration 4000 / 5000: loss 2.040267
iteration 4100 / 5000: loss 2.175368
iteration 4200 / 5000: loss 2.200378
iteration 4300 / 5000: loss 1.998870
iteration 4400 / 5000: loss 2.254803
iteration 4500 / 5000: loss 2.029928
iteration 4600 / 5000: loss 2.135375
iteration 4700 / 5000: loss 2.165441
iteration 4800 / 5000: loss 2.023412
iteration 4900 / 5000: loss 2.324362
lr 1.000000e-06 reg 2.000000e+01 train_accuracy: 0.408184 val_accuracy: 0.386000
iteration 0 / 5000: loss 7.770952
iteration 100 / 5000: loss 3.398525
iteration 200 / 5000: loss 3.403882
iteration 300 / 5000: loss 3.206043
iteration 400 / 5000: loss 3.148112
iteration 500 / 5000: loss 2.945231
iteration 600 / 5000: loss 2.798986
iteration 700 / 5000: loss 2.850944
iteration 800 / 5000: loss 2.917812
iteration 900 / 5000: loss 2.764171
iteration 1000 / 5000: loss 2.734237
iteration 1100 / 5000: loss 2.733411
iteration 1200 / 5000: loss 2.764819
iteration 1300 / 5000: loss 2.932004
iteration 1400 / 5000: loss 2.653553
iteration 1500 / 5000: loss 2.610811
iteration 1600 / 5000: loss 2.759274
iteration 1700 / 5000: loss 2.553949
iteration 1800 / 5000: loss 2.470591
iteration 1900 / 5000: loss 2.604942
iteration 2000 / 5000: loss 2.514193
iteration 2100 / 5000: loss 2.631097
iteration 2200 / 5000: loss 2.495612
iteration 2300 / 5000: loss 2.591848
iteration 2400 / 5000: loss 2.343353
iteration 2500 / 5000: loss 2.404255
iteration 2600 / 5000: loss 2.345186
iteration 2700 / 5000: loss 2.495721
iteration 2800 / 5000: loss 2.232771
iteration 2900 / 5000: loss 2.229923
iteration 3000 / 5000: loss 2.455878
iteration 3100 / 5000: loss 2.459201
iteration 3200 / 5000: loss 2.415375
iteration 3300 / 5000: loss 2.260621
iteration 3400 / 5000: loss 2.327576
iteration 3500 / 5000: loss 2.182609
iteration 3600 / 5000: loss 2.269909
iteration 3700 / 5000: loss 2.302206
iteration 3800 / 5000: loss 2.240540
iteration 3900 / 5000: loss 2.461036
iteration 4000 / 5000: loss 2.323066
iteration 4100 / 5000: loss 2.159657
iteration 4200 / 5000: loss 2.340117
iteration 4300 / 5000: loss 2.071784
iteration 4400 / 5000: loss 2.198875
iteration 4500 / 5000: loss 2.257691
iteration 4600 / 5000: loss 2.239220
iteration 4700 / 5000: loss 2.248447
iteration 4800 / 5000: loss 2.082845
iteration 4900 / 5000: loss 2.110153
lr 1.000000e-06 reg 3.000000e+01 train_accuracy: 0.410796 val_accuracy: 0.380000

iteration 0 / 5000: loss 5.579101
iteration 100 / 5000: loss 2.359505
iteration 200 / 5000: loss 2.208382
iteration 300 / 5000: loss 2.026929
iteration 400 / 5000: loss 2.086660
iteration 500 / 5000: loss 2.010847
iteration 600 / 5000: loss 1.788584
iteration 700 / 5000: loss 1.842434
iteration 800 / 5000: loss 1.976112
iteration 900 / 5000: loss 1.959415
iteration 1000 / 5000: loss 1.820819
iteration 1100 / 5000: loss 2.018902
iteration 1200 / 5000: loss 1.702677
iteration 1300 / 5000: loss 1.818799
iteration 1400 / 5000: loss 1.751906
iteration 1500 / 5000: loss 1.852733
iteration 1600 / 5000: loss 1.780450
iteration 1700 / 5000: loss 1.745322
iteration 1800 / 5000: loss 1.738104
iteration 1900 / 5000: loss 1.758823
iteration 2000 / 5000: loss 1.757212
iteration 2100 / 5000: loss 1.718626
iteration 2200 / 5000: loss 1.801529
iteration 2300 / 5000: loss 2.037068
iteration 2400 / 5000: loss 1.626514
iteration 2500 / 5000: loss 1.669359
iteration 2600 / 5000: loss 1.845450
iteration 2700 / 5000: loss 1.734267
iteration 2800 / 5000: loss 1.800408
iteration 2900 / 5000: loss 1.763581
iteration 3000 / 5000: loss 1.825110
iteration 3100 / 5000: loss 1.812320
iteration 3200 / 5000: loss 1.878842
iteration 3300 / 5000: loss 1.689380
iteration 3400 / 5000: loss 1.773579
iteration 3500 / 5000: loss 1.677485
iteration 3600 / 5000: loss 1.660707
iteration 3700 / 5000: loss 1.864938
iteration 3800 / 5000: loss 1.636848
iteration 3900 / 5000: loss 1.656711
iteration 4000 / 5000: loss 1.811295
iteration 4100 / 5000: loss 1.941059
iteration 4200 / 5000: loss 1.873774
iteration 4300 / 5000: loss 1.833638
iteration 4400 / 5000: loss 1.757460
iteration 4500 / 5000: loss 1.679926
iteration 4600 / 5000: loss 1.723179
iteration 4700 / 5000: loss 1.632027
iteration 4800 / 5000: loss 1.710437
iteration 4900 / 5000: loss 1.640681
lr 3.000000e-06 reg 0.000000e+00 train_accuracy: 0.422204 val_accuracy: 0.367000
iteration 0 / 5000: loss 6.024246
iteration 100 / 5000: loss 2.718068
iteration 200 / 5000: loss 2.508051
iteration 300 / 5000: loss 2.306324
iteration 400 / 5000: loss 2.162522
iteration 500 / 5000: loss 2.364846
iteration 600 / 5000: loss 2.191111
iteration 700 / 5000: loss 2.134356
iteration 800 / 5000: loss 2.073078
iteration 900 / 5000: loss 2.093611
iteration 1000 / 5000: loss 2.153518
iteration 1100 / 5000: loss 2.183059
iteration 1200 / 5000: loss 2.083309
iteration 1300 / 5000: loss 2.086944
iteration 1400 / 5000: loss 2.153596
iteration 1500 / 5000: loss 1.873892
iteration 1600 / 5000: loss 2.104566
iteration 1700 / 5000: loss 2.178526
iteration 1800 / 5000: loss 2.059268
iteration 1900 / 5000: loss 1.967444
iteration 2000 / 5000: loss 1.974806
iteration 2100 / 5000: loss 2.049856
iteration 2200 / 5000: loss 2.114878
iteration 2300 / 5000: loss 2.007107
iteration 2400 / 5000: loss 2.080178
iteration 2500 / 5000: loss 1.838814
iteration 2600 / 5000: loss 1.995751
iteration 2700 / 5000: loss 1.853581
iteration 2800 / 5000: loss 1.882043
iteration 2900 / 5000: loss 1.859659
iteration 3000 / 5000: loss 1.929152
iteration 3100 / 5000: loss 1.839619
iteration 3200 / 5000: loss 1.916338
iteration 3300 / 5000: loss 1.969971
iteration 3400 / 5000: loss 1.983189
iteration 3500 / 5000: loss 1.976237
iteration 3600 / 5000: loss 1.708267
iteration 3700 / 5000: loss 1.822492
iteration 3800 / 5000: loss 1.832224
iteration 3900 / 5000: loss 1.791971
iteration 4000 / 5000: loss 1.925640
iteration 4100 / 5000: loss 1.837499
iteration 4200 / 5000: loss 1.909463
iteration 4300 / 5000: loss 1.997981
iteration 4400 / 5000: loss 1.810655
iteration 4500 / 5000: loss 1.970834
iteration 4600 / 5000: loss 1.820844
iteration 4700 / 5000: loss 1.780043
iteration 4800 / 5000: loss 1.961121
iteration 4900 / 5000: loss 1.853914
lr 3.000000e-06 reg 1.000000e+01 train_accuracy: 0.429755 val_accuracy: 0.393000
iteration 0 / 5000: loss 6.454193
iteration 100 / 5000: loss 2.909984
iteration 200 / 5000: loss 2.672255
iteration 300 / 5000: loss 2.738385
iteration 400 / 5000: loss 2.562657
iteration 500 / 5000: loss 2.381145
iteration 600 / 5000: loss 2.503709
iteration 700 / 5000: loss 2.514114
iteration 800 / 5000: loss 2.258335
iteration 900 / 5000: loss 2.393176
iteration 1000 / 5000: loss 2.330656
iteration 1100 / 5000: loss 2.092920
iteration 1200 / 5000: loss 2.146216
iteration 1300 / 5000: loss 2.261024
iteration 1400 / 5000: loss 2.259106
iteration 1500 / 5000: loss 2.086675
iteration 1600 / 5000: loss 2.116290
iteration 1700 / 5000: loss 2.060138
iteration 1800 / 5000: loss 1.912304
iteration 1900 / 5000: loss 2.120896
iteration 2000 / 5000: loss 2.202104
iteration 2100 / 5000: loss 2.052783
iteration 2200 / 5000: loss 1.953162
iteration 2300 / 5000: loss 2.148624
iteration 2400 / 5000: loss 1.929310
iteration 2500 / 5000: loss 2.081393
iteration 2600 / 5000: loss 1.904832
iteration 2700 / 5000: loss 2.004426
iteration 2800 / 5000: loss 2.041455
iteration 2900 / 5000: loss 1.955354
iteration 3000 / 5000: loss 1.959096
iteration 3100 / 5000: loss 1.891320
iteration 3200 / 5000: loss 1.980676
iteration 3300 / 5000: loss 1.754644
iteration 3400 / 5000: loss 1.889816
iteration 3500 / 5000: loss 1.970623
iteration 3600 / 5000: loss 1.954540
iteration 3700 / 5000: loss 1.890593
iteration 3800 / 5000: loss 1.851183
iteration 3900 / 5000: loss 1.975085
iteration 4000 / 5000: loss 1.898329
iteration 4100 / 5000: loss 1.864786
iteration 4200 / 5000: loss 1.980209
iteration 4300 / 5000: loss 1.842804
iteration 4400 / 5000: loss 1.825380
iteration 4500 / 5000: loss 1.830144
iteration 4600 / 5000: loss 1.808343
iteration 4700 / 5000: loss 1.860571
iteration 4800 / 5000: loss 1.859681
iteration 4900 / 5000: loss 1.855746
lr 3.000000e-06 reg 2.000000e+01 train_accuracy: 0.436898 val_accuracy: 0.398000
iteration 0 / 5000: loss 6.836626
iteration 100 / 5000: loss 3.393198
iteration 200 / 5000: loss 2.786902
iteration 300 / 5000: loss 2.804638
iteration 400 / 5000: loss 2.622994
iteration 500 / 5000: loss 2.546162
iteration 600 / 5000: loss 2.536640
iteration 700 / 5000: loss 2.554380
iteration 800 / 5000: loss 2.301646
iteration 900 / 5000: loss 2.558034
iteration 1000 / 5000: loss 2.347489
iteration 1100 / 5000: loss 2.254336
iteration 1200 / 5000: loss 2.267181
iteration 1300 / 5000: loss 2.250335
iteration 1400 / 5000: loss 2.228238
iteration 1500 / 5000: loss 2.103388
iteration 1600 / 5000: loss 2.181427
iteration 1700 / 5000: loss 2.216838
iteration 1800 / 5000: loss 2.138330
iteration 1900 / 5000: loss 2.262807
iteration 2000 / 5000: loss 2.094658
iteration 2100 / 5000: loss 2.129619
iteration 2200 / 5000: loss 1.982128
iteration 2300 / 5000: loss 2.151801
iteration 2400 / 5000: loss 1.999050
iteration 2500 / 5000: loss 2.065991
iteration 2600 / 5000: loss 1.953773
iteration 2700 / 5000: loss 2.160250
iteration 2800 / 5000: loss 1.973993
iteration 2900 / 5000: loss 2.075466
iteration 3000 / 5000: loss 2.019466
iteration 3100 / 5000: loss 1.900529
iteration 3200 / 5000: loss 1.857706
iteration 3300 / 5000: loss 1.992052
iteration 3400 / 5000: loss 1.931678
iteration 3500 / 5000: loss 2.075879
iteration 3600 / 5000: loss 1.992027
iteration 3700 / 5000: loss 1.742050
iteration 3800 / 5000: loss 1.927766
iteration 3900 / 5000: loss 1.917718
iteration 4000 / 5000: loss 1.880454
iteration 4100 / 5000: loss 1.875124
iteration 4200 / 5000: loss 1.913980
iteration 4300 / 5000: loss 1.834700
iteration 4400 / 5000: loss 1.886777
iteration 4500 / 5000: loss 1.818882
iteration 4600 / 5000: loss 1.786406
iteration 4700 / 5000: loss 1.864157
iteration 4800 / 5000: loss 1.895410
iteration 4900 / 5000: loss 1.778008
lr 3.000000e-06 reg 3.000000e+01 train_accuracy: 0.428061 val_accuracy: 0.409000

iteration 0 / 5000: loss 5.255086
iteration 100 / 5000: loss 3.432331
iteration 200 / 5000: loss 2.809733
iteration 300 / 5000: loss 2.470584
iteration 400 / 5000: loss 2.594360
iteration 500 / 5000: loss 2.224085
iteration 600 / 5000: loss 3.102964
iteration 700 / 5000: loss 2.932607
iteration 800 / 5000: loss 2.062852
iteration 900 / 5000: loss 2.803011
iteration 1000 / 5000: loss 2.098896
iteration 1100 / 5000: loss 2.742945
iteration 1200 / 5000: loss 2.879248
iteration 1300 / 5000: loss 2.703534
iteration 1400 / 5000: loss 2.921872
iteration 1500 / 5000: loss 2.221160
iteration 1600 / 5000: loss 1.880621
iteration 1700 / 5000: loss 2.541900
iteration 1800 / 5000: loss 2.204998
iteration 1900 / 5000: loss 2.794943
iteration 2000 / 5000: loss 2.264381
iteration 2100 / 5000: loss 2.747814
iteration 2200 / 5000: loss 1.963404
iteration 2300 / 5000: loss 2.331548
iteration 2400 / 5000: loss 2.598159
iteration 2500 / 5000: loss 2.202516
iteration 2600 / 5000: loss 2.712103
iteration 2700 / 5000: loss 3.228488
iteration 2800 / 5000: loss 2.238277
iteration 2900 / 5000: loss 2.091414
iteration 3000 / 5000: loss 1.916260
iteration 3100 / 5000: loss 2.321303
iteration 3200 / 5000: loss 2.366662
iteration 3300 / 5000: loss 2.923833
iteration 3400 / 5000: loss 2.371701
iteration 3500 / 5000: loss 2.131748
iteration 3600 / 5000: loss 1.853840
iteration 3700 / 5000: loss 2.649496
iteration 3800 / 5000: loss 2.136003
iteration 3900 / 5000: loss 2.925092
iteration 4000 / 5000: loss 2.518851
iteration 4100 / 5000: loss 3.085701
iteration 4200 / 5000: loss 2.074761
iteration 4300 / 5000: loss 1.854290
iteration 4400 / 5000: loss 2.610373
iteration 4500 / 5000: loss 2.892520
iteration 4600 / 5000: loss 3.357470
iteration 4700 / 5000: loss 2.098251
iteration 4800 / 5000: loss 1.924147
iteration 4900 / 5000: loss 2.069370
lr 9.000000e-06 reg 0.000000e+00 train_accuracy: 0.386388 val_accuracy: 0.342000
iteration 0 / 5000: loss 5.041096
iteration 100 / 5000: loss 2.800540
iteration 200 / 5000: loss 2.556524
iteration 300 / 5000: loss 2.770911
iteration 400 / 5000: loss 2.359258
iteration 500 / 5000: loss 2.867574
iteration 600 / 5000: loss 2.768230
iteration 700 / 5000: loss 2.568220
iteration 800 / 5000: loss 2.634586
iteration 900 / 5000: loss 2.584697
iteration 1000 / 5000: loss 2.048333
iteration 1100 / 5000: loss 2.578358
iteration 1200 / 5000: loss 2.980895
iteration 1300 / 5000: loss 2.487833
iteration 1400 / 5000: loss 3.699052
iteration 1500 / 5000: loss 2.273859
iteration 1600 / 5000: loss 2.298473
iteration 1700 / 5000: loss 2.592677
iteration 1800 / 5000: loss 2.071713
iteration 1900 / 5000: loss 2.850635
iteration 2000 / 5000: loss 2.648414
iteration 2100 / 5000: loss 2.437301
iteration 2200 / 5000: loss 2.251392
iteration 2300 / 5000: loss 2.559390
iteration 2400 / 5000: loss 2.625447
iteration 2500 / 5000: loss 2.468310
iteration 2600 / 5000: loss 3.030068
iteration 2700 / 5000: loss 2.256557
iteration 2800 / 5000: loss 2.860518
iteration 2900 / 5000: loss 1.830774
iteration 3000 / 5000: loss 2.307239
iteration 3100 / 5000: loss 2.386559
iteration 3200 / 5000: loss 2.853774
iteration 3300 / 5000: loss 3.345607
iteration 3400 / 5000: loss 3.216684
iteration 3500 / 5000: loss 2.375279
iteration 3600 / 5000: loss 3.041149
iteration 3700 / 5000: loss 2.478628
iteration 3800 / 5000: loss 2.362251
iteration 3900 / 5000: loss 2.682318
iteration 4000 / 5000: loss 2.862485
iteration 4100 / 5000: loss 2.578177
iteration 4200 / 5000: loss 2.991762
iteration 4300 / 5000: loss 2.256345
iteration 4400 / 5000: loss 2.010826
iteration 4500 / 5000: loss 4.033802
iteration 4600 / 5000: loss 2.691317
iteration 4700 / 5000: loss 2.500369
iteration 4800 / 5000: loss 3.278342
iteration 4900 / 5000: loss 2.181368
lr 9.000000e-06 reg 1.000000e+01 train_accuracy: 0.321143 val_accuracy: 0.290000
iteration 0 / 5000: loss 5.336677
iteration 100 / 5000: loss 2.909316
iteration 200 / 5000: loss 3.006543
iteration 300 / 5000: loss 2.986438
iteration 400 / 5000: loss 3.291382
iteration 500 / 5000: loss 2.492118
iteration 600 / 5000: loss 2.676696
iteration 700 / 5000: loss 2.733394
iteration 800 / 5000: loss 3.737363
iteration 900 / 5000: loss 2.818817
iteration 1000 / 5000: loss 2.597523
iteration 1100 / 5000: loss 3.773572
iteration 1200 / 5000: loss 3.155225
iteration 1300 / 5000: loss 2.593658
iteration 1400 / 5000: loss 2.806002
iteration 1500 / 5000: loss 2.546008
iteration 1600 / 5000: loss 2.546144
iteration 1700 / 5000: loss 2.541680
iteration 1800 / 5000: loss 2.767312
iteration 1900 / 5000: loss 2.449708
iteration 2000 / 5000: loss 2.606908
iteration 2100 / 5000: loss 2.371782
iteration 2200 / 5000: loss 2.969236
iteration 2300 / 5000: loss 2.818390
iteration 2400 / 5000: loss 3.377813
iteration 2500 / 5000: loss 3.328898
iteration 2600 / 5000: loss 3.280727
iteration 2700 / 5000: loss 2.192152
iteration 2800 / 5000: loss 2.159228
iteration 2900 / 5000: loss 2.574870
iteration 3000 / 5000: loss 2.077236
iteration 3100 / 5000: loss 2.502681
iteration 3200 / 5000: loss 2.328942
iteration 3300 / 5000: loss 2.238873
iteration 3400 / 5000: loss 2.266940
iteration 3500 / 5000: loss 2.233484
iteration 3600 / 5000: loss 2.454057
iteration 3700 / 5000: loss 2.455141
iteration 3800 / 5000: loss 2.906823
iteration 3900 / 5000: loss 2.468250
iteration 4000 / 5000: loss 2.312085
iteration 4100 / 5000: loss 2.311764
iteration 4200 / 5000: loss 2.309835
iteration 4300 / 5000: loss 2.217428
iteration 4400 / 5000: loss 2.897619
iteration 4500 / 5000: loss 2.642046
iteration 4600 / 5000: loss 2.519733
iteration 4700 / 5000: loss 2.280202
iteration 4800 / 5000: loss 2.273049
iteration 4900 / 5000: loss 2.614350
lr 9.000000e-06 reg 2.000000e+01 train_accuracy: 0.328653 val_accuracy: 0.322000
iteration 0 / 5000: loss 6.038862
iteration 100 / 5000: loss 3.186071
iteration 200 / 5000: loss 3.861151
iteration 300 / 5000: loss 3.310606
iteration 400 / 5000: loss 2.618484
iteration 500 / 5000: loss 2.793410
iteration 600 / 5000: loss 2.905470
iteration 700 / 5000: loss 2.912736
iteration 800 / 5000: loss 2.707339
iteration 900 / 5000: loss 2.913764
iteration 1000 / 5000: loss 2.654258
iteration 1100 / 5000: loss 2.614473
iteration 1200 / 5000: loss 2.320458
iteration 1300 / 5000: loss 2.589366
iteration 1400 / 5000: loss 2.427446
iteration 1500 / 5000: loss 4.614443
iteration 1600 / 5000: loss 2.104315
iteration 1700 / 5000: loss 2.319863
iteration 1800 / 5000: loss 2.186571
iteration 1900 / 5000: loss 2.517079
iteration 2000 / 5000: loss 2.267435
iteration 2100 / 5000: loss 2.315993
iteration 2200 / 5000: loss 2.145067
iteration 2300 / 5000: loss 2.797983
iteration 2400 / 5000: loss 2.298326
iteration 2500 / 5000: loss 2.485202
iteration 2600 / 5000: loss 2.492742
iteration 2700 / 5000: loss 2.337195
iteration 2800 / 5000: loss 3.212484
iteration 2900 / 5000: loss 2.267784
iteration 3000 / 5000: loss 3.203229
iteration 3100 / 5000: loss 2.279870
iteration 3200 / 5000: loss 3.388173
iteration 3300 / 5000: loss 2.151430
iteration 3400 / 5000: loss 2.594671
iteration 3500 / 5000: loss 3.235025
iteration 3600 / 5000: loss 2.345452
iteration 3700 / 5000: loss 2.257420
iteration 3800 / 5000: loss 3.052169
iteration 3900 / 5000: loss 2.541563
iteration 4000 / 5000: loss 2.170400
iteration 4100 / 5000: loss 2.670277
iteration 4200 / 5000: loss 2.162022
iteration 4300 / 5000: loss 2.796344
iteration 4400 / 5000: loss 2.667445
iteration 4500 / 5000: loss 3.126176
iteration 4600 / 5000: loss 3.276190
iteration 4700 / 5000: loss 2.018075
iteration 4800 / 5000: loss 2.004788
iteration 4900 / 5000: loss 3.537694
lr 9.000000e-06 reg 3.000000e+01 train_accuracy: 0.288082 val_accuracy: 0.285000

iteration 0 / 5000: loss 5.016661
iteration 100 / 5000: loss 4.155857
iteration 200 / 5000: loss 2.946559
iteration 300 / 5000: loss 3.785657
iteration 400 / 5000: loss 3.222257
iteration 500 / 5000: loss 2.546774
iteration 600 / 5000: loss 2.656769
iteration 700 / 5000: loss 2.939526
iteration 800 / 5000: loss 3.634860
iteration 900 / 5000: loss 3.478608
iteration 1000 / 5000: loss 2.404828
iteration 1100 / 5000: loss 2.169717
iteration 1200 / 5000: loss 2.214158
iteration 1300 / 5000: loss 2.408467
iteration 1400 / 5000: loss 2.599862
iteration 1500 / 5000: loss 2.997066
iteration 1600 / 5000: loss 2.405257
iteration 1700 / 5000: loss 2.039231
iteration 1800 / 5000: loss 2.854190
iteration 1900 / 5000: loss 2.781875
iteration 2000 / 5000: loss 3.430944
iteration 2100 / 5000: loss 2.388086
iteration 2200 / 5000: loss 2.351513
iteration 2300 / 5000: loss 2.550103
iteration 2400 / 5000: loss 2.116575
iteration 2500 / 5000: loss 2.365894
iteration 2600 / 5000: loss 2.380938
iteration 2700 / 5000: loss 3.008602
iteration 2800 / 5000: loss 3.658783
iteration 2900 / 5000: loss 2.476036
iteration 3000 / 5000: loss 2.732098
iteration 3100 / 5000: loss 2.292632
iteration 3200 / 5000: loss 2.865500
iteration 3300 / 5000: loss 2.334434
iteration 3400 / 5000: loss 2.843120
iteration 3500 / 5000: loss 2.581096
iteration 3600 / 5000: loss 2.752521
iteration 3700 / 5000: loss 2.494821
iteration 3800 / 5000: loss 2.965576
iteration 3900 / 5000: loss 3.565420
iteration 4000 / 5000: loss 2.372658
iteration 4100 / 5000: loss 2.682700
iteration 4200 / 5000: loss 2.688395
iteration 4300 / 5000: loss 2.575862
iteration 4400 / 5000: loss 3.432930
iteration 4500 / 5000: loss 2.575548
iteration 4600 / 5000: loss 3.153729
iteration 4700 / 5000: loss 2.630418
iteration 4800 / 5000: loss 2.795335
iteration 4900 / 5000: loss 2.188269
lr 1.000000e-05 reg 0.000000e+00 train_accuracy: 0.366286 val_accuracy: 0.341000
iteration 0 / 5000: loss 5.791298
iteration 100 / 5000: loss 3.440958
iteration 200 / 5000: loss 3.779345
iteration 300 / 5000: loss 3.485605
iteration 400 / 5000: loss 2.726658
iteration 500 / 5000: loss 3.757652
iteration 600 / 5000: loss 3.530423
iteration 700 / 5000: loss 3.199600
iteration 800 / 5000: loss 3.027865
iteration 900 / 5000: loss 3.600032
iteration 1000 / 5000: loss 2.710552
iteration 1100 / 5000: loss 3.035919
iteration 1200 / 5000: loss 3.225897
iteration 1300 / 5000: loss 2.770037
iteration 1400 / 5000: loss 2.874390
iteration 1500 / 5000: loss 2.579563
iteration 1600 / 5000: loss 3.405138
iteration 1700 / 5000: loss 2.599846
iteration 1800 / 5000: loss 2.917686
iteration 1900 / 5000: loss 2.362158
iteration 2000 / 5000: loss 2.796984
iteration 2100 / 5000: loss 2.969405
iteration 2200 / 5000: loss 3.833965
iteration 2300 / 5000: loss 2.860082
iteration 2400 / 5000: loss 2.382388
iteration 2500 / 5000: loss 2.207060
iteration 2600 / 5000: loss 2.311406
iteration 2700 / 5000: loss 2.625075
iteration 2800 / 5000: loss 3.179251
iteration 2900 / 5000: loss 2.623061
iteration 3000 / 5000: loss 3.111991
iteration 3100 / 5000: loss 3.450343
iteration 3200 / 5000: loss 2.250156
iteration 3300 / 5000: loss 2.404153
iteration 3400 / 5000: loss 2.667039
iteration 3500 / 5000: loss 3.376922
iteration 3600 / 5000: loss 2.735514
iteration 3700 / 5000: loss 2.384534
iteration 3800 / 5000: loss 2.795430
iteration 3900 / 5000: loss 2.784857
iteration 4000 / 5000: loss 2.273599
iteration 4100 / 5000: loss 2.901629
iteration 4200 / 5000: loss 3.489448
iteration 4300 / 5000: loss 2.665009
iteration 4400 / 5000: loss 2.991187
iteration 4500 / 5000: loss 3.092421
iteration 4600 / 5000: loss 2.336501
iteration 4700 / 5000: loss 4.223221
iteration 4800 / 5000: loss 2.948641
iteration 4900 / 5000: loss 2.702853
lr 1.000000e-05 reg 1.000000e+01 train_accuracy: 0.384082 val_accuracy: 0.332000
iteration 0 / 5000: loss 6.163628
iteration 100 / 5000: loss 3.276208
iteration 200 / 5000: loss 3.456445
iteration 300 / 5000: loss 3.089058
iteration 400 / 5000: loss 4.906455
iteration 500 / 5000: loss 3.190141
iteration 600 / 5000: loss 2.986315
iteration 700 / 5000: loss 2.848284
iteration 800 / 5000: loss 3.214327
iteration 900 / 5000: loss 3.431559
iteration 1000 / 5000: loss 3.069665
iteration 1100 / 5000: loss 3.659971
iteration 1200 / 5000: loss 2.858215
iteration 1300 / 5000: loss 2.454452
iteration 1400 / 5000: loss 2.422976
iteration 1500 / 5000: loss 2.621479
iteration 1600 / 5000: loss 2.889756
iteration 1700 / 5000: loss 2.714707
iteration 1800 / 5000: loss 2.419300
iteration 1900 / 5000: loss 2.646918
iteration 2000 / 5000: loss 2.842511
iteration 2100 / 5000: loss 2.649382
iteration 2200 / 5000: loss 2.772975
iteration 2300 / 5000: loss 3.033201
iteration 2400 / 5000: loss 2.978408
iteration 2500 / 5000: loss 2.144853
iteration 2600 / 5000: loss 2.181829
iteration 2700 / 5000: loss 3.240294
iteration 2800 / 5000: loss 3.216088
iteration 2900 / 5000: loss 2.352159
iteration 3000 / 5000: loss 3.184868
iteration 3100 / 5000: loss 2.375062
iteration 3200 / 5000: loss 2.853425
iteration 3300 / 5000: loss 2.670030
iteration 3400 / 5000: loss 4.111603
iteration 3500 / 5000: loss 2.185689
iteration 3600 / 5000: loss 2.604840
iteration 3700 / 5000: loss 2.285340
iteration 3800 / 5000: loss 2.830536
iteration 3900 / 5000: loss 2.833625
iteration 4000 / 5000: loss 2.588755
iteration 4100 / 5000: loss 2.353959
iteration 4200 / 5000: loss 2.450299
iteration 4300 / 5000: loss 2.993622
iteration 4400 / 5000: loss 2.641635
iteration 4500 / 5000: loss 2.460381
iteration 4600 / 5000: loss 2.660724
iteration 4700 / 5000: loss 2.724993
iteration 4800 / 5000: loss 2.181915
iteration 4900 / 5000: loss 2.226334
lr 1.000000e-05 reg 2.000000e+01 train_accuracy: 0.289143 val_accuracy: 0.273000
iteration 0 / 5000: loss 7.159946
iteration 100 / 5000: loss 3.802031
iteration 200 / 5000: loss 4.027112
iteration 300 / 5000: loss 3.116680
iteration 400 / 5000: loss 2.850288
iteration 500 / 5000: loss 3.738379
iteration 600 / 5000: loss 4.282948
iteration 700 / 5000: loss 3.251790
iteration 800 / 5000: loss 2.750396
iteration 900 / 5000: loss 3.522368
iteration 1000 / 5000: loss 2.664103
iteration 1100 / 5000: loss 2.973611
iteration 1200 / 5000: loss 3.760410
iteration 1300 / 5000: loss 2.495893
iteration 1400 / 5000: loss 2.457744
iteration 1500 / 5000: loss 2.846360
iteration 1600 / 5000: loss 2.493234
iteration 1700 / 5000: loss 3.948903
iteration 1800 / 5000: loss 3.252895
iteration 1900 / 5000: loss 2.092049
iteration 2000 / 5000: loss 2.317510
iteration 2100 / 5000: loss 3.078219
iteration 2200 / 5000: loss 3.286067
iteration 2300 / 5000: loss 2.532288
iteration 2400 / 5000: loss 2.613338
iteration 2500 / 5000: loss 3.252645
iteration 2600 / 5000: loss 3.567793
iteration 2700 / 5000: loss 2.720252
iteration 2800 / 5000: loss 2.831134
iteration 2900 / 5000: loss 2.256788
iteration 3000 / 5000: loss 2.643043
iteration 3100 / 5000: loss 2.337396
iteration 3200 / 5000: loss 3.057481
iteration 3300 / 5000: loss 2.619867
iteration 3400 / 5000: loss 3.304847
iteration 3500 / 5000: loss 2.786456
iteration 3600 / 5000: loss 2.946485
iteration 3700 / 5000: loss 2.394184
iteration 3800 / 5000: loss 2.485016
iteration 3900 / 5000: loss 3.760162
iteration 4000 / 5000: loss 4.052855
iteration 4100 / 5000: loss 3.412868
iteration 4200 / 5000: loss 2.541334
iteration 4300 / 5000: loss 2.530453
iteration 4400 / 5000: loss 3.262062
iteration 4500 / 5000: loss 2.287390
iteration 4600 / 5000: loss 2.273288
iteration 4700 / 5000: loss 2.396527
iteration 4800 / 5000: loss 3.260886
iteration 4900 / 5000: loss 3.702511
lr 1.000000e-05 reg 3.000000e+01 train_accuracy: 0.301653 val_accuracy: 0.291000

lr 9.000000e-07 reg 0.000000e+00 train_accuracy: 0.395347 val_accuracy: 0.354000
lr 9.000000e-07 reg 1.000000e+01 train_accuracy: 0.394551 val_accuracy: 0.355000
lr 9.000000e-07 reg 2.000000e+01 train_accuracy: 0.402776 val_accuracy: 0.390000
lr 9.000000e-07 reg 3.000000e+01 train_accuracy: 0.406041 val_accuracy: 0.384000
lr 1.000000e-06 reg 0.000000e+00 train_accuracy: 0.398714 val_accuracy: 0.391000
lr 1.000000e-06 reg 1.000000e+01 train_accuracy: 0.404694 val_accuracy: 0.376000
lr 1.000000e-06 reg 2.000000e+01 train_accuracy: 0.408184 val_accuracy: 0.386000
lr 1.000000e-06 reg 3.000000e+01 train_accuracy: 0.410796 val_accuracy: 0.380000
lr 3.000000e-06 reg 0.000000e+00 train_accuracy: 0.422204 val_accuracy: 0.367000
lr 3.000000e-06 reg 1.000000e+01 train_accuracy: 0.429755 val_accuracy: 0.393000
lr 3.000000e-06 reg 2.000000e+01 train_accuracy: 0.436898 val_accuracy: 0.398000
lr 3.000000e-06 reg 3.000000e+01 train_accuracy: 0.428061 val_accuracy: 0.409000
lr 9.000000e-06 reg 0.000000e+00 train_accuracy: 0.386388 val_accuracy: 0.342000
lr 9.000000e-06 reg 1.000000e+01 train_accuracy: 0.321143 val_accuracy: 0.290000
lr 9.000000e-06 reg 2.000000e+01 train_accuracy: 0.328653 val_accuracy: 0.322000
lr 9.000000e-06 reg 3.000000e+01 train_accuracy: 0.288082 val_accuracy: 0.285000
lr 1.000000e-05 reg 0.000000e+00 train_accuracy: 0.366286 val_accuracy: 0.341000
lr 1.000000e-05 reg 1.000000e+01 train_accuracy: 0.384082 val_accuracy: 0.332000
lr 1.000000e-05 reg 2.000000e+01 train_accuracy: 0.289143 val_accuracy: 0.273000
lr 1.000000e-05 reg 3.000000e+01 train_accuracy: 0.301653 val_accuracy: 0.291000

best validation accuracy achieved during cross-validation: 0.409000

``````
``````

In [33]:

# Print the cross-validation results, sorted by (learning rate, regularization
# strength), followed by the best validation accuracy found in the sweep.
# `results` maps (lr, reg) -> (train_accuracy, val_accuracy); `best_val` is the
# best validation accuracy — both are defined by the hyperparameter-search cell.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
    print('lr %e reg %e train_accuracy: %f val_accuracy: %f' % (lr, reg, train_accuracy, val_accuracy))

# Bug fix: the original had a bare `print` (a no-op expression in Python 3,
# which this notebook targets — see the parenthesized print calls throughout).
# Call it so the intended blank separator line is actually emitted.
print()
print('best validation accuracy achieved during cross-validation: %f' % best_val)

``````
``````

lr 9.000000e-07 reg 0.000000e+00 train_accuracy: 0.395347 val_accuracy: 0.354000
lr 9.000000e-07 reg 1.000000e+01 train_accuracy: 0.394551 val_accuracy: 0.355000
lr 9.000000e-07 reg 2.000000e+01 train_accuracy: 0.402776 val_accuracy: 0.390000
lr 9.000000e-07 reg 3.000000e+01 train_accuracy: 0.406041 val_accuracy: 0.384000
lr 1.000000e-06 reg 0.000000e+00 train_accuracy: 0.398714 val_accuracy: 0.391000
lr 1.000000e-06 reg 1.000000e+01 train_accuracy: 0.404694 val_accuracy: 0.376000
lr 1.000000e-06 reg 2.000000e+01 train_accuracy: 0.408184 val_accuracy: 0.386000
lr 1.000000e-06 reg 3.000000e+01 train_accuracy: 0.410796 val_accuracy: 0.380000
lr 3.000000e-06 reg 0.000000e+00 train_accuracy: 0.422204 val_accuracy: 0.367000
lr 3.000000e-06 reg 1.000000e+01 train_accuracy: 0.429755 val_accuracy: 0.393000
lr 3.000000e-06 reg 2.000000e+01 train_accuracy: 0.436898 val_accuracy: 0.398000
lr 3.000000e-06 reg 3.000000e+01 train_accuracy: 0.428061 val_accuracy: 0.409000
lr 9.000000e-06 reg 0.000000e+00 train_accuracy: 0.386388 val_accuracy: 0.342000
lr 9.000000e-06 reg 1.000000e+01 train_accuracy: 0.321143 val_accuracy: 0.290000
lr 9.000000e-06 reg 2.000000e+01 train_accuracy: 0.328653 val_accuracy: 0.322000
lr 9.000000e-06 reg 3.000000e+01 train_accuracy: 0.288082 val_accuracy: 0.285000
lr 1.000000e-05 reg 0.000000e+00 train_accuracy: 0.366286 val_accuracy: 0.341000
lr 1.000000e-05 reg 1.000000e+01 train_accuracy: 0.384082 val_accuracy: 0.332000
lr 1.000000e-05 reg 2.000000e+01 train_accuracy: 0.289143 val_accuracy: 0.273000
lr 1.000000e-05 reg 3.000000e+01 train_accuracy: 0.301653 val_accuracy: 0.291000

best validation accuracy achieved during cross-validation: 0.409000

``````
``````

In [23]:

# Evaluate the best softmax classifier (picked on the validation set) on the
# held-out test set and report its accuracy.
predictions = best_softmax.predict(X_test)
test_accuracy = np.mean(predictions == y_test)
print('softmax on raw pixels final test set accuracy: %f' % test_accuracy)

``````
``````

softmax on raw pixels final test set accuracy: 0.338000

``````
``````

In [24]:

# Visualize the learned weights for each class as a 32x32 RGB template.
# best_softmax.W is (D+1, 10): 3072 pixel weights plus one bias row per class;
# the bias row is dropped before reshaping to image form.
# NOTE: the export of this cell had lost the loop-body indentation (invalid
# Python as written); the structure is restored here.
w = best_softmax.W[:-1, :]  # strip out the bias row
w = w.reshape(32, 32, 3, 10)
w_min, w_max = np.min(w), np.max(w)
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for i in range(10):
    plt.subplot(2, 5, i + 1)

    # Rescale the weights into the displayable [0, 255] range.
    wimg = 255.0 * (w[:, :, :, i].squeeze() - w_min) / (w_max - w_min)
    plt.imshow(wimg.astype('uint8'))
    plt.axis('off')
    plt.title(classes[i])

``````
``````

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-24-7ee6969e75ce> in <module>()
1 # Visualize the learned weights for each class.
----> 2 w = best_svm.W[:-1, :] # STRIP OUT THE BIAS
3 w = w.reshape(32, 32, 3, 10)
4 w_min, w_max = np.min(w), np.max(w)
5 classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

NameError: name 'best_svm' is not defined

``````
``````

In [ ]:

``````