In [4]:
import numpy as np
from Solver import Solver
from data_utils.data import get_CIFAR10_data

# Load CIFAR-10 and report the shape of each split.
data = get_CIFAR10_data()
for k, v in data.items():
    print('%s: ' % k, v.shape)

from classifiers.cnn import ThreeLayerConvNet

# Subsample the training set so a network can overfit it quickly as a sanity check.
num_train = 100
small_data = {
    'X_train': data['X_train'][:num_train],
    'y_train': data['y_train'][:num_train],
    'X_val': data['X_val'],
    'y_val': data['y_val'],
}
In [2]:
%load_ext autoreload
%autoreload 2
from classifiers import cnn_huge as cnn
#res = cnn.ResNet()
#res.loss(data['X_train'][:num_train], data['y_train'][:num_train])

# Zero-center the data. small_data['X_train'] is a view into data['X_train'],
# so assign a new array instead of subtracting in place, and apply the same
# training mean to the validation set so both splits share one preprocessing.
mean_image = np.mean(small_data['X_train'], axis=0)
small_data['X_train'] = small_data['X_train'] - mean_image
small_data['X_val'] = small_data['X_val'] - mean_image
In [5]:
from classifiers import cnn_huge as cnn
res = cnn.ResNet(weight_scale=1.8e-3, reg=0.5)
solver = Solver(res, small_data,
                update_rule='adam',
                optim_config={
                    'learning_rate': 1e-3,
                },
                verbose=True,
                num_epochs=5, batch_size=50,
                print_every=1)
solver.train()
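In [ ]:
# Sanity check (sketch): with a softmax over the 10 CIFAR-10 classes and small
# random weights, the initial loss should sit near -log(1/10) ≈ 2.303 before
# regularization. This assumes cnn.ResNet.loss follows the usual cs231n
# convention of returning (loss, grads) when labels are passed, as in the
# commented-out call above.
sanity_model = cnn.ResNet(weight_scale=1.8e-3, reg=0.0)
loss, grads = sanity_model.loss(small_data['X_train'], small_data['y_train'])
print('initial loss: %.4f (expected ~%.4f)' % (loss, np.log(10)))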
In [ ]:
import matplotlib.pyplot as plt
plt.subplot(2, 1, 1)
plt.plot(solver.loss_history, 'o')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.subplot(2, 1, 2)
plt.plot(solver.train_acc_history, '-o')
plt.plot(solver.val_acc_history, '-o')
plt.legend(['train', 'val'], loc='upper left')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.show()
In [4]:
from classifiers import cnn_huge as cnn

# Sweep weight initialization scales for the batchnorm ResNet; keep each
# trained solver so its histories can be compared afterwards.
bn_solvers = {}
solvers = {}
weight_scales = np.logspace(-4, 0, num=20)
for i, weight_scale in enumerate(weight_scales):
    print('Running weight scale %d / %d' % (i + 1, len(weight_scales)))
    bn_model = cnn.ResNet(reg=0.5, weight_scale=weight_scale, use_batchnorm=True)
    #model = FullyConnectedNet(hidden_dims, weight_scale=weight_scale, use_batchnorm=False)
    bn_solver = Solver(bn_model, small_data,
                       num_epochs=10, batch_size=50,
                       update_rule='adam',
                       optim_config={
                           'learning_rate': 1e-3,
                       },
                       verbose=False, print_every=20)
    bn_solver.train()
    bn_solvers[weight_scale] = bn_solver

    #solver = Solver(model, small_data,
    #                num_epochs=10, batch_size=50,
    #                update_rule='adam',
    #                optim_config={
    #                    'learning_rate': 1e-3,
    #                },
    #                verbose=False, print_every=200)
    #solver.train()
    #solvers[weight_scale] = solver
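In [ ]:
# Sketch: summarize the sweep by picking the weight scale whose solver reached
# the highest validation accuracy. Uses only the bn_solvers dict built above.
best_ws = max(weight_scales, key=lambda ws: max(bn_solvers[ws].val_acc_history))
print('best weight scale: %g (best val accuracy: %.3f)'
      % (best_ws, max(bn_solvers[best_ws].val_acc_history)))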
In [5]:
import matplotlib.pyplot as plt
best_train_accs, bn_best_train_accs = [], []
best_val_accs, bn_best_val_accs = [], []
final_train_loss, bn_final_train_loss = [], []
for ws in weight_scales:
    #best_train_accs.append(max(solvers[ws].train_acc_history))
    bn_best_train_accs.append(max(bn_solvers[ws].train_acc_history))
    #best_val_accs.append(max(solvers[ws].val_acc_history))
    bn_best_val_accs.append(max(bn_solvers[ws].val_acc_history))
    #final_train_loss.append(np.mean(solvers[ws].loss_history[-100:]))
    bn_final_train_loss.append(np.mean(bn_solvers[ws].loss_history[-100:]))

plt.subplot(3, 1, 1)
plt.title('Best val accuracy vs weight initialization scale')
plt.xlabel('Weight initialization scale')
plt.ylabel('Best val accuracy')
#plt.semilogx(weight_scales, best_val_accs, '-o', label='baseline')
plt.semilogx(weight_scales, bn_best_val_accs, '-o', label='batchnorm')
plt.legend(ncol=2, loc='lower right')
plt.subplot(3, 1, 2)
plt.title('Best train accuracy vs weight initialization scale')
plt.xlabel('Weight initialization scale')
plt.ylabel('Best training accuracy')
#plt.semilogx(weight_scales, best_train_accs, '-o', label='baseline')
plt.semilogx(weight_scales, bn_best_train_accs, '-o', label='batchnorm')
plt.legend()
plt.subplot(3, 1, 3)
plt.title('Final training loss vs weight initialization scale')
plt.xlabel('Weight initialization scale')
plt.ylabel('Final training loss')
#plt.semilogx(weight_scales, final_train_loss, '-o', label='baseline')
plt.semilogx(weight_scales, bn_final_train_loss, '-o', label='batchnorm')
plt.legend()
plt.gcf().set_size_inches(10, 15)
plt.show()
In [6]:
print(bn_final_train_loss)
In [7]:
print(weight_scales)
In [22]:
# Refine the sweep to the narrower range suggested by the coarse search above.
weight_scales = np.logspace(-1.5, 0, num=1000)
print(weight_scales)
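In [ ]:
# Sketch: a 1000-point grid is far too fine to train one model per point; a
# practical rerun of the sweep above would subsample this range instead.
# refined_scales is a hypothetical coarser grid, not part of the original run.
refined_scales = np.logspace(-1.5, 0, num=10)
print(refined_scales)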