In [1]:
import caffe
caffe.set_mode_cpu()
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['image.cmap'] = 'gray'
%matplotlib inline

In Caffe, models are specified in separate protobuf (.prototxt) files.

# %load example_caffe_mnist_model.prototxt
name: "ExampleCaffeMNISTModel"
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00392156862745
  }
  data_param {
    source: "/work/rbusche/datasets/mnist/mnist_train_lmdb"
    batch_size: 64
    backend: LMDB
  }
}
layer {
  name: "conv2d_1"
  type: "Convolution"
  bottom: "data"
  top: "conv2d_1"
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "gaussian"  # initialize the filters from a Gaussian
      std: 0.01         # distribution with stdev 0.01 (default mean: 0)
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv2d_1"
  top: "conv2d_1"  # in-place computation
}
layer {
  name: "max_pooling2d_1"
  type: "Pooling"
  bottom: "conv2d_1"
  top: "max_pooling2d_1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv2d_2"
  type: "Convolution"
  bottom: "max_pooling2d_1"
  top: "conv2d_2"
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "gaussian"  # initialize the filters from a Gaussian
      std: 0.01         # distribution with stdev 0.01 (default mean: 0)
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2d_2"
  top: "conv2d_2"  # in-place computation
}
layer {
  name: "dropout_1"
  type: "Dropout"
  bottom: "conv2d_2"
  top: "dropout_1"
}
layer {
  name: "dense_1"
  type: "InnerProduct"
  inner_product_param {
    num_output: 64
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
  bottom: "dropout_1"
  top: "dense_1"
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "dense_1"
  top: "dense_1"  # in-place computation
}
layer {
  name: "dropout_2"
  type: "Dropout"
  bottom: "dense_1"
  top: "dropout_2"
}
layer {
  name: "dense_2"
  type: "InnerProduct"
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
  bottom: "dropout_2"
  top: "dense_2"
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "dense_2"
  bottom: "label"
  top: "loss"
}
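
Before training, the architecture can be sanity-checked by instantiating the net directly and listing its blob shapes. A minimal sketch, assuming the prototxt above is saved next to the notebook; note that constructing the net opens the LMDB given in the Data layer, so the source path must exist on your machine:

In [ ]:
# Instantiate the net in TRAIN phase and print every blob with its shape.
net = caffe.Net('example_caffe_mnist_model.prototxt', caffe.TRAIN)
for name, blob in net.blobs.items():
    print(name, blob.data.shape)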

Additionally, a solver has to be specified that determines the training parameters.

# %load mnist_solver.prototxt
# The train/test net protocol buffer definition
net: "example_caffe_mnist_model.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# With the batch size of 64 above, 100 test iterations cover 6,400 of the
# 10,000 test images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# solver mode: CPU or GPU
solver_mode: CPU
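
The "inv" policy decays the learning rate as base_lr * (1 + gamma * iter)^(-power). A quick sketch of the resulting schedule over the 10,000 iterations, using the values from the solver above:

In [ ]:
# Learning rate under Caffe's "inv" policy: base_lr * (1 + gamma * iter)**(-power)
base_lr, gamma, power = 0.01, 0.0001, 0.75
iters = np.arange(10000)
plt.plot(iters, base_lr * (1 + gamma * iters) ** -power)
plt.xlabel('iteration')
plt.ylabel('learning rate')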

Instantiate the solver and train the network.


In [3]:
solver = caffe.SGDSolver('mnist_solver.prototxt')
solver.net.forward()


Out[3]:
{'loss': array(2.3025808334350586, dtype=float32)}
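Besides the scalar loss (about ln 10, as expected for an untrained 10-class softmax), the solver exposes the train net and, since test_iter is set, one test net. Their activations and learned parameters can be inspected as numpy arrays; a short sketch:

In [ ]:
# Activation blob shapes of the train net ...
print([(k, v.data.shape) for k, v in solver.net.blobs.items()])
# ... and shapes of the learnable parameters
# (index 0 holds the weights, index 1 the biases).
print([(k, v[0].data.shape) for k, v in solver.net.params.items()])
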

In [8]:
niter = 2500
test_interval = 100
# losses will also be stored in the log
train_loss = np.zeros(niter)
test_acc = np.zeros(int(np.ceil(niter / test_interval)))
output = np.zeros((niter, 8, 10))

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe
    
    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data
    
    # store the output on the first test batch
    # (start the forward pass at conv2d_1 to avoid loading new data)
    solver.test_nets[0].forward(start='conv2d_1')
    output[it] = solver.test_nets[0].blobs['dense_2'].data[:8]
    
    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    #  how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        print('Iteration', it, 'testing...')
        correct = 0
        test_iter = 100
        for test_it in range(test_iter):
            solver.test_nets[0].forward()
            correct += sum(solver.test_nets[0].blobs['dense_2'].data.argmax(1)
                           == solver.test_nets[0].blobs['label'].data)
        test_acc[it // test_interval] = correct / (64 * test_iter)


Iteration 0 testing...
Iteration 100 testing...
Iteration 200 testing...
Iteration 300 testing...
Iteration 400 testing...
Iteration 500 testing...
Iteration 600 testing...
Iteration 700 testing...
Iteration 800 testing...
Iteration 900 testing...
Iteration 1000 testing...
Iteration 1100 testing...
Iteration 1200 testing...
Iteration 1300 testing...
Iteration 1400 testing...
Iteration 1500 testing...
Iteration 1600 testing...
Iteration 1700 testing...
Iteration 1800 testing...
Iteration 1900 testing...
Iteration 2000 testing...
Iteration 2100 testing...
Iteration 2200 testing...
Iteration 2300 testing...
Iteration 2400 testing...
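
The raw dense_2 scores recorded in output can be used to watch how the predictions for the held-out test digits evolve during the early iterations, in the spirit of the Caffe LeNet tutorial. A sketch for the first test image:

In [ ]:
# For the first test image, show how the 10 class scores develop over the
# first 200 iterations: rows are classes, columns are iterations.
plt.figure(figsize=(10, 2))
plt.imshow(output[:200, 0].T, interpolation='nearest')
plt.xlabel('iteration')
plt.ylabel('class')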

In [9]:
_, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot(np.arange(niter), train_loss)
ax2.plot(test_interval * np.arange(len(test_acc)), test_acc, 'r')
ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')
ax2.set_ylabel('test accuracy')
ax2.set_title('Test Accuracy: {:.2f}'.format(test_acc[-1]))


Out[9]:
<matplotlib.text.Text at 0x7f3b4a639240>

The trained weights can be saved to a .caffemodel file.


In [11]:
solver.net.save('mnist.caffemodel')
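
The saved weights can later be restored into a net with the same definition; layers are matched by name. A sketch that reuses the training prototxt (for real inference one would normally write a separate deploy prototxt with an input layer in place of the LMDB Data layer):

In [ ]:
# Load the trained weights into a fresh net in TEST phase.
net = caffe.Net('example_caffe_mnist_model.prototxt', 'mnist.caffemodel', caffe.TEST)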