In [1]:
import caffe
import numpy as np
import matplotlib.pyplot as plt
# Use a grayscale colormap by default when displaying images.
plt.rcParams['image.cmap'] = 'gray'
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

In Caffe, models are specified in separate protobuf files.

```
# %load example_caffe_mnist_model.prototxt
name: "ExampleCaffeMNISTModel"
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00392156862745
  }
  data_param {
    source: "/work/rbusche/datasets/mnist/mnist_train_lmdb"
    batch_size: 64
    backend: LMDB
  }
}
layer {
  name: "conv2d_1"
  type: "Convolution"
  bottom: "data"
  top: "conv2d_1"
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "gaussian" # initialize the filters from a Gaussian
      std: 0.01        # distribution with stdev 0.01 (default mean: 0)
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv2d_1"
  top: "conv2d_1" # inplace computation
}
layer {
  name: "max_pooling2d_1"
  type: "Pooling"
  bottom: "conv2d_1"
  top: "max_pooling2d_1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "conv2d_2"
  type: "Convolution"
  bottom: "max_pooling2d_1"
  top: "conv2d_2"
  convolution_param {
    num_output: 32
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "gaussian" # initialize the filters from a Gaussian
      std: 0.01        # distribution with stdev 0.01 (default mean: 0)
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2d_2"
  top: "conv2d_2" # inplace computation
}
layer {
  name: "dropout_1"
  type: "Dropout"
  bottom: "conv2d_2"
  top: "dropout_1"
}
layer {
  name: "dense_1"
  type: "InnerProduct"
  inner_product_param {
    num_output: 64
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
  bottom: "dropout_1"
  top: "dense_1"
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "dense_1"
  top: "dense_1" # inplace computation
}
layer {
  name: "dropout_2"
  type: "Dropout"
  bottom: "dense_1"
  top: "dropout_2"
}
layer {
  name: "dense_2"
  type: "InnerProduct"
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
  bottom: "dropout_2"
  top: "dense_2"
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "dense_2"
  bottom: "label"
  top: "loss"
}
```

Additionally, a solver has to be specified that determines the training parameters.

```
# %load mnist_solver.prototxt
# The train/test net protocol buffer definition
net: "example_network.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# solver mode: CPU or GPU
solver_mode: CPU
```

Instantiate the solver and train the network.

In [3]:
# Build the SGD solver (and the nets it owns) from the solver definition file.
solver = caffe.SGDSolver('mnist_solver.prototxt')
# Run a single forward pass on the training net as a sanity check; the call
# returns a dict of the net's output blobs (here the loss), which is what the
# cell displays.
solver.net.forward()

{'loss': array(2.3025808334350586, dtype=float32)}

In [8]:
niter = 2500          # number of SGD iterations to run
test_interval = 100   # measure test accuracy every `test_interval` iterations
test_iter = 100       # number of forward passes (batches) per accuracy measurement

# losses will also be stored in the log
train_loss = np.zeros(niter)
test_acc = np.zeros(int(np.ceil(niter / test_interval)))
# per-iteration scores of the first 8 samples of the current test batch
output = np.zeros((niter, 8, 10))

test_net = solver.test_nets[0]

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe

    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data

    # store the output on the first test batch
    # (start the forward pass at the first conv layer to avoid loading new data)
    test_net.forward(start='conv2d_1')
    output[it] = test_net.blobs['dense_2'].data[:8]

    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    #  how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        print('Iteration', it, 'testing...')
        correct = 0
        n_seen = 0
        for test_it in range(test_iter):
            # Each forward pass loads a fresh batch; without it the same
            # blobs would be scored on every pass of this loop.
            test_net.forward()
            scores = test_net.blobs['dense_2'].data
            labels = test_net.blobs['label'].data
            correct += int(np.sum(scores.argmax(1) == labels))
            # Count samples from the blob itself instead of hard-coding
            # the batch size of the data layer.
            n_seen += labels.shape[0]
        test_acc[it // test_interval] = correct / n_seen

In [9]:
# Plot the training loss (left y-axis) and the test accuracy (right y-axis,
# red) against the training iteration.
_, ax1 = plt.subplots()
ax2 = ax1.twinx()  # second y-axis sharing the same x-axis

train_iters = np.arange(niter)
# accuracy was only measured every `test_interval` iterations
test_iters = test_interval * np.arange(len(test_acc))

ax1.plot(train_iters, train_loss)
ax2.plot(test_iters, test_acc, 'r')

ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')
ax2.set_ylabel('test accuracy')
ax2.set_title('Test Accuracy: {:.2f}'.format(test_acc[-1]))

<matplotlib.text.Text at 0x7f3b4a639240>

The weights are saved in a .caffemodel file.

In [11]:'mnist.caffemodel')