In [1]:
from pylab import *
%matplotlib inline
import caffe
import os
os.chdir('/home/mckc/image class/')

In [2]:
from caffe import layers as L, params as P

def lenet(data_location, batch_size):
    # our version of LeNet: a series of linear and simple nonlinear transformations
    n = caffe.NetSpec()
    
    n.data, n.label = L.ImageData(batch_size=batch_size, source=data_location,
                                  transform_param=dict(scale=1./255, mirror=True, crop_size=224),
                                  ntop=2)
    
    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'))
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.fc1 =   L.InnerProduct(n.pool2, num_output=500, weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.fc1, in_place=True)
    n.score = L.InnerProduct(n.relu1, num_output=2, weight_filler=dict(type='xavier'))
    n.loss =  L.SoftmaxWithLoss(n.score, n.label)
    
    return n.to_proto()
    
with open('lenet_auto_train.prototxt', 'w') as f:
    f.write(str(lenet('caffe_train.txt', 2)))
    
with open('lenet_auto_test.prototxt', 'w') as f:
    f.write(str(lenet('caffe_validate.txt', 2)))
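
The `source` list file passed to `L.ImageData` is plain text with one image per line in the form `path label`. A minimal sketch of how such a list might be generated is below; the class directories and labels are hypothetical placeholders, not part of this project.

In [ ]:
# Sketch: build a "path label" list file for the ImageData layer.
# The directories ('person', 'background') are hypothetical -- substitute
# whatever folders hold your training images.
import os

def write_image_list(dir_to_label, out_path):
    # dir_to_label maps a directory of images to its integer class label
    with open(out_path, 'w') as f:
        for directory, label in dir_to_label.items():
            for fname in sorted(os.listdir(directory)):
                f.write('%s %d\n' % (os.path.join(directory, fname), label))

# e.g. write_image_list({'person': 0, 'background': 1}, 'caffe_train.txt')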

In [3]:
!cat lenet_auto_train.prototxt


layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00392156862745
    mirror: true
    crop_size: 224
  }
  image_data_param {
    source: "caffe_train.txt"
    batch_size: 2
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  convolution_param {
    num_output: 20
    kernel_size: 5
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  convolution_param {
    num_output: 50
    kernel_size: 5
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "fc1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "fc1"
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "fc1"
  top: "fc1"
}
layer {
  name: "score"
  type: "InnerProduct"
  bottom: "fc1"
  top: "score"
  inner_product_param {
    num_output: 2
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "score"
  bottom: "label"
  top: "loss"
}
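
As a sanity check, the generated prototxt can be parsed back into a `NetParameter` message with protobuf's text format; a small sketch:

In [ ]:
# Sketch: parse the generated prototxt back into a NetParameter message
# to inspect layer names and types programmatically.
from caffe.proto import caffe_pb2
from google.protobuf import text_format

net_param = caffe_pb2.NetParameter()
with open('lenet_auto_train.prototxt') as f:
    text_format.Merge(f.read(), net_param)
print([(l.name, l.type) for l in net_param.layer])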

In [4]:
from caffe.proto import caffe_pb2

def solver(train_net_path, test_net_path):
    s = caffe_pb2.SolverParameter()

    # Specify locations of the train and test networks.
    s.train_net = train_net_path
    s.test_net.append(test_net_path)

    s.test_interval = 10  # Test after every 10 training iterations.
    s.test_iter.append(250) # Test 250 "batches" each time we test.

    s.max_iter = 10000     # maximum number of training iterations (net updates)

    # Set the initial learning rate for stochastic gradient descent (SGD).
    s.base_lr = 0.0001        

    # Set `lr_policy` to define how the learning rate changes during training.
    # Here, we 'step' the learning rate by multiplying it by a factor `gamma`
    # every `stepsize` iterations.
    s.lr_policy = 'step'
    s.gamma = 0.1
    #s.stepsize = 5000  # uncomment to drop the learning rate every 5000 iterations

    # Set other optimization parameters. Setting a non-zero `momentum` takes a
    # weighted average of the current gradient and previous gradients to make
    # learning more stable. L2 weight decay regularizes learning, to help prevent
    # the model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Display the current training loss every 1000 iterations.
    s.display = 1000

    # Snapshots are files used to store networks we've trained.  Here, we'll
    # snapshot every 100 iterations, saving the network weights and training
    # state to disk during optimization so that progress isn't lost if the
    # machine crashes partway through a long run.
    s.snapshot = 100
    s.snapshot_prefix = 'lenet'
    #s.snapshot_prefix = 'examples/hdf5_classification/data/train'

    # The compute mode (CPU vs. GPU) is set at runtime with caffe.set_mode_*()
    # below, so solver_mode is left unset here. Training on the GPU is much
    # faster than on the CPU.
    #s.solver_mode = caffe_pb2.SolverParameter.CPU
    
    return s

solver_path = 'logreg_solver.prototxt'
with open(solver_path, 'w') as f:
    f.write(str(solver('lenet_auto_train.prototxt', 'lenet_auto_test.prototxt')))
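
With `lr_policy = 'step'`, Caffe multiplies the base rate by `gamma` every `stepsize` iterations, i.e. lr = base_lr * gamma^floor(iter / stepsize). A quick sketch of that schedule, assuming the stepsize of 5000 from the commented-out line above:

In [ ]:
# Sketch: the 'step' learning rate schedule, lr = base_lr * gamma^(iter // stepsize).
# stepsize=5000 is an assumption (the value commented out in the solver above).
base_lr, gamma, stepsize = 0.0001, 0.1, 5000
for it in (0, 4999, 5000, 9999):
    print('iter %5d  lr %.2e' % (it, base_lr * gamma ** (it // stepsize)))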

In [5]:
!cat lenet_auto_solver.prototxt


# The train/test net protocol buffer definition
train_net: "lenet_auto_train.prototxt"
test_net: "lenet_auto_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 50 training iterations.
test_interval: 50
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 1000
# snapshot intermediate results
snapshot: 50
snapshot_prefix: "lenet"
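
This stock solver file uses the 'inv' policy instead, which decays the rate continuously as lr = base_lr * (1 + gamma * iter)^(-power). A quick sketch of the first few values:

In [ ]:
# Sketch: the 'inv' learning rate schedule, lr = base_lr * (1 + gamma * iter)^(-power).
base_lr, gamma, power = 0.01, 0.0001, 0.75
for it in (0, 100, 500, 1000):
    print('iter %4d  lr %.4e' % (it, base_lr * (1 + gamma * it) ** (-power)))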

In [6]:
!cat logreg_solver.prototxt


train_net: "lenet_auto_train.prototxt"
test_net: "lenet_auto_test.prototxt"
test_iter: 250
test_interval: 10
base_lr: 0.0001
display: 1000
max_iter: 10000
lr_policy: "step"
gamma: 0.1
momentum: 0.9
weight_decay: 0.0005
snapshot: 100
snapshot_prefix: "lenet"

In [7]:
caffe.set_device(0)
caffe.set_mode_gpu()

### load the solver and create train and test nets
solver = None  # workaround: release any existing solver before creating a new one on the same data
#solver = caffe.SGDSolver('lenet_auto_solver.prototxt')
solver = caffe.SGDSolver('logreg_solver.prototxt')
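
If no GPU is available, the same solver could instead be created in CPU mode; a minimal sketch (training will be much slower):

In [ ]:
# Sketch: fall back to CPU-only mode when no GPU is present.
caffe.set_mode_cpu()
solver = caffe.SGDSolver('logreg_solver.prototxt')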

In [8]:
# each output is (batch size, feature dim, spatial dim)
[(k, v.data.shape) for k, v in solver.net.blobs.items()]


Out[8]:
[('data', (2, 3, 224, 224)),
 ('label', (2,)),
 ('conv1', (2, 20, 220, 220)),
 ('pool1', (2, 20, 110, 110)),
 ('conv2', (2, 50, 106, 106)),
 ('pool2', (2, 50, 53, 53)),
 ('fc1', (2, 500)),
 ('score', (2, 2)),
 ('loss', ())]
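
The spatial sizes follow directly from the layer parameters: each unpadded 5x5 convolution with stride 1 shrinks the side by 4, and each 2x2 / stride-2 max pool halves it. A quick check:

In [ ]:
# Sketch: verify the spatial sizes reported above.
# conv (no padding, stride 1): out = in - kernel + 1; pool (kernel 2, stride 2): out = in // 2
size = 224
size = size - 5 + 1   # conv1 -> 220
size = size // 2      # pool1 -> 110
size = size - 5 + 1   # conv2 -> 106
size = size // 2      # pool2 -> 53
print(size)           # 53, matching the (2, 50, 53, 53) pool2 blob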

In [9]:
# just print the weight sizes (we'll omit the biases)
[(k, v[0].data.shape) for k, v in solver.net.params.items()]


Out[9]:
[('conv1', (20, 3, 5, 5)),
 ('conv2', (50, 20, 5, 5)),
 ('fc1', (500, 140450)),
 ('score', (2, 500))]
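
The fc1 weight matrix is (500, 140450) because the flattened pool2 blob feeding it holds 50 * 53 * 53 values per image:

In [ ]:
# Sketch: the fc1 fan-in equals the flattened pool2 blob size per image.
print(50 * 53 * 53)   # 140450, matching the (500, 140450) fc1 weight shape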

In [ ]:
solver.net.forward()  # train net
solver.test_nets[0].forward()  # test net (there can be more than one)
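
After a forward pass, each blob holds the activations for the current batch, so the loss or any intermediate output can be read directly:

In [ ]:
# Sketch: after forward(), blobs hold the activations for the current batch.
print(solver.net.blobs['loss'].data)            # scalar training loss
print(solver.test_nets[0].blobs['score'].data)  # (2, 2) class scores for the test batch
print(solver.test_nets[0].blobs['label'].data)  # ground-truth labels for the test batch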

In [ ]:
solver.step(1)

In [17]:
for i in range(10):
    solver.step(1)
    print solver.net.forward()


{'loss': array(0.0, dtype=float32)}
{'loss': array(43.66827392578125, dtype=float32)}
{'loss': array(43.66827392578125, dtype=float32)}
{'loss': array(0.0, dtype=float32)}
{'loss': array(87.3365478515625, dtype=float32)}
{'loss': array(87.3365478515625, dtype=float32)}
{'loss': array(87.3365478515625, dtype=float32)}
{'loss': array(87.3365478515625, dtype=float32)}
{'loss': array(87.3365478515625, dtype=float32)}
{'loss': array(87.3365478515625, dtype=float32)}
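
The loop above only prints the training loss. A more informative pattern is sketched below: step the solver, record the train loss, and periodically score the test net by comparing the argmax of `score` against `label` (this net has no Accuracy layer, so accuracy is computed by hand). The iteration counts here are illustrative, not taken from the solver file.

In [ ]:
# Sketch: train for a while, recording train loss and a hand-computed test accuracy.
# niter and test_interval are illustrative values.
import numpy as np

niter, test_interval = 200, 25
train_loss = np.zeros(niter)

for it in range(niter):
    solver.step(1)  # one forward/backward/update pass
    train_loss[it] = solver.net.blobs['loss'].data
    if it % test_interval == 0:
        correct, total = 0, 0
        for _ in range(50):  # 50 test batches of 2 images each
            solver.test_nets[0].forward()
            pred = solver.test_nets[0].blobs['score'].data.argmax(1)
            labels = solver.test_nets[0].blobs['label'].data
            correct += (pred == labels).sum()
            total += labels.shape[0]
        print('iter %d, train loss %.3f, test accuracy %.3f'
              % (it, train_loss[it], float(correct) / total))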

In [ ]:
# visualize the gradients on the 20 conv1 filters (input channel 0 only),
# tiled as a 4x5 grid of 5x5 kernels
imshow(solver.net.params['conv1'][0].diff[:, 0].reshape(4, 5, 5, 5)
       .transpose(0, 2, 1, 3).reshape(4*5, 5*5), cmap='gray'); axis('off')