In [1]:
from pylab import *
%matplotlib inline
import caffe
import os
os.chdir('/home/mckc/image class/')
In [2]:
from caffe import layers as L, params as P
def lenet(data_location, batch_size):
    # our version of LeNet: a series of linear and simple nonlinear transformations
    n = caffe.NetSpec()
    n.data, n.label = L.ImageData(batch_size=batch_size, source=data_location,
                                  transform_param=dict(scale=1./255, mirror=True, crop_size=224),
                                  ntop=2)
    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'))
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.fc1 = L.InnerProduct(n.pool2, num_output=500, weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.fc1, in_place=True)
    n.score = L.InnerProduct(n.relu1, num_output=2, weight_filler=dict(type='xavier'))
    n.loss = L.SoftmaxWithLoss(n.score, n.label)
    return n.to_proto()

with open('lenet_auto_train.prototxt', 'w') as f:
    f.write(str(lenet('caffe_train.txt', 2)))
with open('lenet_auto_test.prototxt', 'w') as f:
    f.write(str(lenet('caffe_validate.txt', 2)))
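The `ImageData` layer reads a plain-text listing with one `image_path label` pair per line. A minimal sketch of how a file like `caffe_train.txt` might be generated, assuming a hypothetical layout with one folder per class (the folder names and labels below are placeholders, not taken from this dataset):

In [ ]:
# Hypothetical class folders; substitute the real directories.
classes = {'class_a': 0, 'class_b': 1}
with open('caffe_train.txt', 'w') as f:
    for folder, label in sorted(classes.items()):
        for fname in sorted(os.listdir(folder)):
            # One "image_path label" pair per line, as ImageData expects.
            f.write('%s/%s %d\n' % (folder, fname, label))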
In [3]:
!cat lenet_auto_train.prototxt
In [4]:
from caffe.proto import caffe_pb2
def solver(train_net_path, test_net_path):
    s = caffe_pb2.SolverParameter()

    # Specify locations of the train and test networks.
    s.train_net = train_net_path
    s.test_net.append(test_net_path)

    s.test_interval = 10  # Test after every 10 training iterations.
    s.test_iter.append(250)  # Test 250 "batches" each time we test.

    s.max_iter = 10000  # number of times to update the net (training iterations)

    # Set the initial learning rate for stochastic gradient descent (SGD).
    s.base_lr = 0.0001

    # Set `lr_policy` to define how the learning rate changes during training.
    # Here, we 'step' the learning rate by multiplying it by a factor `gamma`
    # every `stepsize` iterations; the 'step' policy needs a positive `stepsize`.
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 5000

    # Set other optimization parameters. Setting a non-zero `momentum` takes a
    # weighted average of the current gradient and previous gradients to make
    # learning more stable. L2 weight decay regularizes learning, to help prevent
    # the model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Display the current training loss every 1000 iterations.
    s.display = 1000

    # Snapshots are files used to store networks we've trained. Here, we'll
    # snapshot every 100 iterations, saving the network and training state to
    # disk during optimization, preventing disaster in case of machine crashes, etc.
    s.snapshot = 100
    s.snapshot_prefix = 'lenet'

    # Solver mode (CPU vs. GPU) is chosen at runtime below with caffe.set_mode_gpu();
    # uncomment the next line to pin CPU mode in the solver definition instead.
    #s.solver_mode = caffe_pb2.SolverParameter.CPU

    return s

solver_path = 'logreg_solver.prototxt'
with open(solver_path, 'w') as f:
    f.write(str(solver('lenet_auto_train.prototxt', 'lenet_auto_test.prototxt')))
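`test_iter` and the test net's batch size together set how much of the validation list each test pass covers: 250 batches of 2 images is 500 images. A quick sanity check, assuming `caffe_validate.txt` is present in the working directory:

In [ ]:
# Each test pass runs test_iter batches of batch_size images.
test_iter, test_batch = 250, 2
with open('caffe_validate.txt') as f:
    n_val = sum(1 for line in f if line.strip())
print('images per test pass: %d' % (test_iter * test_batch))
print('validation images listed: %d' % n_val)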
In [6]:
!cat logreg_solver.prototxt
In [7]:
caffe.set_device(0)
caffe.set_mode_gpu()
### load the solver and create train and test nets
solver = None  # ignore this workaround (can't instantiate two solvers on the same data source)
solver = caffe.SGDSolver('logreg_solver.prototxt')
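If no GPU is available, the same solver definition runs unchanged in CPU mode; only the runtime mode call differs:

In [ ]:
# CPU fallback: identical solver, different execution mode (slower).
caffe.set_mode_cpu()
solver = caffe.SGDSolver('logreg_solver.prototxt')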
In [8]:
# each output is (batch size, feature dim, spatial dim)
[(k, v.data.shape) for k, v in solver.net.blobs.items()]
Out[8]:
In [9]:
# just print the weight sizes (we'll omit the biases)
[(k, v[0].data.shape) for k, v in solver.net.params.items()]
Out[9]:
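Each entry in `params` holds the weights at index 0 and the biases at index 1, so totalling the learnable parameters of the train net is a one-liner:

In [ ]:
# Sum weight and bias element counts over all parameterized layers.
total = sum(v[0].data.size + v[1].data.size for v in solver.net.params.values())
print('learnable parameters: %d' % total)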
In [ ]:
solver.net.forward() # train net
solver.test_nets[0].forward() # test net (there can be more than one)
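`forward()` returns the net's output blobs as a dict, so the loss of the pass just run can be read either from the returned dict or straight from the `loss` blob:

In [ ]:
out = solver.net.forward()  # advances the data layer to the next batch
print('loss: %f' % out['loss'])
print('loss blob: %f' % solver.net.blobs['loss'].data)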
In [ ]:
solver.step(1)
In [17]:
for i in range(10):
    solver.step(1)
    print(solver.net.forward())
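A minimal sketch of a longer run that records the loss at every step so progress can be plotted afterwards (`niter = 200` is an arbitrary choice, not tuned for this dataset):

In [ ]:
niter = 200
train_loss = zeros(niter)
for it in range(niter):
    solver.step(1)  # one SGD update: forward, backward, apply gradients
    train_loss[it] = solver.net.blobs['loss'].data
plot(arange(niter), train_loss)
xlabel('iteration'); ylabel('train loss')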
In [ ]:
# Visualize the gradients of the 20 conv1 filters (first input channel only),
# tiled as a 4x5 grid of 5x5 kernels.
imshow(solver.net.params['conv1'][0].diff[:, 0].reshape(4, 5, 5, 5)
       .transpose(0, 2, 1, 3).reshape(4*5, 5*5), cmap='gray'); axis('off')
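To spot-check what the net has learned, push one validation batch through the test net and compare the argmax of the class scores against the ground-truth labels:

In [ ]:
# Forward one validation batch, then compare predictions to labels.
solver.test_nets[0].forward()
preds = solver.test_nets[0].blobs['score'].data.argmax(axis=1)
labels = solver.test_nets[0].blobs['label'].data
print('predicted: %s' % preds)
print('actual:    %s' % labels)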