Setup


In [27]:
import os

from com.yahoo.ml.caffe.DisplayUtils import *
from com.yahoo.ml.caffe.CaffeOnSpark import *
from com.yahoo.ml.caffe.Config import *
from com.yahoo.ml.caffe.DataSource import *
import caffe
from caffe import layers as L, params as P
from caffe.proto import caffe_pb2
from caffe import TRAIN, TEST
# Root directory that holds the CaffeOnSpark checkout and the MNIST dataframes.
# Set the BIGML_HOME environment variable to run the notebook on another
# machine; when it is unset the original author's layout is used unchanged.
base_dir = os.environ.get('BIGML_HOME', '/Users/mridul/bigml')

# Generated network / solver definitions (written by lenet() and solver()).
net_path = os.path.join(base_dir, 'CaffeOnSpark', 'data', 'lenet_dataframe_train_test.prototxt')
solver_path = os.path.join(base_dir, 'CaffeOnSpark', 'data', 'lenet_dataframe_solver.prototxt')

# Dataframe locations passed as `source` to the TRAIN / TEST data layers.
training_source = os.path.join(base_dir, 'mnist_train_dataframe')
test_source = os.path.join(base_dir, 'mnist_test_dataframe')

In [28]:
# Single CaffeOnSpark handle reused by every train/test cell below.
# NOTE(review): `sc` is not defined in this notebook; presumably the
# SparkContext injected by the PySpark kernel -- confirm.
cos = CaffeOnSpark(sc)

Define Network


In [29]:
def _add_memory_data(n, source, batch_size, phase):
    """Bind `n.data` and `n.label` to a two-top MemoryData layer for `phase`."""
    n.data, n.label = L.MemoryData(
        batch_size=batch_size, channels=1, height=28, width=28,
        source=source,
        share_in_parallel=False,
        source_class="com.yahoo.ml.caffe.ImageDataFrame",
        transform_param=dict(scale=0.00390625),  # 1/256
        include=dict(phase=phase),
        ntop=2)


def lenet(net_path, training_source, test_source, training_batch_size, test_batch_size):
    """Write a LeNet train/test network definition to `net_path`.

    Args:
        net_path: file that receives the generated prototxt (overwritten).
        training_source: `source` of the TRAIN-phase MemoryData layer.
        test_source: `source` of the TEST-phase MemoryData layer.
        training_batch_size: batch size of the TRAIN-phase data layer.
        test_batch_size: batch size of the TEST-phase data layer.

    Returns:
        None. The only effect is the file written at `net_path`.
    """
    n = caffe.NetSpec()

    # The TRAIN data layer is serialized on its own first, because the second
    # call below rebinds n.data / n.label to the TEST data layer.
    _add_memory_data(n, training_source, training_batch_size, TRAIN)
    train = str(n.to_proto())
    _add_memory_data(n, test_source, test_batch_size, TEST)

    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'),
                            param=[dict(lr_mult=1), dict(lr_mult=2)])
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    # NOTE(review): conv2 and ip1 pass no `param=` (lr_mult) while conv1 and
    # ip2 do -- kept as in the original; confirm whether that is intentional.
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.ip1 = L.InnerProduct(n.pool2, num_output=500,
                           weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'))
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    n.ip2 = L.InnerProduct(n.relu1, num_output=10,
                           weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'),
                           param=[dict(lr_mult=1), dict(lr_mult=2)])
    # Accuracy is restricted to the TEST phase (named constant instead of the
    # bare phase number 1, matching the TEST data layer above).
    n.accuracy = L.Accuracy(n.ip2, n.label, include=dict(phase=TEST))
    n.loss = L.SoftmaxWithLoss(n.ip2, n.label)

    network_layers = str(n.to_proto())

    # `with` closes the file on exit, so no explicit close() is needed.
    with open(net_path, 'w') as f:
        f.write('name:"LeNet"\n')
        f.write(train)
        f.write(network_layers)

Define Solver


In [30]:
def solver(solver_path, net_path, max_iter, learning_rate):
    """Write a Caffe solver definition to `solver_path`.

    Args:
        solver_path: file that receives the solver prototxt (overwritten).
        net_path: path of the network definition the solver refers to.
        max_iter: number of training iterations (net updates).
        learning_rate: initial learning rate (`base_lr`).

    Returns:
        None. The only effect is the file written at `solver_path`.
    """
    s = caffe_pb2.SolverParameter()

    s.net = net_path
    s.test_interval = 500
    s.test_iter.append(1)
    s.max_iter = max_iter     # number of times to update the net (training iterations)
    # Set the initial learning rate
    s.base_lr = learning_rate
    # Set `lr_policy` to define how the learning rate changes during training.
    s.lr_policy = 'inv'
    s.gamma = 0.0001
    s.power = 0.75

    # Set other SGD hyperparameters. Setting a non-zero `momentum` takes a
    # weighted average of the current gradient and previous gradients to make
    # learning more stable. L2 weight decay regularizes learning, to help prevent
    # the model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4

    # Display the current training loss every 100 iterations.
    s.display = 100

    # Snapshots are files used to store networks we've trained. The interval is
    # fixed at 10,000 iterations regardless of `max_iter`, so runs shorter than
    # that never reach the periodic snapshot interval.
    s.snapshot = 10000
    s.snapshot_prefix = 'caffesnapshot'

    # Train on the GPU.  Using the CPU to train large networks is very slow.
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # `with` closes the file on exit, so no explicit close() is needed.
    with open(solver_path, 'w') as f:
        f.write(str(s))

Training with batch size 64 and 100 iterations


In [31]:
# Regenerate the network and solver prototxt files for the first run.
lenet(net_path, training_source, test_source,
      training_batch_size=64, test_batch_size=64)
solver(solver_path, net_path, max_iter=100, learning_rate=0.01)

In [32]:
# Options handed to CaffeOnSpark's Config. 'conf' reuses `solver_path` rather
# than repeating the literal, so it always names the file written by solver().
args = {
    'conf': solver_path,
    'model': 'lenet.model',
    'devices': '1',
    'clusterSize': '1',
}
cfg = Config(sc, args)

In [33]:
# Second argument True here vs. False in the test cells -- presumably it
# selects the training data layer; confirm against DataSource.getSource.
dl_train_source = DataSource(sc).getSource(cfg, True)

In [34]:
# Run training on the source built in the previous cell.
cos.train(dl_train_source)

Test 1


In [35]:
# Same call as for training, but with False as the second argument.
dl_test_source = DataSource(sc).getSource(cfg, False)

In [36]:
test_result1 = cos.test(dl_test_source)

# Tag the result with the hyperparameters of this run so the comparison
# table further down can show them next to accuracy and loss.
test_result1['batch_size'] = 64
test_result1['learning_rate'] = 0.01
test_result1['iteration'] = 100

Retrain with batch size 100 and 200 iterations


In [37]:
# Overwrite the same prototxt files with the second run's settings; `cfg`
# still points at the solver file, so it is reused unchanged below.
lenet(net_path, training_source, test_source,
      training_batch_size=100, test_batch_size=100)
solver(solver_path, net_path, max_iter=200, learning_rate=0.01)

In [38]:
# Rebuild the training source after the prototxt files were regenerated.
dl_train_source = DataSource(sc).getSource(cfg, True)

In [39]:
# Train again, this time with the second run's network and solver files.
cos.train(dl_train_source)

Test 2


In [40]:
# Rebuild the test source for the second run.
dl_test_source = DataSource(sc).getSource(cfg, False)

In [45]:
test_result2 = cos.test(dl_test_source)

# Tag the result with the hyperparameters of the second run.
test_result2['batch_size'] = 100
test_result2['learning_rate'] = 0.01
test_result2['iteration'] = 200

Compare Test 1 and Test 2


In [46]:
# Collect both runs, in order, for the comparison table.
test_result = [test_result1, test_result2]

In [47]:
# One tuple per run: the three tagged hyperparameters followed by the first
# element of the 'accuracy' and 'loss' entries of the test result.
summary_columns = ["Batch Size", "Learning Rate", "Iteration", "Accuracy", "Loss"]
summary_rows = list(
    (run['batch_size'],
     run['learning_rate'],
     run['iteration'],
     run['accuracy'][0],
     run['loss'][0])
    for run in test_result
)
t = sqlContext.createDataFrame(summary_rows, summary_columns)
t.toPandas()


Out[47]:
Batch Size Learning Rate Iteration Accuracy Loss
0 64 0.01 100 0.920272 0.260897
1 100 0.01 200 0.960800 0.135948

Multiple Tests


In [48]:
# Train and test once for every (learning rate, batch size) combination,
# with a fixed number of iterations, and collect the tagged results.
iteration = 200
batch_sizes = [16, 32, 64, 128, 256]
learning_rates = [0.01, 0.001, 0.0001]
test_results = []
for learning_rate in learning_rates:
    for batch in batch_sizes:
        # Regenerate both prototxt files for this combination; `cfg` keeps
        # pointing at the same solver file, so it is reused as is.
        lenet(net_path, training_source, test_source, batch, batch)
        solver(solver_path, net_path, iteration, learning_rate)
        dl_train_source = DataSource(sc).getSource(cfg, True)
        cos.train(dl_train_source)
        dl_test_source = DataSource(sc).getSource(cfg, False)
        # Use a dedicated name for the per-run result: the earlier comparison
        # cell keeps its list in `test_result`, and rebinding that name here
        # would break that cell if it were re-run after this loop.
        grid_result = cos.test(dl_test_source)
        grid_result['batch_size'] = batch
        grid_result['learning_rate'] = learning_rate
        grid_result['iteration'] = iteration
        test_results.append(grid_result)

Compare Results of Multiple Tests


In [50]:
# One tuple per grid-search run: the three tagged hyperparameters followed by
# the first element of the 'accuracy' and 'loss' entries of the test result.
summary_columns = ["Batch Size", "Learning Rate", "Iteration", "Accuracy", "Loss"]
summary_rows = list(
    (run['batch_size'],
     run['learning_rate'],
     run['iteration'],
     run['accuracy'][0],
     run['loss'][0])
    for run in test_results
)
t = sqlContext.createDataFrame(summary_rows, summary_columns)
t.toPandas()


Out[50]:
Batch Size Learning Rate Iteration Accuracy Loss
0 16 0.0100 200 0.923438 0.247389
1 32 0.0100 200 0.936298 0.207298
2 64 0.0100 200 0.926583 0.239692
3 128 0.0100 200 0.959936 0.138777
4 256 0.0100 200 0.958059 0.145233
5 16 0.0010 200 0.837500 0.567204
6 32 0.0010 200 0.858173 0.501626
7 64 0.0010 200 0.868790 0.462371
8 128 0.0010 200 0.886619 0.431598
9 256 0.0010 200 0.879831 0.461506
10 16 0.0001 200 0.422500 2.096768
11 32 0.0001 200 0.541066 1.966596
12 64 0.0001 200 0.577123 1.945433
13 128 0.0001 200 0.362981 2.149804
14 256 0.0001 200 0.510794 2.039266

In [ ]:


In [ ]: