In [27]:
from com.yahoo.ml.caffe.DisplayUtils import *
from com.yahoo.ml.caffe.CaffeOnSpark import *
from com.yahoo.ml.caffe.Config import *
from com.yahoo.ml.caffe.DataSource import *
import caffe
from caffe import layers as L, params as P
from caffe.proto import caffe_pb2
from caffe import TRAIN, TEST
net_path = '/Users/mridul/bigml/CaffeOnSpark/data/lenet_dataframe_train_test.prototxt'
solver_path = '/Users/mridul/bigml/CaffeOnSpark/data/lenet_dataframe_solver.prototxt'
training_source = '/Users/mridul/bigml/mnist_train_dataframe'
test_source = '/Users/mridul/bigml/mnist_test_dataframe'
In [28]:
cos=CaffeOnSpark(sc)
In [29]:
def lenet(net_path, training_source, test_source, training_batch_size, test_batch_size):
    n = caffe.NetSpec()
    # Training-phase data layer, backed by the CaffeOnSpark ImageDataFrame source
    n.data, n.label = L.MemoryData(batch_size=training_batch_size, channels=1, height=28, width=28,
                                   source=training_source,
                                   share_in_parallel=False,
                                   source_class="com.yahoo.ml.caffe.ImageDataFrame",
                                   transform_param=dict(scale=0.00390625),
                                   include=dict(phase=TRAIN), ntop=2)
    # Serialize the training data layer before it is overwritten by the test-phase layer
    train = str(n.to_proto())
    # Test-phase data layer
    n.data, n.label = L.MemoryData(batch_size=test_batch_size, channels=1, height=28, width=28,
                                   source=test_source,
                                   share_in_parallel=False,
                                   source_class="com.yahoo.ml.caffe.ImageDataFrame",
                                   transform_param=dict(scale=0.00390625),
                                   include=dict(phase=TEST), ntop=2)
    # LeNet body: two convolution/pooling stages followed by two fully connected layers
    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'),
                            param=[dict(lr_mult=1), dict(lr_mult=2)])
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.ip1 = L.InnerProduct(n.pool2, num_output=500, weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'))
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    n.ip2 = L.InnerProduct(n.relu1, num_output=10, weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'), param=[dict(lr_mult=1), dict(lr_mult=2)])
    n.accuracy = L.Accuracy(n.ip2, n.label, include=dict(phase=TEST))
    n.loss = L.SoftmaxWithLoss(n.ip2, n.label)
    network_layers = str(n.to_proto())
    # Write the network definition: training data layer first, then the
    # test data layer and the shared model layers
    with open(net_path, 'w') as f:
        f.write('name:"LeNet"\n')
        f.write(train)
        f.write(network_layers)
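As an optional sanity check (not part of the original run), the generated prototxt can be read back after calling lenet to confirm that both MemoryData layers and the LeNet body were written:

# Optional: inspect the network definition produced by lenet(...)
with open(net_path) as f:
    print(f.read())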
In [30]:
def solver(solver_path, net_path, max_iter, learning_rate):
    s = caffe_pb2.SolverParameter()
    s.net = net_path
    s.test_interval = 500
    s.test_iter.append(1)
    s.max_iter = max_iter  # number of training iterations
    # Set the initial learning rate
    s.base_lr = learning_rate
    # Set `lr_policy` to define how the learning rate changes during training.
    s.lr_policy = 'inv'
    s.gamma = 0.0001
    s.power = 0.75
    # Set other SGD hyperparameters. A non-zero `momentum` takes a weighted
    # average of the current gradient and previous gradients to make learning
    # more stable. L2 weight decay regularizes learning, to help prevent the
    # model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4
    # Display the current training loss every 100 iterations
    s.display = 100
    # Snapshots store the trained network weights; snapshot every 10,000 iterations
    s.snapshot = 10000
    s.snapshot_prefix = 'caffesnapshot'
    # Train on the GPU. Using the CPU to train large networks is very slow.
    s.solver_mode = caffe_pb2.SolverParameter.GPU
    with open(solver_path, 'w') as f:
        f.write(str(s))
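Under Caffe's 'inv' policy, the effective learning rate at iteration t is base_lr * (1 + gamma * t) ** (-power). A small illustrative helper (not a Caffe or CaffeOnSpark API) shows that, with the gamma and power above, the rate barely decays over the short runs used in this notebook:

def inv_lr(base_lr, gamma, power, it):
    # effective learning rate under Caffe's 'inv' policy (illustrative helper)
    return base_lr * (1 + gamma * it) ** (-power)

print(inv_lr(0.01, 0.0001, 0.75, 100))  # ~0.00993 after 100 iterations
print(inv_lr(0.01, 0.0001, 0.75, 200))  # ~0.00985 after 200 iterations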
In [31]:
lenet(net_path, training_source,test_source,64,64)
solver(solver_path, net_path,100,0.01)
In [32]:
args = {}
args['conf'] = solver_path   # the solver prototxt generated above
args['model'] = 'lenet.model'
args['devices'] = '1'
args['clusterSize'] = '1'
cfg = Config(sc, args)
In [33]:
dl_train_source = DataSource(sc).getSource(cfg,True)
In [34]:
cos.train(dl_train_source)
In [35]:
dl_test_source = DataSource(sc).getSource(cfg,False)
In [36]:
test_result1=cos.test(dl_test_source)
test_result1['batch_size']=64
test_result1['learning_rate']=0.01
test_result1['iteration']=100
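cos.test returns a plain dictionary of metric names to values (its 'accuracy' and 'loss' entries are indexed with [0] when the results table is built below), so the hyperparameters of this run can simply be attached as extra keys. An optional print makes the raw metrics visible; the exact contents depend on the CaffeOnSpark version:

# Optional: inspect the raw metrics returned by cos.test
print(test_result1)  # e.g. {'accuracy': [...], 'loss': [...], 'batch_size': 64, ...}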
In [37]:
lenet(net_path, training_source,test_source,100,100)
solver(solver_path, net_path,200,0.01)
In [38]:
dl_train_source = DataSource(sc).getSource(cfg,True)
In [39]:
cos.train(dl_train_source)
In [40]:
dl_test_source = DataSource(sc).getSource(cfg,False)
In [45]:
test_result2=cos.test(dl_test_source)
test_result2['batch_size']=100
test_result2['learning_rate']=0.01
test_result2['iteration']=200
In [46]:
test_result = [test_result1,test_result2]
In [47]:
t = sqlContext.createDataFrame(map(lambda row: (row['batch_size'],
                                                row['learning_rate'],
                                                row['iteration'],
                                                row['accuracy'][0],
                                                row['loss'][0]),
                                   test_result),
                               ["Batch Size", "Learning Rate", "Iteration", "Accuracy", "Loss"])
t.toPandas()
Out[47]:
In [48]:
iteration = 200
batch_sizes = [16, 32, 64, 128, 256]
learning_rates = [0.01, 0.001, 0.0001]
test_results=[]
for learning_rate in learning_rates:
    for batch in batch_sizes:
        lenet(net_path, training_source, test_source, batch, batch)
        solver(solver_path, net_path, iteration, learning_rate)
        dl_train_source = DataSource(sc).getSource(cfg, True)
        cos.train(dl_train_source)
        dl_test_source = DataSource(sc).getSource(cfg, False)
        test_result = cos.test(dl_test_source)
        test_result['batch_size'] = batch
        test_result['learning_rate'] = learning_rate
        test_result['iteration'] = iteration
        test_results.append(test_result)
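With all fifteen batch-size/learning-rate combinations collected in test_results, the best run can be picked directly before building the comparison table (a small convenience step, not in the original notebook):

best = max(test_results, key=lambda r: r['accuracy'][0])
print(best['batch_size'], best['learning_rate'], best['accuracy'][0])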
In [50]:
t = sqlContext.createDataFrame(map(lambda row: (row['batch_size'],
                                                row['learning_rate'],
                                                row['iteration'],
                                                row['accuracy'][0],
                                                row['loss'][0]),
                                   test_results),
                               ["Batch Size", "Learning Rate", "Iteration", "Accuracy", "Loss"])
t.toPandas()
Out[50]:
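Because toPandas() returns an ordinary pandas DataFrame, the sweep is easy to reshape into an accuracy grid, batch size against learning rate (an optional view, assuming the column names defined above):

df = t.toPandas()
# rows = batch size, columns = learning rate, cells = test accuracy
print(df.pivot(index="Batch Size", columns="Learning Rate", values="Accuracy"))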