In [27]:
from com.yahoo.ml.caffe.DisplayUtils import *
from com.yahoo.ml.caffe.CaffeOnSpark import *
from com.yahoo.ml.caffe.Config import *
from com.yahoo.ml.caffe.DataSource import *
import caffe
from caffe import layers as L, params as P
from caffe.proto import caffe_pb2
from caffe import TRAIN, TEST
net_path = '/Users/mridul/bigml/CaffeOnSpark/data/lenet_dataframe_train_test.prototxt'
solver_path = '/Users/mridul/bigml/CaffeOnSpark/data/lenet_dataframe_solver.prototxt'
training_source = '/Users/mridul/bigml/mnist_train_dataframe'
test_source = '/Users/mridul/bigml/mnist_test_dataframe'
In [28]:
cos=CaffeOnSpark(sc)
In [29]:
def lenet(net_path, training_source, test_source, training_batch_size, test_batch_size):
    n = caffe.NetSpec()
    # Training-phase data layer, backed by the CaffeOnSpark ImageDataFrame source
    n.data, n.label = L.MemoryData(batch_size=training_batch_size, channels=1, height=28, width=28,
                                   source=training_source,
                                   share_in_parallel=False,
                                   source_class="com.yahoo.ml.caffe.ImageDataFrame",
                                   transform_param=dict(scale=0.00390625),
                                   include=dict(phase=TRAIN), ntop=2)
    # Serialize the training data layer before it is overwritten by the test-phase layer
    train = str(n.to_proto())
    # Test-phase data layer
    n.data, n.label = L.MemoryData(batch_size=test_batch_size, channels=1, height=28, width=28,
                                   source=test_source,
                                   share_in_parallel=False,
                                   source_class="com.yahoo.ml.caffe.ImageDataFrame",
                                   transform_param=dict(scale=0.00390625),
                                   include=dict(phase=TEST), ntop=2)
    # LeNet body: two convolution/pooling stages followed by two fully connected layers
    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'),
                            param=[dict(lr_mult=1), dict(lr_mult=2)])
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.ip1 = L.InnerProduct(n.pool2, num_output=500, weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'))
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    n.ip2 = L.InnerProduct(n.relu1, num_output=10, weight_filler=dict(type='xavier'),
                           bias_filler=dict(type='constant'), param=[dict(lr_mult=1), dict(lr_mult=2)])
    n.accuracy = L.Accuracy(n.ip2, n.label, include=dict(phase=TEST))
    n.loss = L.SoftmaxWithLoss(n.ip2, n.label)
    network_layers = str(n.to_proto())
    # Write the network definition: training data layer first, then the
    # test data layer and the shared model layers
    with open(net_path, 'w') as f:
        f.write('name:"LeNet"\n')
        f.write(train)
        f.write(network_layers)
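As an optional sanity check (not part of the original run), the generated prototxt can be read back after calling lenet to confirm that both MemoryData layers and the LeNet body were written:

# Optional: inspect the network definition produced by lenet(...)
with open(net_path) as f:
    print(f.read())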
In [30]:
def solver(solver_path, net_path, max_iter, learning_rate):
    s = caffe_pb2.SolverParameter()
    s.net = net_path
    s.test_interval = 500
    s.test_iter.append(1)
    s.max_iter = max_iter  # number of training iterations
    # Set the initial learning rate
    s.base_lr = learning_rate
    # Set `lr_policy` to define how the learning rate changes during training.
    s.lr_policy = 'inv'
    s.gamma = 0.0001
    s.power = 0.75
    # Set other SGD hyperparameters. A non-zero `momentum` takes a weighted
    # average of the current gradient and previous gradients to make learning
    # more stable. L2 weight decay regularizes learning, to help prevent the
    # model from overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4
    # Display the current training loss every 100 iterations
    s.display = 100
    # Snapshots store the trained network weights; snapshot every 10,000 iterations
    s.snapshot = 10000
    s.snapshot_prefix = 'caffesnapshot'
    # Train on the GPU. Using the CPU to train large networks is very slow.
    s.solver_mode = caffe_pb2.SolverParameter.GPU
    with open(solver_path, 'w') as f:
        f.write(str(s))
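Under Caffe's 'inv' policy, the effective learning rate at iteration t is base_lr * (1 + gamma * t) ** (-power). A small illustrative helper (not a Caffe or CaffeOnSpark API) shows that, with the gamma and power above, the rate barely decays over the short runs used in this notebook:

def inv_lr(base_lr, gamma, power, it):
    # effective learning rate under Caffe's 'inv' policy (illustrative helper)
    return base_lr * (1 + gamma * it) ** (-power)

print(inv_lr(0.01, 0.0001, 0.75, 100))  # ~0.00993 after 100 iterations
print(inv_lr(0.01, 0.0001, 0.75, 200))  # ~0.00985 after 200 iterations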
In [31]:
lenet(net_path, training_source,test_source,64,64)
solver(solver_path, net_path,100,0.01)
In [32]:
args = {}
args['conf'] = solver_path   # the solver prototxt generated above
args['model'] = 'lenet.model'
args['devices'] = '1'
args['clusterSize'] = '1'
cfg = Config(sc, args)
In [33]:
dl_train_source = DataSource(sc).getSource(cfg,True)
In [34]:
cos.train(dl_train_source)
In [35]:
dl_test_source = DataSource(sc).getSource(cfg,False)
In [36]:
test_result1=cos.test(dl_test_source)
test_result1['batch_size']=64
test_result1['learning_rate']=0.01
test_result1['iteration']=100
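cos.test returns a plain dictionary of metric names to values (its 'accuracy' and 'loss' entries are indexed with [0] when the results table is built below), so the hyperparameters of this run can simply be attached as extra keys. An optional print makes the raw metrics visible; the exact contents depend on the CaffeOnSpark version:

# Optional: inspect the raw metrics returned by cos.test
print(test_result1)  # e.g. {'accuracy': [...], 'loss': [...], 'batch_size': 64, ...}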
In [37]:
lenet(net_path, training_source,test_source,100,100)
solver(solver_path, net_path,200,0.01)
In [38]:
dl_train_source = DataSource(sc).getSource(cfg,True)
In [39]:
cos.train(dl_train_source)
In [40]:
dl_test_source = DataSource(sc).getSource(cfg,False)
In [45]:
test_result2=cos.test(dl_test_source)
test_result2['batch_size']=100
test_result2['learning_rate']=0.01
test_result2['iteration']=200
In [46]:
test_result = [test_result1,test_result2]
In [47]:
t = sqlContext.createDataFrame(map(lambda row: (row['batch_size'],
                                                row['learning_rate'],
                                                row['iteration'],
                                                row['accuracy'][0],
                                                row['loss'][0]),
                                   test_result),
                               ["Batch Size", "Learning Rate", "Iteration", "Accuracy", "Loss"])
t.toPandas()
Out[47]:
In [48]:
iteration = 200
batch_sizes = [16, 32, 64, 128, 256]
learning_rates = [0.01, 0.001, 0.0001]
test_results=[]
for learning_rate in learning_rates:
    for batch in batch_sizes:
        lenet(net_path, training_source, test_source, batch, batch)
        solver(solver_path, net_path, iteration, learning_rate)
        dl_train_source = DataSource(sc).getSource(cfg, True)
        cos.train(dl_train_source)
        dl_test_source = DataSource(sc).getSource(cfg, False)
        test_result = cos.test(dl_test_source)
        test_result['batch_size'] = batch
        test_result['learning_rate'] = learning_rate
        test_result['iteration'] = iteration
        test_results.append(test_result)
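With all fifteen batch-size/learning-rate combinations collected in test_results, the best run can be picked directly before building the comparison table (a small convenience step, not in the original notebook):

best = max(test_results, key=lambda r: r['accuracy'][0])
print(best['batch_size'], best['learning_rate'], best['accuracy'][0])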
In [50]:
t = sqlContext.createDataFrame(map(lambda row: (row['batch_size'],
                                                row['learning_rate'],
                                                row['iteration'],
                                                row['accuracy'][0],
                                                row['loss'][0]),
                                   test_results),
                               ["Batch Size", "Learning Rate", "Iteration", "Accuracy", "Loss"])
t.toPandas()
Out[50]:
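Because toPandas() returns an ordinary pandas DataFrame, the sweep is easy to reshape into an accuracy grid, batch size against learning rate (an optional view, assuming the column names defined above):

df = t.toPandas()
# rows = batch size, columns = learning rate, cells = test accuracy
print(df.pivot(index="Batch Size", columns="Learning Rate", values="Accuracy"))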