H2O Deep Water (MXNet backend) on MNIST, in reference to Szilard's Benchmark-DL
In [1]:
import sys, os
import os.path
import h2o
from h2o.estimators.deepwater import H2ODeepWaterEstimator
PATH = os.path.expanduser("~/h2o-3/")
h2o.init(nthreads=-1)
if not H2ODeepWaterEstimator.available(): sys.exit(1)  ## stop here if the Deep Water backend is not installed
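Before loading any data it can help to confirm what the cluster looks like; a minimal sketch, not part of the original notebook:

## Optional: show the H2O cluster's version, memory, and core count.
h2o.cluster().show_status()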
In [2]:
!nvidia-smi
In [3]:
train = h2o.import_file(PATH + "bigdata/laptop/mnist/train.csv.gz")
predictors = list(range(0,784))               ## the 784 pixel columns
resp = 784                                    ## the label column
train[predictors] = train[predictors]/255.0   ## scale pixel values to [0,1]
train[resp] = train[resp].asfactor()          ## treat the digit label as categorical
nclasses = train[resp].nlevels()[0]           ## number of classes (10 digits)
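A quick sanity check of the prepared frame is cheap and catches scaling mistakes early; the following lines are an illustrative addition, not part of the original benchmark:

## Confirm the shape, the [0,1] pixel range, and the 10 class levels.
print(train.dim)                  ## expect [60000, 785] for MNIST train
print(train[predictors].max())    ## expect 1.0 after dividing by 255
print(train[resp].table())        ## per-digit row counts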
In [4]:
def cnn(num_classes):
    import mxnet as mx
    data = mx.symbol.Variable('data')
    ## two convolution + ReLU + max-pooling blocks
    conv1 = mx.symbol.Convolution(data=data, kernel=(4,4), num_filter=32)
    relu1 = mx.symbol.Activation(data=conv1, act_type="relu")
    pool1 = mx.symbol.Pooling(data=relu1, pool_type="max", kernel=(2,2), stride=(2,2))
    conv2 = mx.symbol.Convolution(data=pool1, kernel=(3,3), num_filter=16)
    relu2 = mx.symbol.Activation(data=conv2, act_type="relu")
    pool2 = mx.symbol.Pooling(data=relu2, pool_type="max", kernel=(2,2), stride=(2,2))
    drop = mx.symbol.Dropout(data=pool2, p=0.2)
    ## two fully connected layers followed by the softmax output
    flatten = mx.symbol.Flatten(data=drop)
    fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=128)
    relu3 = mx.symbol.Activation(data=fc1, act_type="relu")
    fc2 = mx.symbol.FullyConnected(data=relu3, num_hidden=64)
    relu4 = mx.symbol.Activation(data=fc2, act_type="relu")
    fc3 = mx.symbol.FullyConnected(data=relu4, num_hidden=num_classes)
    net = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
    return net

cnn(nclasses).save("/tmp/cnn.json")
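To verify that the serialized network is what Deep Water will pick up, the JSON can be loaded back with MXNet and its learnable parameters listed; this check is a sketch added here for illustration:

import mxnet as mx
## Reload the saved symbol and list its weights/biases; the names should
## match the convolution and fully connected layers defined above.
net = mx.symbol.load("/tmp/cnn.json")
print(net.list_arguments())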
In [5]:
model = H2ODeepWaterEstimator(
    epochs=10,
    learning_rate=0.05,
    learning_rate_annealing=1e-5,
    momentum_start=0.9,
    momentum_stable=0.9,
    mini_batch_size=128,
    network_definition_file="/tmp/cnn.json",
    image_shape=[28,28],        ## MNIST images are 28x28 pixels
    channels=1,                 ## monochrome
    device_id=[0],              ## select which GPU to use
    gpu=True,                   ## disable to compare against the CPU
    standardize=False,          ## data was manually scaled to [0,1]
    ignore_const_cols=False,    ## include all pixels even though some are always 0
    seed=1234                   ## for reproducibility
    #,score_duty_cycle=0        ## uncomment for a faster run (no scoring until the end)
)
model.train(x=predictors, y=resp, training_frame=train)
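As an aside, Deep Water also ships with predefined architectures, so the custom JSON is not strictly required; a hedged sketch of the equivalent call using the built-in LeNet network (hyperparameter values here are illustrative, not tuned):

## Alternative: use Deep Water's built-in LeNet instead of a user-defined symbol.
lenet = H2ODeepWaterEstimator(
    epochs=10,
    network="lenet",        ## predefined architecture, no network_definition_file needed
    image_shape=[28,28],
    channels=1,
    gpu=True,
    standardize=False,
    seed=1234
)
#lenet.train(x=predictors, y=resp, training_frame=train)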
In [6]:
model.plot()
In [7]:
model.show()
In [8]:
model.scoring_history()
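The scoring history comes back as a table (a pandas DataFrame when pandas is installed); a small sketch of pulling out just the training error per epoch (column names may vary slightly between H2O versions):

## Assumes pandas is available; column names are those typically reported for Deep Water.
sh = model.scoring_history()
print(sh[["epochs", "training_classification_error"]].tail())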
In [9]:
test = h2o.import_file(PATH + "bigdata/laptop/mnist/test.csv.gz")
test[predictors] = test[predictors]/255.0   ## same scaling as the training data
test[resp] = test[resp].asfactor()
print(model.model_performance(test))
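Beyond the aggregate metrics, per-class behaviour is often more informative; the lines below are an illustrative addition that prints the confusion matrix and produces raw predictions on the test frame:

perf = model.model_performance(test)
print(perf.confusion_matrix())    ## 10x10 matrix of actual vs. predicted digits
preds = model.predict(test)       ## predicted class plus per-class probabilities
preds.head()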
In [10]:
h2o.download_all_logs("/tmp/")
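The trained model itself can be persisted alongside the logs; a brief sketch using h2o.save_model (the target path is illustrative):

## Save the binary model for later scoring.
model_path = h2o.save_model(model=model, path="/tmp/", force=True)
print(model_path)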