In [1]:
import numpy as np
# import impl.neuralnet as nn
import impl.CNN as cnn
import impl.CNN2 as cnn2
from impl.solver import sgd, momentum, nesterov, adagrad, rmsprop, adam
In [2]:
n_iter = 1000 # number of training iterations (minibatch updates), not epochs
alpha = 1e-3
mb_size = 64 # minibatch size, typically chosen so a batch fits comfortably in cache/RAM
n_experiment = 1
reg = 1e-5
print_after = 100
p_dropout = 0.8 # keep probability: fraction of units retained during dropout
loss = 'cross_ent'
nonlin = 'relu'
solver = 'sgd'
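A note on the keep-probability convention: with inverted dropout, surviving activations are scaled by 1/p_keep at train time so that no rescaling is needed at test time. A minimal sketch of that convention (the internals of impl.CNN2/impl.neuralnet may differ):

import numpy as np

def dropout_forward(X, p_keep=0.8, train=True):
    # Inverted dropout: drop units with probability 1 - p_keep and rescale
    # the survivors by 1/p_keep so expected activations match test time.
    if not train:
        return X, None
    mask = (np.random.rand(*X.shape) < p_keep) / p_keep
    return X * mask, mask

def dropout_backward(dout, mask):
    # Gradient flows only through the units that were kept.
    return dout * mask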
In [3]:
# import hipsternet.input_data as input_data  # not used; MNIST is loaded via TensorFlow below
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('data/MNIST_data/', one_hot=False)
X_train, y_train = mnist.train.images, mnist.train.labels
X_val, y_val = mnist.validation.images, mnist.validation.labels
X_test, y_test = mnist.test.images, mnist.test.labels
y_test.shape, y_val.shape, y_train.shape
Out[3]:
In [4]:
M, D, C = X_train.shape[0], X_train.shape[1], y_train.max() + 1
M, D, C
Out[4]:
In [6]:
import impl.utils as utils
X_train, X_val, X_test = utils.prepro(X_train, X_val, X_test)
X_train.shape, X_val.shape, X_test.shape
Out[6]:
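The source of impl.utils.prepro isn't shown here; a minimal sketch under the assumption that it mean-centers the pixels using training-set statistics only (a common recipe, and the actual function may differ):

import numpy as np

def prepro(X_train, X_val, X_test):
    # Compute the per-pixel mean on the training split only, then apply it
    # to all splits so no test information leaks into preprocessing.
    mean = X_train.mean(axis=0)
    return X_train - mean, X_val - mean, X_test - mean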
In [7]:
# reshape the flat 784-vectors into (channels, height, width) images for the CNN
img_shape = (1, 28, 28)
X_train = X_train.reshape(-1, *img_shape)
X_val = X_val.reshape(-1, *img_shape)
X_test = X_test.reshape(-1, *img_shape)
X_train.shape, X_val.shape, X_test.shape
Out[7]:
In [8]:
solvers = dict(
sgd=sgd,
momentum=momentum,
nesterov=nesterov,
adagrad=adagrad,
rmsprop=rmsprop,
adam=adam
)
solvers
Out[8]:
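Each entry in solvers is a training function from impl.solver. Judging from the call in In [10] below, a solver takes the network plus data and returns the trained network. A sketch of what sgd might look like under that interface (train_step, params, and predict are assumed names, not confirmed by the source):

import numpy as np

def sgd(nn, X_train, y_train, val_set=None, mb_size=64, alpha=1e-3,
        n_iter=1000, print_after=100):
    # Plain minibatch SGD: sample a batch, backprop, take a gradient step.
    X_val, y_val = val_set
    for it in range(1, n_iter + 1):
        idx = np.random.choice(X_train.shape[0], mb_size, replace=False)
        loss, grads = nn.train_step(X_train[idx], y_train[idx])
        for key in nn.params:
            nn.params[key] -= alpha * grads[key]
        if it % print_after == 0:
            val_acc = np.mean(nn.predict(X_val) == y_val)
            print('Iter-{} loss: {:.4f} validation: {:.6f}'.format(it, loss, val_acc))
    return nn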
In [9]:
solver_fun = solvers[solver]  # the selected solver function
accs = np.zeros(n_experiment)  # one test accuracy per experiment
solver_fun, accs
print('Experimenting on {}'.format(solver))
In [10]:
for k in range(n_experiment):
    # Activations tried across runs (see the logged outputs below): PReLU,
    # ReLU, Softplus, Sigmoid, ELU, Neural_Biased_Fully_Parametric_leaky_ReLU,
    # Biased_Fully_Parametric_ReLU, Fully_Parametric_ReLU, P_Leaky ReLU,
    # Leaky ReLU, Noisy ReLU, Tanh, Integral Tanh.
    print('Neural_Biased_Fully_Parametric_leaky_ReLU as the activation function')
    print('Experiment-{}'.format(k + 1))
    # net = nn.FeedForwardNet(D, C, H=128, lam=reg, p_dropout=p_dropout, loss=loss, nonlin=nonlin)
    # net = nn.ConvNet(D=10, C=C, H=128)  # original implementation
    net = cnn2.CNN2(C=C, D=10, H=128)  # my implementation
    net = solver_fun(nn=net, X_train=X_train, y_train=y_train, val_set=(X_val, y_val),
                     mb_size=mb_size, alpha=alpha, n_iter=n_iter, print_after=print_after)
    y_pred = net.predict(X_test)
    accs[k] = np.mean(y_pred == y_test)

print()
# With n_experiment = 1, the reported std is always 0.0000.
print('Test Mean accuracy: {:.4f}, std: {:.4f}'.format(accs.mean(), accs.std()))
In [ ]:
Neural_Biased_Fully_Parametric_leaky_ReLU as the activation function
Experiment-1
Iter-100 loss: 2.2692 validation: 0.526000
Iter-200 loss: 1.7341 validation: 0.687800
Iter-300 loss: 1.5949 validation: 0.746200
Iter-400 loss: 1.4960 validation: 0.781800
Iter-500 loss: 1.2944 validation: 0.813600
Iter-600 loss: 1.1659 validation: 0.828200
Iter-700 loss: 1.1381 validation: 0.840600
Iter-800 loss: 1.2028 validation: 0.850200
Iter-900 loss: 0.9246 validation: 0.855000
Iter-1000 loss: 1.1049 validation: 0.857200
Test Mean accuracy: 0.8583, std: 0.0000
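The activation tested above lives inside impl.CNN2 and isn't shown; as a rough sketch, a fully parametric leaky ReLU learns both slopes rather than fixing the positive one at 1 (names and parameterization here are hypothetical):

import numpy as np

def fp_leaky_relu_forward(x, a, b):
    # Slope b on the positive side, a on the negative side, both learnable.
    # With b = 1 and a fixed small (e.g. 0.01) this reduces to leaky ReLU.
    return np.where(x >= 0, b * x, a * x)

def fp_leaky_relu_backward(dout, x, a, b):
    dx = dout * np.where(x >= 0, b, a)
    da = np.sum(dout * np.where(x < 0, x, 0.0))   # gradient w.r.t. a
    db = np.sum(dout * np.where(x >= 0, x, 0.0))  # gradient w.r.t. b
    return dx, da, db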
In [ ]:
Neural_Biased_Fully_Parametric_ReLU as the activation function
Experiment-1
Iter-100 loss: 2.3317 validation: 0.443800
Iter-200 loss: 2.0487 validation: 0.608000
Iter-300 loss: 1.7188 validation: 0.695800
Iter-400 loss: 1.6585 validation: 0.750600
Iter-500 loss: 1.5276 validation: 0.780600
Iter-600 loss: 1.2190 validation: 0.801200
Iter-700 loss: 1.1202 validation: 0.815000
Iter-800 loss: 1.1930 validation: 0.826600
Iter-900 loss: 1.0390 validation: 0.832200
Iter-1000 loss: 1.0624 validation: 0.842000
Test Mean accuracy: 0.8456, std: 0.0000
In [ ]:
Fully_Parametric_ReLU as the activation function
Experiment-1
Iter-100 loss: 2.1334 validation: 0.492800
Iter-200 loss: 1.6773 validation: 0.685200
Iter-300 loss: 1.3936 validation: 0.757200
Iter-400 loss: 1.5766 validation: 0.793800
Iter-500 loss: 1.2212 validation: 0.819200
Iter-600 loss: 1.0807 validation: 0.835200
Iter-700 loss: 0.9756 validation: 0.846600
Iter-800 loss: 1.1141 validation: 0.858400
Iter-900 loss: 1.0170 validation: 0.861800
Iter-1000 loss: 1.0931 validation: 0.870000
Test Mean accuracy: 0.8608, std: 0.0000
In [ ]:
Fully_Parametric_ReLU as the activation function
Experiment-1
Iter-100 loss: 2.2642 validation: 0.397200
Iter-200 loss: 1.6114 validation: 0.611200
Iter-300 loss: 1.5337 validation: 0.699400
Iter-400 loss: 1.3347 validation: 0.746600
Iter-500 loss: 1.3523 validation: 0.776400
Iter-600 loss: 1.3097 validation: 0.799000
Iter-700 loss: 1.1585 validation: 0.818200
Iter-800 loss: 1.2777 validation: 0.830800
Iter-900 loss: 1.0084 validation: 0.841000
Iter-1000 loss: 0.8520 validation: 0.848200
Test Mean accuracy: 0.8466, std: 0.0000
In [ ]:
Fully_Parametric_ReLU as the activation function
Experiment-1
Iter-100 loss: 1.8847 validation: 0.549600
Iter-200 loss: 1.5904 validation: 0.711000
Iter-300 loss: 1.1620 validation: 0.773400
Iter-400 loss: 1.1835 validation: 0.805600
Iter-500 loss: 1.0188 validation: 0.822400
Iter-600 loss: 1.0835 validation: 0.834600
Iter-700 loss: 1.0358 validation: 0.843000
Iter-800 loss: 1.0515 validation: 0.855400
Iter-900 loss: 0.9910 validation: 0.858600
Iter-1000 loss: 1.0304 validation: 0.866000
Test Mean accuracy: 0.8649, std: 0.0000
In [ ]:
P_Leaky ReLU as the activation function
Experiment-1
Iter-100 loss: 1.4535 validation: 0.684000
Iter-200 loss: 1.2556 validation: 0.779000
Iter-300 loss: 1.1196 validation: 0.831000
Iter-400 loss: 0.9088 validation: 0.852600
Iter-500 loss: 1.2693 validation: 0.866000
Iter-600 loss: 0.8380 validation: 0.874000
Iter-700 loss: 0.8750 validation: 0.877800
Iter-800 loss: 0.9953 validation: 0.879200
Iter-900 loss: 0.8597 validation: 0.885600
Iter-1000 loss: 0.9202 validation: 0.891200
Test Mean accuracy: 0.8841, std: 0.0000
In [ ]:
Softplus as the activation function
Experiment-1
Iter-100 loss: 2.6680 validation: 0.256400
Iter-200 loss: 2.4013 validation: 0.495800
Iter-300 loss: 2.2755 validation: 0.617600
Iter-400 loss: 1.8953 validation: 0.668000
Iter-500 loss: 1.6816 validation: 0.701200
Iter-600 loss: 1.5913 validation: 0.755000
Iter-700 loss: 1.6193 validation: 0.769600
Iter-800 loss: 1.4137 validation: 0.790000
Iter-900 loss: 1.4461 validation: 0.794800
Iter-1000 loss: 1.2362 validation: 0.823200
Test Mean accuracy: 0.8216, std: 0.0000
In [ ]:
Softplus as the activation function
Experiment-1
Iter-100 loss: 2.4244 validation: 0.429600
Iter-200 loss: 2.0749 validation: 0.597400
Iter-300 loss: 1.8657 validation: 0.671000
Iter-400 loss: 1.5912 validation: 0.719600
Iter-500 loss: 1.4152 validation: 0.766600
Iter-600 loss: 1.3833 validation: 0.791800
Iter-700 loss: 1.3204 validation: 0.802200
Iter-800 loss: 1.1555 validation: 0.823600
Iter-900 loss: 1.2917 validation: 0.833200
Iter-1000 loss: 1.1005 validation: 0.845200
Test Mean accuracy: 0.8476, std: 0.0000
In [ ]:
Tanh as the activation function
Experiment-1
Iter-100 loss: 2.0991 validation: 0.489400
Iter-200 loss: 1.8180 validation: 0.673800
Iter-300 loss: 1.5370 validation: 0.745400
Iter-400 loss: 1.6717 validation: 0.787600
Iter-500 loss: 1.1704 validation: 0.810000
Iter-600 loss: 1.2065 validation: 0.824600
Iter-700 loss: 1.2123 validation: 0.834800
Iter-800 loss: 1.1342 validation: 0.842800
Iter-900 loss: 1.3894 validation: 0.850200
Iter-1000 loss: 1.3566 validation: 0.856800
Test Mean accuracy: 0.8590, std: 0.0000
In [ ]:
Integral Tanh as the activation function
Experiment-1
Iter-100 loss: 2.6426 validation: 0.243200
Iter-200 loss: 2.5607 validation: 0.425000
Iter-300 loss: 2.3071 validation: 0.573000
Iter-400 loss: 2.1080 validation: 0.660400
Iter-500 loss: 1.8439 validation: 0.720400
Iter-600 loss: 1.6523 validation: 0.762600
Iter-700 loss: 1.5280 validation: 0.787600
Iter-800 loss: 1.3998 validation: 0.807400
Iter-900 loss: 1.3726 validation: 0.822400
Iter-1000 loss: 1.2396 validation: 0.833400
Test Mean accuracy: 0.8305, std: 0.0000
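'Integral Tanh' presumably means the antiderivative of tanh, f(x) = log(cosh(x)), whose derivative recovers tanh; a numerically stable sketch, assuming that reading:

import numpy as np

def integral_tanh_forward(x):
    # log(cosh(x)) computed as |x| + log1p(exp(-2|x|)) - log(2)
    # to avoid overflow in cosh for large |x|.
    return np.abs(x) + np.log1p(np.exp(-2.0 * np.abs(x))) - np.log(2.0)

def integral_tanh_backward(dout, x):
    # d/dx log(cosh(x)) = tanh(x)
    return dout * np.tanh(x)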
In [ ]:
Integral Tanh as the activation function
Experiment-1
Iter-100 loss: 2.8024 validation: 0.166800
Iter-200 loss: 2.6616 validation: 0.262600
Iter-300 loss: 2.6021 validation: 0.358000
Iter-400 loss: 2.5179 validation: 0.462800
Iter-500 loss: 2.3503 validation: 0.543600
Iter-600 loss: 2.2191 validation: 0.595200
Iter-700 loss: 2.1414 validation: 0.659200
Iter-800 loss: 1.8706 validation: 0.702000
Iter-900 loss: 1.6478 validation: 0.746400
Iter-1000 loss: 1.5556 validation: 0.784200
Test Mean accuracy: 0.7918, std: 0.0000
In [ ]:
ELU as the activation function
Experiment-1
Iter-100 loss: 2.0956 validation: 0.545400
Iter-200 loss: 1.4449 validation: 0.700400
Iter-300 loss: 1.2507 validation: 0.763600
Iter-400 loss: 1.2401 validation: 0.803800
Iter-500 loss: 1.3063 validation: 0.819000
Iter-600 loss: 1.2512 validation: 0.837400
Iter-700 loss: 0.9628 validation: 0.849200
Iter-800 loss: 1.1751 validation: 0.855800
Iter-900 loss: 0.8988 validation: 0.862200
Iter-1000 loss: 1.0802 validation: 0.867000
Test Mean accuracy: 0.8686, std: 0.0000
In [ ]:
ELU as the activation function
Experiment-1
Iter-100 loss: 2.1013 validation: 0.531200
Iter-200 loss: 1.6332 validation: 0.698200
Iter-300 loss: 1.5368 validation: 0.756600
Iter-400 loss: 1.3380 validation: 0.796800
Iter-500 loss: 1.2161 validation: 0.818200
Iter-600 loss: 1.0583 validation: 0.832600
Iter-700 loss: 1.0719 validation: 0.844400
Iter-800 loss: 1.1432 validation: 0.854400
Iter-900 loss: 0.9358 validation: 0.862200
Iter-1000 loss: 0.9857 validation: 0.864600
Test Mean accuracy: 0.8681, std: 0.0000
In [ ]:
Leaky ReLU as the activation function
Experiment-1
Iter-100 loss: 2.6817 validation: 0.215000
Iter-200 loss: 2.4829 validation: 0.386000
Iter-300 loss: 2.3156 validation: 0.524800
Iter-400 loss: 2.2113 validation: 0.617800
Iter-500 loss: 1.9642 validation: 0.686600
Iter-600 loss: 1.8801 validation: 0.739400
Iter-700 loss: 1.5296 validation: 0.769200
Iter-800 loss: 1.5097 validation: 0.788800
Iter-900 loss: 1.3905 validation: 0.807400
Iter-1000 loss: 1.2148 validation: 0.824600
Test Mean accuracy: 0.8189, std: 0.0000
In [ ]:
Leaky ReLU as the activation function
Experiment-1
Iter-100 loss: 2.5315 validation: 0.261000
Iter-200 loss: 2.1878 validation: 0.467800
Iter-300 loss: 2.1681 validation: 0.594600
Iter-400 loss: 1.7277 validation: 0.674800
Iter-500 loss: 1.8410 validation: 0.729600
Iter-600 loss: 1.5907 validation: 0.763200
Iter-700 loss: 1.3094 validation: 0.787600
Iter-800 loss: 1.1305 validation: 0.807400
Iter-900 loss: 1.2506 validation: 0.825200
Iter-1000 loss: 1.1406 validation: 0.836200
Test Mean accuracy: 0.8349, std: 0.0000
In [ ]:
Noisy ReLU as the activation function
Experiment-1
Iter-100 loss: 2.5244 validation: 0.240200
Iter-200 loss: 2.8395 validation: 0.092400
Iter-300 loss: 2.7289 validation: 0.337200
Iter-400 loss: 2.8181 validation: 0.324400
Iter-500 loss: 1.9065 validation: 0.576200
Iter-600 loss: 2.8347 validation: 0.558600
Iter-700 loss: 2.8346 validation: 0.693000
Iter-800 loss: 1.4126 validation: 0.727800
Iter-900 loss: 2.8348 validation: 0.724000
Iter-1000 loss: 2.6473 validation: 0.766800
Test Mean accuracy: 0.1340, std: 0.0000
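The oscillating losses above are typical of a noisy ReLU, which injects Gaussian noise into the pre-activation at train time (a common formulation; the exact variant used here isn't shown):

import numpy as np

def noisy_relu(x, sigma=1.0, train=True):
    # max(0, x + N(0, sigma^2)) during training; plain ReLU at test time.
    if train:
        x = x + np.random.normal(0.0, sigma, size=x.shape)
    return np.maximum(x, 0.0)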
In [ ]:
ReLU as the activation function
Experiment-1
Iter-100 loss: 2.7588 validation: 0.272200
Iter-200 loss: 2.3402 validation: 0.432400
Iter-300 loss: 2.4199 validation: 0.543000
Iter-400 loss: 2.0488 validation: 0.600800
Iter-500 loss: 1.9510 validation: 0.660400
Iter-600 loss: 1.7482 validation: 0.702000
Iter-700 loss: 1.6421 validation: 0.735200
Iter-800 loss: 1.7244 validation: 0.761200
Iter-900 loss: 1.6122 validation: 0.782600
Iter-1000 loss: 1.3812 validation: 0.794200
Test Mean accuracy: 0.7962, std: 0.0000
In [ ]:
ReLU as the activation function
Experiment-1
Iter-100 loss: 2.2678 validation: 0.497600
Iter-200 loss: 1.9497 validation: 0.688200
Iter-300 loss: 1.7068 validation: 0.757000
Iter-400 loss: 1.5548 validation: 0.794200
Iter-500 loss: 1.1759 validation: 0.820800
Iter-600 loss: 1.2454 validation: 0.833600
Iter-700 loss: 1.0927 validation: 0.842600
Iter-800 loss: 1.1915 validation: 0.853200
Iter-900 loss: 1.3234 validation: 0.858800
Iter-1000 loss: 0.9408 validation: 0.865600
Test Mean accuracy: 0.8652, std: 0.0000
In [ ]:
Sigmoid as the activation function
Experiment-1
Iter-100 loss: 2.8707 validation: 0.163600
Iter-200 loss: 2.8303 validation: 0.203600
Iter-300 loss: 2.8109 validation: 0.243000
Iter-400 loss: 2.8171 validation: 0.304000
Iter-500 loss: 2.7857 validation: 0.312000
Iter-600 loss: 2.7644 validation: 0.383200
Iter-700 loss: 2.7804 validation: 0.341600
Iter-800 loss: 2.7593 validation: 0.382400
Iter-900 loss: 2.7501 validation: 0.442000
Iter-1000 loss: 2.7471 validation: 0.434000
Test Mean accuracy: 0.4325, std: 0.0000
In [ ]:
# Done on the MacBook
ELU as the activation function
Experiment-1
Iter-100 loss: 2.0913 validation: 0.571600
Iter-200 loss: 1.5013 validation: 0.734800
Iter-300 loss: 1.3522 validation: 0.784200
Iter-400 loss: 1.1497 validation: 0.811400
Iter-500 loss: 1.1320 validation: 0.831800
Iter-600 loss: 1.2433 validation: 0.844600
Iter-700 loss: 0.9754 validation: 0.852400
Iter-800 loss: 1.1539 validation: 0.861400
Iter-900 loss: 1.0010 validation: 0.869000
Iter-1000 loss: 0.8878 validation: 0.874400
Test Mean accuracy: 0.8775, std: 0.0000
In [ ]:
ELU as the activation function
Experiment-1
Iter-100 loss: 2.1250 validation: 0.504400
Iter-200 loss: 1.7947 validation: 0.682400
Iter-300 loss: 1.4792 validation: 0.752200
Iter-400 loss: 1.2754 validation: 0.789000
Iter-500 loss: 1.2730 validation: 0.810600
Iter-600 loss: 1.3395 validation: 0.827400
Iter-700 loss: 1.1824 validation: 0.836400
Iter-800 loss: 1.0915 validation: 0.847000
Iter-900 loss: 1.2503 validation: 0.855000
Iter-1000 loss: 1.0475 validation: 0.861600
Test Mean accuracy: 0.8576, std: 0.0000
In [ ]:
ELU as the activation function
Experiment-1
Iter-100 loss: 2.0744 validation: 0.527600
Iter-200 loss: 1.6713 validation: 0.676600
Iter-300 loss: 1.4484 validation: 0.743800
Iter-400 loss: 1.2608 validation: 0.781800
Iter-500 loss: 1.2955 validation: 0.803600
Iter-600 loss: 1.2829 validation: 0.823400
Iter-700 loss: 1.1169 validation: 0.838600
Iter-800 loss: 1.0358 validation: 0.847200
Iter-900 loss: 1.0446 validation: 0.853200
Iter-1000 loss: 0.9594 validation: 0.859000
Test Mean accuracy: 0.8644, std: 0.0000
In [ ]:
Leaky ReLU as the activation function
Experiment-1
Iter-100 loss: 2.0169 validation: 0.481000
Iter-200 loss: 1.6019 validation: 0.668600
Iter-300 loss: 1.6799 validation: 0.740800
Iter-400 loss: 1.0977 validation: 0.776200
Iter-500 loss: 1.1473 validation: 0.801800
Iter-600 loss: 1.1161 validation: 0.816200
Iter-700 loss: 1.1787 validation: 0.828600
Iter-800 loss: 1.1867 validation: 0.837200
Iter-900 loss: 1.0120 validation: 0.845200
Iter-1000 loss: 1.3673 validation: 0.855200
Test Mean accuracy: 0.8546, std: 0.0000
In [ ]:
Leaky ReLU as the activation function
Experiment-1
Iter-100 loss: 1.7843 validation: 0.454600
Iter-200 loss: 1.6087 validation: 0.643000
Iter-300 loss: 1.5869 validation: 0.730400
Iter-400 loss: 1.4397 validation: 0.773600
Iter-500 loss: 1.3193 validation: 0.799600
Iter-600 loss: 1.1177 validation: 0.814000
Iter-700 loss: 1.0548 validation: 0.827400
Iter-800 loss: 1.0699 validation: 0.839000
Iter-900 loss: 0.9901 validation: 0.850600
Iter-1000 loss: 1.3588 validation: 0.856200
Test Mean accuracy: 0.8573, std: 0.0000
In [ ]:
Noisy ReLU as the activation function
Experiment-1
Iter-100 loss: 6.2892 validation: 0.137600
Iter-200 loss: 2.3478 validation: 0.477800
Iter-300 loss: 2.4435 validation: 0.532000
Iter-400 loss: 2.1835 validation: 0.653800
Iter-500 loss: 2.5892 validation: 0.240600
Iter-600 loss: 1.4881 validation: 0.312800
Iter-700 loss: 2.8304 validation: 0.748000
Iter-800 loss: 2.2582 validation: 0.428200
Iter-900 loss: 1.3606 validation: 0.817400
Iter-1000 loss: 1.3771 validation: 0.626000
Test Mean accuracy: 0.7717, std: 0.0000
In [ ]:
Noisy ReLU as the activation function
Experiment-1
Iter-100 loss: 3.1534 validation: 0.301800
Iter-200 loss: 2.5504 validation: 0.515200
Iter-300 loss: 3.3991 validation: 0.098600
Iter-400 loss: 2.0483 validation: 0.664600
Iter-500 loss: 2.6379 validation: 0.475400
Iter-600 loss: 2.8357 validation: 0.661800
Iter-700 loss: 1.6504 validation: 0.776400
Iter-800 loss: 1.2664 validation: 0.595200
Iter-900 loss: 1.0656 validation: 0.800600
Iter-1000 loss: 1.2572 validation: 0.825200
Test Mean accuracy: 0.8208, std: 0.0000
In [ ]:
Tanh as the activation function
Experiment-1
Iter-100 loss: 2.3000 validation: 0.445200
Iter-200 loss: 1.7056 validation: 0.641000
Iter-300 loss: 1.6761 validation: 0.734600
Iter-400 loss: 1.5098 validation: 0.779800
Iter-500 loss: 1.3996 validation: 0.805600
Iter-600 loss: 1.2056 validation: 0.825200
Iter-700 loss: 1.2645 validation: 0.837000
Iter-800 loss: 1.1024 validation: 0.845400
Iter-900 loss: 1.1596 validation: 0.851200
Iter-1000 loss: 1.0612 validation: 0.857200
Test Mean accuracy: 0.8602, std: 0.0000
In [ ]:
Tanh as the activation function
Experiment-1
Iter-100 loss: 2.2386 validation: 0.571400
Iter-200 loss: 1.6044 validation: 0.718000
Iter-300 loss: 1.6064 validation: 0.772200
Iter-400 loss: 1.5191 validation: 0.807000
Iter-500 loss: 1.2053 validation: 0.827000
Iter-600 loss: 1.3840 validation: 0.843800
Iter-700 loss: 1.2664 validation: 0.852000
Iter-800 loss: 1.0052 validation: 0.858400
Iter-900 loss: 1.0289 validation: 0.864400
Iter-1000 loss: 1.1386 validation: 0.868200
Test Mean accuracy: 0.8691, std: 0.0000
In [ ]:
Sigmoid as the activation function
Experiment-1
Iter-100 loss: 2.8208 validation: 0.130400
Iter-200 loss: 2.8123 validation: 0.203800
Iter-300 loss: 2.8142 validation: 0.228400
Iter-400 loss: 2.7853 validation: 0.265400
Iter-500 loss: 2.7853 validation: 0.302200
Iter-600 loss: 2.7773 validation: 0.306200
Iter-700 loss: 2.7528 validation: 0.272200
Iter-800 loss: 2.7289 validation: 0.347600
Iter-900 loss: 2.7664 validation: 0.329400
Iter-1000 loss: 2.7209 validation: 0.392600
Test Mean accuracy: 0.4071, std: 0.0000
In [ ]:
Sigmoid as the activation function
Experiment-1
Iter-100 loss: 2.8405 validation: 0.166200
Iter-200 loss: 2.8092 validation: 0.145800
Iter-300 loss: 2.7685 validation: 0.236800
Iter-400 loss: 2.7846 validation: 0.272000
Iter-500 loss: 2.7658 validation: 0.290000
Iter-600 loss: 2.7464 validation: 0.360600
Iter-700 loss: 2.7153 validation: 0.425400
Iter-800 loss: 2.7170 validation: 0.418800
Iter-900 loss: 2.7004 validation: 0.451000
Iter-1000 loss: 2.6949 validation: 0.474600
Test Mean accuracy: 0.4740, std: 0.0000
In [ ]:
ReLU as the activation function
Experiment-1
Iter-100 loss: 2.7328 validation: 0.209000
Iter-200 loss: 2.5418 validation: 0.370800
Iter-300 loss: 2.3551 validation: 0.527000
Iter-400 loss: 2.1554 validation: 0.618400
Iter-500 loss: 1.9614 validation: 0.673200
Iter-600 loss: 1.8192 validation: 0.715200
Iter-700 loss: 1.4898 validation: 0.747000
Iter-800 loss: 1.5309 validation: 0.768600
Iter-900 loss: 1.2143 validation: 0.793200
Iter-1000 loss: 1.3223 validation: 0.808000
Test Mean accuracy: 0.8108, std: 0.0000
In [ ]:
ReLU as the activation function
Experiment-1
Iter-100 loss: 2.3022 validation: 0.499600
Iter-200 loss: 1.9636 validation: 0.678400
Iter-300 loss: 1.5109 validation: 0.753600
Iter-400 loss: 1.3551 validation: 0.792400
Iter-500 loss: 1.2011 validation: 0.813600
Iter-600 loss: 1.1262 validation: 0.832600
Iter-700 loss: 1.0109 validation: 0.844600
Iter-800 loss: 0.9747 validation: 0.857200
Iter-900 loss: 1.1180 validation: 0.863200
Iter-1000 loss: 1.1000 validation: 0.870600
Test Mean accuracy: 0.8685, std: 0.0000
In [ ]:
Softplus as the activation function
Experiment-1
Iter-100 loss: 2.7376 validation: 0.239600
Iter-200 loss: 2.4941 validation: 0.407400
Iter-300 loss: 2.4425 validation: 0.536400
Iter-400 loss: 2.1267 validation: 0.587800
Iter-500 loss: 1.9186 validation: 0.671600
Iter-600 loss: 1.8696 validation: 0.674200
Iter-700 loss: 1.7577 validation: 0.730000
Iter-800 loss: 1.5699 validation: 0.745000
Iter-900 loss: 1.3756 validation: 0.755200
Iter-1000 loss: 1.5521 validation: 0.771200
In [ ]:
Softplus as the activation function
Experiment-1
Iter-100 loss: 2.6229 validation: 0.355600
Iter-200 loss: 2.2358 validation: 0.491200
Iter-300 loss: 2.1240 validation: 0.606200
Iter-400 loss: 2.0498 validation: 0.648600
Iter-500 loss: 1.8687 validation: 0.696400
Iter-600 loss: 1.5448 validation: 0.732800
Iter-700 loss: 1.5075 validation: 0.752800
Iter-800 loss: 1.3674 validation: 0.780000
Iter-900 loss: 1.3848 validation: 0.783600
Iter-1000 loss: 1.2744 validation: 0.809600
Test Mean accuracy: 0.8181, std: 0.0000
In [ ]:
Integral Tanh as the activation function
Experiment-1
Iter-100 loss: 2.7889 validation: 0.170600
Iter-200 loss: 2.7228 validation: 0.264000
Iter-300 loss: 2.5569 validation: 0.350800
Iter-400 loss: 2.5977 validation: 0.436200
Iter-500 loss: 2.4002 validation: 0.516000
Iter-600 loss: 2.2062 validation: 0.590200
Iter-700 loss: 2.1205 validation: 0.648800
Iter-800 loss: 1.8876 validation: 0.706800
Iter-900 loss: 1.6799 validation: 0.739800
Iter-1000 loss: 1.5942 validation: 0.765600
Test Mean accuracy: 0.7658, std: 0.0000
In [ ]:
Integral Tanh as the activation function
Experiment-1
Iter-100 loss: 2.8168 validation: 0.200200
Iter-200 loss: 2.6820 validation: 0.266000
Iter-300 loss: 2.6416 validation: 0.340400
Iter-400 loss: 2.5768 validation: 0.408000
Iter-500 loss: 2.2300 validation: 0.488200
Iter-600 loss: 2.1734 validation: 0.568600
Iter-700 loss: 1.9999 validation: 0.645000
Iter-800 loss: 1.9546 validation: 0.707800
Iter-900 loss: 1.6330 validation: 0.755800
Iter-1000 loss: 1.6077 validation: 0.783000
Test Mean accuracy: 0.7823, std: 0.0000
In [ ]:
# Integral Tanh
Experiment-1
Iter-100 loss: 2.7818 validation: 0.131200
Iter-200 loss: 2.6915 validation: 0.190200
Iter-300 loss: 2.7040 validation: 0.253400
Iter-400 loss: 2.6632 validation: 0.311000
Iter-500 loss: 2.5328 validation: 0.384600
Iter-600 loss: 2.4545 validation: 0.466800
Iter-700 loss: 2.3068 validation: 0.541200
Iter-800 loss: 2.2464 validation: 0.617200
Iter-900 loss: 2.0960 validation: 0.674800
Iter-1000 loss: 2.0130 validation: 0.716000
Test Mean accuracy: 0.7211, std: 0.0000
In [ ]:
# My ReLU
Experiment-1
Iter-100 loss: 2.4508 validation: 0.335400
Iter-200 loss: 2.0822 validation: 0.575000
Iter-300 loss: 1.7527 validation: 0.700000
Iter-400 loss: 1.6338 validation: 0.761400
Iter-500 loss: 1.2952 validation: 0.798000
Iter-600 loss: 1.2815 validation: 0.816800
Iter-700 loss: 1.1558 validation: 0.834600
Iter-800 loss: 1.4359 validation: 0.843800
Iter-900 loss: 0.9867 validation: 0.855200
Iter-1000 loss: 1.0205 validation: 0.863200
Test Mean accuracy: 0.8614, std: 0.0000
In [ ]:
# My ReLU
Experiment-1
Iter-100 loss: 2.6100 validation: 0.306600
Iter-200 loss: 2.4612 validation: 0.440600
Iter-300 loss: 2.1863 validation: 0.549200
Iter-400 loss: 2.0497 validation: 0.634800
Iter-500 loss: 1.7827 validation: 0.688600
Iter-600 loss: 1.6908 validation: 0.726400
Iter-700 loss: 1.4532 validation: 0.755600
Iter-800 loss: 1.3705 validation: 0.779800
Iter-900 loss: 1.4422 validation: 0.802200
Iter-1000 loss: 1.1921 validation: 0.816000
Mean accuracy: 0.8228, std: 0.0000
In [ ]:
# My Sigmoid
Experiment-1
Iter-100 loss: 2.8530 validation: 0.116800
Iter-200 loss: 2.8277 validation: 0.113200
Iter-300 loss: 2.8449 validation: 0.085200
Iter-400 loss: 2.8324 validation: 0.113400
Iter-500 loss: 2.8078 validation: 0.144600
Iter-600 loss: 2.8097 validation: 0.154400
Iter-700 loss: 2.8122 validation: 0.189800
Iter-800 loss: 2.8111 validation: 0.221000
Iter-900 loss: 2.7814 validation: 0.209000
Iter-1000 loss: 2.7981 validation: 0.219200
Mean accuracy: 0.2338, std: 0.0000
In [ ]:
# My Leaky ReLU
Experiment-1
Iter-100 loss: 2.8466 validation: 0.115000
Iter-200 loss: 2.8466 validation: 0.115000
Iter-300 loss: 2.8469 validation: 0.114800
Iter-400 loss: 2.8468 validation: 0.115200
Iter-500 loss: 2.8466 validation: 0.114200
Iter-600 loss: 2.8470 validation: 0.114200
Iter-700 loss: 2.8465 validation: 0.114000
Iter-800 loss: 2.8464 validation: 0.113600
Iter-900 loss: 2.8463 validation: 0.113200
Iter-1000 loss: 2.8463 validation: 0.113000
Mean accuracy: 0.1137, std: 0.0000
In [ ]:
# ReLU
Experiment-1
Iter-100 loss: 2.6305 validation: 0.352800
Iter-200 loss: 2.3055 validation: 0.569400
Iter-300 loss: 1.9629 validation: 0.676200
Iter-400 loss: 1.9984 validation: 0.735000
Iter-500 loss: 1.5353 validation: 0.773400
Iter-600 loss: 1.3668 validation: 0.800000
Iter-700 loss: 1.2604 validation: 0.817000
Iter-800 loss: 1.3004 validation: 0.829800
Iter-900 loss: 1.2728 validation: 0.840200
Iter-1000 loss: 1.1589 validation: 0.847000
Mean accuracy: 0.8565, std: 0.0000
In [ ]:
# ReLU
Experiment-1
Iter-100 loss: 2.3543 validation: 0.411600
Iter-200 loss: 1.8532 validation: 0.628800
Iter-300 loss: 1.6045 validation: 0.723800
Iter-400 loss: 1.4758 validation: 0.769800
Iter-500 loss: 1.2843 validation: 0.807800
Iter-600 loss: 1.3158 validation: 0.823600
Iter-700 loss: 1.0738 validation: 0.839000
Iter-800 loss: 1.2292 validation: 0.849400
Iter-900 loss: 1.0338 validation: 0.856000
Iter-1000 loss: 1.0669 validation: 0.862400
Mean accuracy: 0.8585, std: 0.0000
In [ ]:
# ReLU
Experiment-1
Iter-100 loss: 2.1721 validation: 0.567200
Iter-200 loss: 1.7561 validation: 0.716600
Iter-300 loss: 1.6734 validation: 0.776200
Iter-400 loss: 1.2296 validation: 0.804200
Iter-500 loss: 1.4233 validation: 0.823200
Iter-600 loss: 1.4145 validation: 0.844000
Iter-700 loss: 1.2430 validation: 0.851400
Iter-800 loss: 1.0693 validation: 0.860400
Iter-900 loss: 1.0845 validation: 0.866200
Iter-1000 loss: 0.8727 validation: 0.871200
Mean accuracy: 0.8693, std: 0.0000
In [ ]:
# ReLU
Experiment-1
Iter-100 loss: 2.4044 validation: 0.367600
Iter-200 loss: 2.1172 validation: 0.593600
Iter-300 loss: 1.7946 validation: 0.702600
Iter-400 loss: 1.4411 validation: 0.752000
Iter-500 loss: 1.4053 validation: 0.792000
Iter-600 loss: 1.3357 validation: 0.812600
Iter-700 loss: 1.2837 validation: 0.828400
Iter-800 loss: 1.1249 validation: 0.840600
Iter-900 loss: 1.1020 validation: 0.849000
Iter-1000 loss: 1.1252 validation: 0.858600
Mean accuracy: 0.8611, std: 0.0000
In [ ]:
# ReLU
Experiment-1
Iter-100 loss: 2.2644 validation: 0.325400
Iter-200 loss: 2.0461 validation: 0.538600
Iter-300 loss: 1.7831 validation: 0.640600
Iter-400 loss: 1.6459 validation: 0.698600
Iter-500 loss: 1.5289 validation: 0.734800
Iter-600 loss: 1.3727 validation: 0.764200
Iter-700 loss: 1.2147 validation: 0.786000
Iter-800 loss: 1.1812 validation: 0.808000
Iter-900 loss: 0.9627 validation: 0.825800
Iter-1000 loss: 0.9785 validation: 0.837200
Mean accuracy: 0.8441, std: 0.0000