In [223]:
%load_ext autoreload
%autoreload 2

import os
import numpy as np
import pandas as pd

from browser import *


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [226]:
new_exps = [
  'C100_DenseAdam',
  'C100_DenseSGD',
  'C100_SparseAdam',
  'C100_SparseSGD',
  'C10_DenseAdam',
  'C10_DenseSGD',
  'C10_SparseAdam',
  'C10_SparseSGD',
]

old_exps = [
  'VGG19DenseTest9v2',
  'VGG19SparseFull',
  'VGG19SparseFull-short',
  'VGG19SparseTest9b2',
]

exps = new_exps + old_exps

In [227]:
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)


No experiment state found for experiment /Users/lsouza/nta/results/VGG19SparseTest9b2

In [228]:
df.shape


Out[228]:
(616, 62)

In [229]:
df.head()


Out[229]:
Experiment Name test_accuracy test_accuracy_max epoch_test_accuracy noise_accuracy noise_accuracy_max epoch_noise_accuracy mean_accuracy mean_accuracy_max epoch_mean_accuracy ... test_batch_size test_batches_in_epoch upload_dir use_max_pooling weight_decay weight_sparsity stop learning_rate_gamma lr_step_schedule momentum
0 0_learning_rate=0.001,weight_decay=0.003543 0.3050 0.3147 153 0.0645 0.0892 54 0.18475 0.19485 145 ... 128 500 s3://lsouza/ray/results True 0.003543 1.0 NaN NaN NaN NaN
1 1_learning_rate=0.001,weight_decay=0.0068785 0.2972 0.3189 162 0.0650 0.0887 162 0.18110 0.20380 162 ... 128 500 s3://lsouza/ray/results True 0.006879 1.0 NaN NaN NaN NaN
2 2_learning_rate=0.0003,weight_decay=0.0054007 0.6342 0.6443 159 0.1416 0.1879 156 0.38790 0.40795 162 ... 128 500 s3://lsouza/ray/results True 0.005401 1.0 NaN NaN NaN NaN
3 3_learning_rate=0.003,weight_decay=0.0007664 0.2215 0.2394 144 0.0490 0.0671 63 0.13525 0.14775 144 ... 128 500 s3://lsouza/ray/results True 0.000766 1.0 NaN NaN NaN NaN
4 4_learning_rate=0.03,weight_decay=0.0012639 0.0344 0.0502 136 0.0099 0.0418 33 0.02215 0.04210 33 ... 128 500 s3://lsouza/ray/results True 0.001264 1.0 NaN NaN NaN NaN

5 rows × 62 columns


In [230]:
df.iloc[-1]


Out[230]:
Experiment Name           149_batches_in_epoch=575,boost_strength=1.0613...
test_accuracy                                                        0.1924
test_accuracy_max                                                    0.1924
epoch_test_accuracy                                                      19
noise_accuracy                                                       0.1458
noise_accuracy_max                                                   0.1469
epoch_noise_accuracy                                                     17
mean_accuracy                                                        0.1691
mean_accuracy_max                                                    0.1691
epoch_mean_accuracy                                                      19
epochs                                                                   20
start_learning_rate                                               0.0973613
end_learning_rate                                                 0.0973613
early_stop                                                                0
experiment_file_name      /Users/lsouza/nta/results/VGG19SparseFull-shor...
trial_time                                                          59.0119
mean_epoch_time                                                     2.95059
trial_train_time                                                    50.6388
mean_epoch_train_time                                               2.53194
batch_size                                                              128
batches_in_epoch                                                        575
batches_in_first_epoch                                                  600
block_sizes                                                             3.2
boost_strength                                                      1.06132
boost_strength_factor                                              0.799318
checkpoint_at_end                                                      True
cnn_kernel_size                                                           3
cnn_out_channels                                                      294.4
cnn_percent_on                                                     0.174313
cnn_weight_sparsity                                                0.773573
                                                ...                        
dataset                                                            CIFAR100
experiment                                                             grid
first_epoch_batch_size                                                    4
gpu_percentage                                                         0.14
input_shape                                                         22.3333
iterations                                                              164
k_inference_factor                                                  1.12695
learning_rate                                                     0.0973613
linear_n                                                                NaN
linear_percent_on                                                       NaN
name                                                        VGG19SparseFull
network_type                                                            vgg
num_cpus                                                                 31
num_gpus                                                                  4
optimizer                                                               NaN
output_size                                                             100
path                                                          ~/nta/results
repetitions                                                             150
restore_supported                                                      True
sync_function             aws s3 sync `dirname {local_dir}` {remote_dir}...
test_batch_size                                                         128
test_batches_in_epoch                                                   500
upload_dir                                          s3://lsouza/ray/results
use_max_pooling                                                        True
weight_decay                                                    0.000384426
weight_sparsity                                                         NaN
stop                                                                    NaN
learning_rate_gamma                                                0.177053
lr_step_schedule                                                      101.5
momentum                                                           0.644845
Name: 615, Length: 62, dtype: object

In [ ]:
filters = (df['name'].str.startswith('C100_SparseSGD'))
         
(df[filters]
  [['start_learning_rate', 'end_learning_rate', 'early_stop']]
  .head(20))

In [243]:
filters = (df['name'].str.startswith('C100_SparseAdam'))
         
(df[filters]
  [['start_learning_rate', 'end_learning_rate']]
  .head(20))


Out[243]:
start_learning_rate end_learning_rate
35 0.0010 1.000000e-03
36 0.0100 1.000000e-02
37 0.0030 3.000000e-03
38 0.0001 1.000000e-04
39 0.0030 3.000000e-03
40 0.1000 1.000000e-01
41 0.0003 4.733613e-05
42 0.0300 1.440304e-03
43 0.1000 3.342565e-06
44 0.0003 4.296152e-05
45 0.0010 1.000000e-03
46 0.0010 1.000000e-03
47 0.1000 1.114699e-02
48 0.0001 3.310691e-06
49 0.0030 2.121552e-03
50 0.0003 3.505160e-07
51 0.1000 1.286253e-03
52 0.0003 8.182201e-05
53 0.0003 1.645728e-04
54 0.1000 1.567386e-02

In [234]:
df['start_learning_rate'].max()


Out[234]:
0.14993308514938847

In [235]:
df['end_learning_rate'].min()


Out[235]:
5.84348080181885e-09

In [231]:
sum(df['early_stop']) / len(df)


Out[231]:
0.12012987012987013

In [108]:
(df.groupby(['dataset', 'name'])[['test_accuracy_max', 'test_accuracy', 'noise_accuracy_max',
                                  'noise_accuracy', 'trial_time', 'batches_in_epoch',
                                  'epochs']]
                                 .max().round(2))


Out[108]:
test_accuracy_max test_accuracy noise_accuracy_max noise_accuracy trial_time batches_in_epoch epochs
dataset name
CIFAR10 C10_DenseAdam 0.92 0.92 0.60 0.56 507.36 500 164
C10_DenseSGD 0.93 0.93 0.65 0.58 500.30 500 164
C10_SparseAdam 0.92 0.92 0.63 0.61 569.74 500 164
C10_SparseSGD 0.92 0.92 0.69 0.67 558.73 500 164
CIFAR100 C100_DenseAdam 0.64 0.63 0.24 0.17 509.73 500 164
C100_DenseSGD 0.71 0.71 0.27 0.23 502.12 500 164
C100_SparseAdam 0.68 0.68 0.26 0.22 652.49 500 192
C100_SparseSGD 0.67 0.67 0.33 0.32 562.81 500 164
VGG19DenseTest9v2 0.72 0.72 0.24 0.22 348.41 400 200
VGG19SparseFull 0.71 0.70 0.33 0.31 560.62 599 164

In [109]:
(df.groupby(['dataset', 'name'])[['test_accuracy_max', 'test_accuracy', 'noise_accuracy_max',
                                  'noise_accuracy', 'trial_time', 'batches_in_epoch',
                                  'epochs']]
                                 .mean().round(2))


Out[109]:
test_accuracy_max test_accuracy noise_accuracy_max noise_accuracy trial_time batches_in_epoch epochs
dataset name
CIFAR10 C10_DenseAdam 0.69 0.69 0.42 0.38 269.53 500.00 89.56
C10_DenseSGD 0.92 0.90 0.61 0.53 282.71 500.00 93.56
C10_SparseAdam 0.71 0.70 0.46 0.41 247.46 500.00 71.81
C10_SparseSGD 0.87 0.85 0.63 0.58 264.54 500.00 79.65
CIFAR100 C100_DenseAdam 0.24 0.22 0.08 0.06 270.49 500.00 87.61
C100_DenseSGD 0.66 0.65 0.23 0.20 216.69 500.00 71.71
C100_SparseAdam 0.38 0.37 0.12 0.10 195.42 500.00 58.58
C100_SparseSGD 0.48 0.46 0.22 0.18 155.17 500.00 47.19
VGG19DenseTest9v2 0.39 0.39 0.15 0.13 158.71 400.00 91.25
VGG19SparseFull 0.30 0.29 0.16 0.14 116.88 455.47 39.10

In [64]:
(df.groupby(['dataset', 'name'])[['test_accuracy_max', 'test_accuracy', 'noise_accuracy_max',
                                  'noise_accuracy', 'experiment_time', 'batches_in_epoch']]
                                 .mean().round(3))


Out[64]:
test_accuracy_max test_accuracy noise_accuracy_max noise_accuracy
dataset name
CIFAR10 C10_DenseAdam 9 9 9 9
C10_DenseSGD 9 9 9 9
C10_SparseAdam 16 16 16 16
C10_SparseSGD 17 17 17 17
CIFAR100 C100_DenseAdam 18 18 18 18
C100_DenseSGD 17 17 17 17
C100_SparseAdam 36 36 36 36
C100_SparseSGD 36 36 36 36
VGG19DenseTest9v2 8 8 8 8
VGG19SparseFull 450 450 450 450

How is the custom early stopping behaving?


In [73]:
metrics = ['epochs', 'test_accuracy_max', 'test_accuracy', 'noise_accuracy_max', 'noise_accuracy']
df[df['name'].str.startswith('C10_')][['name'] + metrics]

# (['dataset', 'name'])['test_accuracy_max', 'test_accuracy', 'noise_accuracy_max', 'noise_accuracy']


Out[73]:
name epochs test_accuracy_max test_accuracy noise_accuracy_max noise_accuracy
107 C10_DenseAdam 164 0.6972 0.6972 0.3899 0.3715
108 C10_DenseAdam 164 0.9043 0.9020 0.5527 0.4671
109 C10_DenseAdam 113 0.8505 0.8490 0.4874 0.4012
110 C10_DenseAdam 108 0.9217 0.9217 0.5980 0.5604
111 C10_DenseAdam 42 0.9160 0.9156 0.5555 0.4918
112 C10_DenseAdam 133 0.7525 0.7516 0.4061 0.3638
113 C10_DenseAdam 20 0.2093 0.1859 0.2086 0.2026
114 C10_DenseAdam 20 0.1000 0.1000 0.1000 0.1000
115 C10_DenseAdam 42 0.8927 0.8927 0.5138 0.4440
116 C10_DenseSGD 164 0.9280 0.9263 0.6358 0.5562
117 C10_DenseSGD 164 0.9270 0.9264 0.5805 0.5536
118 C10_DenseSGD 164 0.9249 0.9240 0.5977 0.5497
119 C10_DenseSGD 111 0.9292 0.9279 0.5619 0.5411
120 C10_DenseSGD 59 0.9174 0.9097 0.6022 0.5361
121 C10_DenseSGD 56 0.9204 0.9178 0.6467 0.5218
122 C10_DenseSGD 58 0.9204 0.9191 0.6197 0.5754
123 C10_DenseSGD 46 0.8929 0.8725 0.6098 0.4424
124 C10_DenseSGD 20 0.8828 0.8072 0.6135 0.4722
125 C10_SparseAdam 164 0.7793 0.7778 0.4911 0.4604
126 C10_SparseAdam 164 0.7706 0.7687 0.4219 0.3921
127 C10_SparseAdam 164 0.4724 0.4584 0.3146 0.3010
128 C10_SparseAdam 96 0.8687 0.8646 0.5619 0.4432
129 C10_SparseAdam 82 0.8505 0.8496 0.5005 0.4763
130 C10_SparseAdam 79 0.9089 0.9075 0.5824 0.4917
131 C10_SparseAdam 76 0.7626 0.7626 0.4898 0.3976
132 C10_SparseAdam 20 0.2468 0.1847 0.2086 0.1728
133 C10_SparseAdam 52 0.8827 0.8776 0.5514 0.4102
134 C10_SparseAdam 20 0.3139 0.2661 0.2289 0.1920
135 C10_SparseAdam 42 0.9095 0.9080 0.6034 0.5401
136 C10_SparseAdam 44 0.9008 0.9008 0.6170 0.5996
137 C10_SparseAdam 59 0.9156 0.9156 0.6242 0.6104
138 C10_SparseAdam 20 0.5972 0.5972 0.3667 0.2643
139 C10_SparseAdam 47 0.9044 0.9030 0.6338 0.6122
140 C10_SparseAdam 20 0.2306 0.2044 0.2229 0.2229
141 C10_SparseSGD 164 0.8992 0.8973 0.6070 0.5997
142 C10_SparseSGD 164 0.9043 0.9033 0.6415 0.6159
143 C10_SparseSGD 164 0.8361 0.8330 0.6768 0.6687
144 C10_SparseSGD 164 0.8571 0.8542 0.6573 0.6383
145 C10_SparseSGD 97 0.9233 0.9223 0.6668 0.6298
146 C10_SparseSGD 81 0.8975 0.8942 0.6444 0.6184
147 C10_SparseSGD 81 0.9101 0.9059 0.6523 0.6291
148 C10_SparseSGD 48 0.9151 0.9144 0.6553 0.5807
149 C10_SparseSGD 63 0.9092 0.9078 0.6887 0.6578
150 C10_SparseSGD 67 0.9104 0.9100 0.6721 0.6437
151 C10_SparseSGD 20 0.7633 0.6576 0.5416 0.4555
152 C10_SparseSGD 49 0.8888 0.8874 0.6823 0.6554
153 C10_SparseSGD 74 0.9113 0.9099 0.6413 0.5882
154 C10_SparseSGD 20 0.7365 0.7167 0.5925 0.5150
155 C10_SparseSGD 58 0.8595 0.8532 0.6858 0.6616
156 C10_SparseSGD 20 0.7937 0.6986 0.5731 0.2337
157 C10_SparseSGD 20 0.7990 0.7990 0.4635 0.3973

In [138]:
metrics = ['epochs', 'test_accuracy', 'test_accuracy_max', 'noise_accuracy', 'noise_accuracy_max', 'early_stop']
df[df['name'].str.startswith('C100_DenseSGD')][['name'] + metrics].sort_values(['test_accuracy_max'], ascending=False)


Out[138]:
name epochs test_accuracy test_accuracy_max noise_accuracy noise_accuracy_max early_stop
28 C100_DenseSGD 51 0.7115 0.7124 0.2102 0.2676 1
31 C100_DenseSGD 42 0.7124 0.7124 0.2231 0.2433 1
26 C100_DenseSGD 42 0.7055 0.7057 0.1957 0.2495 1
23 C100_DenseSGD 91 0.7007 0.7024 0.2234 0.2434 1
27 C100_DenseSGD 43 0.6990 0.6996 0.2086 0.2336 1
32 C100_DenseSGD 42 0.6936 0.6980 0.1976 0.2203 1
30 C100_DenseSGD 42 0.6928 0.6946 0.2310 0.2594 1
19 C100_DenseSGD 164 0.6836 0.6875 0.2044 0.2199 0
22 C100_DenseSGD 95 0.6772 0.6798 0.2124 0.2339 1
20 C100_DenseSGD 164 0.6746 0.6766 0.2211 0.2313 0
24 C100_DenseSGD 89 0.6697 0.6759 0.2105 0.2442 1
21 C100_DenseSGD 110 0.6559 0.6580 0.2089 0.2188 1
18 C100_DenseSGD 164 0.6507 0.6529 0.2011 0.2125 0
25 C100_DenseSGD 20 0.5566 0.5881 0.1333 0.1803 0
34 C100_DenseSGD 20 0.5627 0.5627 0.1055 0.2030 0
29 C100_DenseSGD 20 0.5035 0.5445 0.1713 0.2223 0
33 C100_DenseSGD 20 0.5294 0.5309 0.1735 0.2141 0

In [141]:
metrics = ['epochs', 'test_accuracy', 'test_accuracy_max', 'noise_accuracy', 'noise_accuracy_max', 'early_stop']
df[df['name'].str.startswith('C100_DenseAdam')][['name'] + metrics].sort_values(['test_accuracy_max'], ascending=False)


Out[141]:
name epochs test_accuracy test_accuracy_max noise_accuracy noise_accuracy_max early_stop
2 C100_DenseAdam 164 0.6342 0.6443 0.1416 0.1879 0
7 C100_DenseAdam 110 0.6303 0.6388 0.1572 0.1862 0
9 C100_DenseAdam 49 0.6124 0.6124 0.1715 0.2415 1
17 C100_DenseAdam 62 0.5994 0.6033 0.1321 0.1783 1
1 C100_DenseAdam 164 0.2972 0.3189 0.0650 0.0887 0
0 C100_DenseAdam 164 0.3050 0.3147 0.0645 0.0892 0
5 C100_DenseAdam 164 0.2564 0.2908 0.0534 0.0933 0
3 C100_DenseAdam 164 0.2215 0.2394 0.0490 0.0671 0
12 C100_DenseAdam 20 0.1485 0.1485 0.0333 0.0541 0
16 C100_DenseAdam 20 0.0876 0.0888 0.0423 0.0538 0
15 C100_DenseAdam 20 0.0524 0.0795 0.0231 0.0506 0
13 C100_DenseAdam 20 0.0235 0.0700 0.0407 0.0461 0
6 C100_DenseAdam 111 0.0274 0.0608 0.0148 0.0384 0
4 C100_DenseAdam 164 0.0344 0.0502 0.0099 0.0418 0
8 C100_DenseAdam 90 0.0116 0.0438 0.0107 0.0359 1
14 C100_DenseAdam 51 0.0100 0.0139 0.0100 0.0122 1
11 C100_DenseAdam 20 0.0098 0.0116 0.0102 0.0105 0
10 C100_DenseAdam 20 0.0100 0.0100 0.0100 0.0100 0

In [220]:
df['noise_diff'] =  df['noise_accuracy_max'] - df['noise_accuracy']
df['test_diff'] =  df['test_accuracy_max'] - df['test_accuracy']
metrics = ['test_accuracy_max', 'test_accuracy', 'test_diff',
           'noise_accuracy_max','noise_accuracy',  'noise_diff']

(df[df['name'].str.startswith('C100_SparseSGD')][metrics]
              .sort_values(['test_accuracy_max'], ascending=False))


Out[220]:
test_accuracy_max test_accuracy test_diff noise_accuracy_max noise_accuracy noise_diff
71 0.6731 0.6722 0.0009 0.2968 0.2610 0.0358
72 0.6720 0.6720 0.0000 0.2761 0.2524 0.0237
92 0.6707 0.6707 0.0000 0.2696 0.2444 0.0252
94 0.6706 0.6684 0.0022 0.2524 0.2256 0.0268
100 0.6683 0.6683 0.0000 0.2559 0.2484 0.0075
74 0.6669 0.6640 0.0029 0.2506 0.2310 0.0196
88 0.6667 0.6663 0.0004 0.2583 0.2400 0.0183
93 0.6604 0.6592 0.0012 0.2516 0.2491 0.0025
73 0.6566 0.6507 0.0059 0.2889 0.2749 0.0140
98 0.6545 0.6522 0.0023 0.2839 0.2682 0.0157
101 0.6475 0.6475 0.0000 0.2571 0.2533 0.0038
106 0.6461 0.6324 0.0137 0.2725 0.2360 0.0365
83 0.6428 0.6411 0.0017 0.2978 0.2749 0.0229
77 0.6284 0.6255 0.0029 0.2823 0.2218 0.0605
91 0.6124 0.6117 0.0007 0.2302 0.2156 0.0146
97 0.5928 0.5657 0.0271 0.2499 0.2111 0.0388
75 0.5913 0.5900 0.0013 0.2576 0.2460 0.0116
90 0.5574 0.5451 0.0123 0.3280 0.3231 0.0049
89 0.5424 0.5393 0.0031 0.1871 0.1774 0.0097
76 0.4721 0.4692 0.0029 0.2350 0.2141 0.0209
80 0.4469 0.3660 0.0809 0.1680 0.0840 0.0840
79 0.4426 0.4170 0.0256 0.2556 0.2340 0.0216
85 0.4010 0.2041 0.1969 0.1744 0.1041 0.0703
81 0.3475 0.3083 0.0392 0.2509 0.0501 0.2008
84 0.3307 0.3307 0.0000 0.2120 0.0831 0.1289
102 0.3137 0.2994 0.0143 0.2136 0.1997 0.0139
78 0.3111 0.2694 0.0417 0.1870 0.1314 0.0556
103 0.2977 0.2571 0.0406 0.2357 0.1388 0.0969
99 0.2828 0.2828 0.0000 0.1461 0.1014 0.0447
82 0.2728 0.2728 0.0000 0.1569 0.1486 0.0083
95 0.2678 0.1672 0.1006 0.1797 0.1414 0.0383
105 0.2536 0.2536 0.0000 0.0715 0.0492 0.0223
87 0.2132 0.1639 0.0493 0.1168 0.0697 0.0471
104 0.1851 0.1617 0.0234 0.0999 0.0645 0.0354
96 0.1707 0.1707 0.0000 0.1226 0.0977 0.0249
86 0.1583 0.1489 0.0094 0.1095 0.0524 0.0571

In [219]:
df[metrics].mean()


Out[219]:
test_accuracy         0.356074
test_accuracy_max     0.362631
test_diff             0.006557
noise_accuracy        0.164737
noise_accuracy_max    0.190790
noise_diff            0.026052
dtype: float64

In [140]:
metrics = ['epochs', 'test_accuracy', 'test_accuracy_max', 'noise_accuracy', 'noise_accuracy_max', 'early_stop']
df[df['name'].str.startswith('C100_SparseSGD')][['name'] + metrics].sort_values(['test_accuracy_max'], ascending=False)


Out[140]:
name epochs test_accuracy test_accuracy_max noise_accuracy noise_accuracy_max early_stop
71 C100_SparseSGD 164 0.6722 0.6731 0.2610 0.2968 0
72 C100_SparseSGD 102 0.6720 0.6720 0.2524 0.2761 1
92 C100_SparseSGD 58 0.6707 0.6707 0.2444 0.2696 1
94 C100_SparseSGD 52 0.6684 0.6706 0.2256 0.2524 1
100 C100_SparseSGD 48 0.6683 0.6683 0.2484 0.2559 1
74 C100_SparseSGD 98 0.6640 0.6669 0.2310 0.2506 1
88 C100_SparseSGD 62 0.6663 0.6667 0.2400 0.2583 1
93 C100_SparseSGD 50 0.6592 0.6604 0.2491 0.2516 1
73 C100_SparseSGD 102 0.6507 0.6566 0.2749 0.2889 1
98 C100_SparseSGD 55 0.6522 0.6545 0.2682 0.2839 1
101 C100_SparseSGD 49 0.6475 0.6475 0.2533 0.2571 1
106 C100_SparseSGD 60 0.6324 0.6461 0.2360 0.2725 0
83 C100_SparseSGD 54 0.6411 0.6428 0.2749 0.2978 1
77 C100_SparseSGD 42 0.6255 0.6284 0.2218 0.2823 1
91 C100_SparseSGD 44 0.6117 0.6124 0.2156 0.2302 1
97 C100_SparseSGD 60 0.5657 0.5928 0.2111 0.2499 0
75 C100_SparseSGD 84 0.5900 0.5913 0.2460 0.2576 1
90 C100_SparseSGD 60 0.5451 0.5574 0.3231 0.3280 0
89 C100_SparseSGD 54 0.5393 0.5424 0.1774 0.1871 1
76 C100_SparseSGD 81 0.4692 0.4721 0.2141 0.2350 1
80 C100_SparseSGD 20 0.3660 0.4469 0.0840 0.1680 0
79 C100_SparseSGD 20 0.4170 0.4426 0.2340 0.2556 0
85 C100_SparseSGD 20 0.2041 0.4010 0.1041 0.1744 0
81 C100_SparseSGD 20 0.3083 0.3475 0.0501 0.2509 0
84 C100_SparseSGD 20 0.3307 0.3307 0.0831 0.2120 0
102 C100_SparseSGD 20 0.2994 0.3137 0.1997 0.2136 0
78 C100_SparseSGD 20 0.2694 0.3111 0.1314 0.1870 0
103 C100_SparseSGD 20 0.2571 0.2977 0.1388 0.2357 0
99 C100_SparseSGD 20 0.2828 0.2828 0.1014 0.1461 0
82 C100_SparseSGD 20 0.2728 0.2728 0.1486 0.1569 0
95 C100_SparseSGD 20 0.1672 0.2678 0.1414 0.1797 0
105 C100_SparseSGD 20 0.2536 0.2536 0.0492 0.0715 0
87 C100_SparseSGD 20 0.1639 0.2132 0.0697 0.1168 0
104 C100_SparseSGD 20 0.1617 0.1851 0.0645 0.0999 0
96 C100_SparseSGD 20 0.1707 0.1707 0.0977 0.1226 0
86 C100_SparseSGD 20 0.1489 0.1583 0.0524 0.1095 0

In [142]:
metrics = ['epochs', 'test_accuracy', 'test_accuracy_max', 'noise_accuracy', 'noise_accuracy_max', 'early_stop']
df[df['name'].str.startswith('C100_DenseSGD')][['name'] + metrics].sort_values(['test_accuracy_max'], ascending=False)


Out[142]:
name epochs test_accuracy test_accuracy_max noise_accuracy noise_accuracy_max early_stop
28 C100_DenseSGD 51 0.7115 0.7124 0.2102 0.2676 1
31 C100_DenseSGD 42 0.7124 0.7124 0.2231 0.2433 1
26 C100_DenseSGD 42 0.7055 0.7057 0.1957 0.2495 1
23 C100_DenseSGD 91 0.7007 0.7024 0.2234 0.2434 1
27 C100_DenseSGD 43 0.6990 0.6996 0.2086 0.2336 1
32 C100_DenseSGD 42 0.6936 0.6980 0.1976 0.2203 1
30 C100_DenseSGD 42 0.6928 0.6946 0.2310 0.2594 1
19 C100_DenseSGD 164 0.6836 0.6875 0.2044 0.2199 0
22 C100_DenseSGD 95 0.6772 0.6798 0.2124 0.2339 1
20 C100_DenseSGD 164 0.6746 0.6766 0.2211 0.2313 0
24 C100_DenseSGD 89 0.6697 0.6759 0.2105 0.2442 1
21 C100_DenseSGD 110 0.6559 0.6580 0.2089 0.2188 1
18 C100_DenseSGD 164 0.6507 0.6529 0.2011 0.2125 0
25 C100_DenseSGD 20 0.5566 0.5881 0.1333 0.1803 0
34 C100_DenseSGD 20 0.5627 0.5627 0.1055 0.2030 0
29 C100_DenseSGD 20 0.5035 0.5445 0.1713 0.2223 0
33 C100_DenseSGD 20 0.5294 0.5309 0.1735 0.2141 0
  • Too many early stops, even in promising runs (also confirmed in the Tensorboard plots). The main issue seems to be that early stopping is currently based on the mean accuracy: at some point during training, test accuracy keeps increasing while noise accuracy starts to oscillate, often dropping slightly, so the mean accuracy plateaus and the early-stop signal fires.
  • The best fix would be a signal that looks at the test and the noise accuracy independently and only fires when both plateau (see the sketch after this list). That would let experiments that are still promising on either the test or the noise accuracy metric run all the way to the end and reach the best possible results.
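A minimal sketch of such a dual-plateau criterion. This is not the existing stopping hook; the names dual_plateau_stop, patience, and min_delta are assumptions for illustration.


In [ ]:
def dual_plateau_stop(test_history, noise_history, patience=10, min_delta=0.002):
  """Fire only when BOTH test accuracy and noise accuracy have plateaued."""
  def plateaued(history):
    # Plateau: the best value in the last `patience` epochs has not improved
    # on the best value seen before that window by at least `min_delta`.
    if len(history) <= patience:
      return False
    best_recent = max(history[-patience:])
    best_before = max(history[:-patience])
    return best_recent < best_before + min_delta
  return plateaued(test_history) and plateaued(noise_history)

Plugged into the training loop in place of the mean-accuracy check, this would keep a run alive while test accuracy is still improving, even if noise accuracy oscillates.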

What are the best possible parameters?

We can answer this question with respect to:

  • test accuracy
  • mean accuracy
  • noise accuracy

In [148]:
tunable_params_general = ['learning_rate', 'learning_rate_gamma', 'weight_decay', 
                          'momentum', 'batch_size', 'batches_in_epoch']
tunable_params_sparsity = ['boost_strength', 'boost_strength_factor', 
                           'k_inference_factor', 'cnn_percent_on', 'cnn_weight_sparsity']
tunable_params = tunable_params_general + tunable_params_sparsity
performance_metrics = ['test_accuracy_max', 'mean_accuracy_max', 'noise_accuracy_max']

In [158]:
sparse_exps = [
  'C100_SparseAdam',
  'C100_SparseSGD',
  'C10_SparseAdam',
  'C10_SparseSGD',
  'VGG19SparseFull',
  'VGG19SparseFull-short',
  'VGG19SparseTest9b2',
]

dense_exps = [
  'VGG19DenseTest9v2',
  'C100_DenseAdam',
  'C100_DenseSGD',
  'C10_DenseAdam',
  'C10_DenseSGD',  
]

In [149]:
performance_metrics = ['test_accuracy_max', 'mean_accuracy_max', 'noise_accuracy_max']

In [174]:
def stats(arr):
  # Summarize an array as [mean - std, mean, mean + std], rounded to 4 decimals.
  mean = np.mean(arr)
  std = np.std(arr)
  return [round(v, 4) for v in [mean-std, mean, mean+std]]

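The parameter-summary cells below all repeat the same filter / sort_values / head / apply(stats) pattern; a hypothetical helper that factors it out (the name top_param_stats is not part of browser.py, just a sketch):

In [ ]:
def top_param_stats(data, dataset, names, metric, params, k=5, exclude_adam=True):
  # Filter to the given dataset and experiment names, rank runs by `metric`,
  # and summarize the tunable parameters of the top-k runs with stats()
  # as [mean - std, mean, mean + std].
  mask = (data['dataset'] == dataset) & (data['name'].isin(names))
  if exclude_adam:
    mask &= (data['optimizer'] != 'Adam')
  return (data[mask]
          .sort_values(metric, ascending=False)[params]
          .head(k)
          .apply(stats))

# e.g. top_param_stats(df, 'CIFAR10', sparse_exps, 'mean_accuracy_max', tunable_params)
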
In [193]:
# CIFAR-10 SPARSE
filters = ((df['dataset']=='CIFAR10') & 
           (df['name'].isin(sparse_exps)) &
           (df['optimizer'] != 'Adam'))
         
(df[filters]
  .sort_values('mean_accuracy_max', ascending=False)[tunable_params]
  .head(5)
  .apply(stats))


Out[193]:
learning_rate            [0.0696, 0.0817, 0.0939]
learning_rate_gamma      [0.2215, 0.3903, 0.5592]
weight_decay              [0.0024, 0.004, 0.0057]
momentum                 [0.4172, 0.4472, 0.4772]
batch_size                  [128.0, 128.0, 128.0]
batches_in_epoch            [500.0, 500.0, 500.0]
boost_strength            [1.5338, 1.698, 1.8622]
boost_strength_factor     [0.6667, 0.743, 0.8192]
k_inference_factor       [0.9434, 1.0053, 1.0672]
cnn_percent_on           [0.2734, 0.2969, 0.3204]
cnn_weight_sparsity      [0.5521, 0.6233, 0.6945]
dtype: object

In [209]:
# CIFAR-100 SPARSE
filters = ((df['dataset']=='CIFAR100') & 
           (df['name'].isin(sparse_exps)) &
           (df['optimizer'] != 'Adam'))
         
(df[filters]
  .sort_values('mean_accuracy_max', ascending=False)[tunable_params]
  .head(5)
  .apply(stats))


Out[209]:
learning_rate               [0.0888, 0.1019, 0.1151]
learning_rate_gamma          [0.0613, 0.121, 0.1807]
weight_decay                [0.0004, 0.0006, 0.0008]
momentum                    [0.2698, 0.4052, 0.5407]
batch_size                [71.0465, 102.4, 133.7535]
batches_in_epoch         [377.5694, 454.8, 532.0306]
boost_strength              [1.1203, 1.4171, 1.7139]
boost_strength_factor       [0.6079, 0.7184, 0.8288]
k_inference_factor          [0.9137, 0.9898, 1.0659]
cnn_percent_on              [0.3158, 0.3285, 0.3412]
cnn_weight_sparsity         [0.8039, 0.8485, 0.8931]
dtype: object

In [196]:
# CIFAR-100 DENSE
filters = ((df['dataset']=='CIFAR100') & 
           (df['name'].isin(dense_exps)) &
           (df['optimizer'] != 'Adam'))
         
(df[filters]
  .sort_values('mean_accuracy_max', ascending=False)[tunable_params]
  .head(5)
  .apply(stats))


Out[196]:
learning_rate               [0.0724, 0.0855, 0.0986]
learning_rate_gamma         [0.1115, 0.1926, 0.2738]
weight_decay                [0.0003, 0.0024, 0.0044]
momentum                     [0.488, 0.5291, 0.5703]
batch_size                     [128.0, 128.0, 128.0]
batches_in_epoch         [411.0102, 460.0, 508.9898]
boost_strength                       [1.5, 1.5, 1.5]
boost_strength_factor             [0.85, 0.85, 0.85]
k_inference_factor                   [1.0, 1.0, 1.0]
cnn_percent_on                       [1.0, 1.0, 1.0]
cnn_weight_sparsity                  [1.0, 1.0, 1.0]
dtype: object

Best results on original experiment


In [201]:
# CIFAR-100 SPARSE - VGG19SparseFull
filters = ((df['dataset']=='CIFAR100') & 
           (df['optimizer'] != 'Adam') &
           (df['name'] == 'VGG19SparseFull'))
         
(df[filters]
  .sort_values('mean_accuracy_max', ascending=False)[tunable_params]
  .head(10)
  .apply(stats))


Out[201]:
learning_rate                [0.066, 0.0884, 0.1108]
learning_rate_gamma          [0.063, 0.1146, 0.1662]
weight_decay                [0.0004, 0.0007, 0.0009]
momentum                    [0.3304, 0.4923, 0.6542]
batch_size                [79.4715, 108.8, 138.1285]
batches_in_epoch         [389.4726, 469.0, 548.5274]
boost_strength              [1.1116, 1.4042, 1.6968]
boost_strength_factor       [0.5602, 0.6985, 0.8369]
k_inference_factor          [0.8969, 0.9903, 1.0838]
cnn_percent_on              [0.2813, 0.3133, 0.3452]
cnn_weight_sparsity           [0.8023, 0.86, 0.9177]
dtype: object

In [221]:
# CIFAR-100 SPARSE - VGG19SparseFull
filters = ((df['dataset']=='CIFAR100') & 
           (df['optimizer'] != 'Adam') &
           (df['name'] == 'VGG19SparseFull'))
         
(df[filters]
  .sort_values('test_accuracy_max', ascending=False)[tunable_params]
  .head(10)
  .apply(stats))


Out[221]:
learning_rate               [0.0865, 0.1061, 0.1257]
learning_rate_gamma         [0.0663, 0.1031, 0.1399]
weight_decay                [0.0007, 0.0008, 0.0009]
momentum                     [0.3612, 0.541, 0.7209]
batch_size                [79.4715, 108.8, 138.1285]
batches_in_epoch         [399.0448, 478.4, 557.7552]
boost_strength              [1.1288, 1.4225, 1.7163]
boost_strength_factor        [0.534, 0.6551, 0.7762]
k_inference_factor           [0.9117, 0.998, 1.0842]
cnn_percent_on              [0.2672, 0.3028, 0.3385]
cnn_weight_sparsity          [0.6838, 0.817, 0.9501]
dtype: object

In [222]:
# CIFAR-100 SPARSE - VGG19SparseFull
filters = ((df['dataset']=='CIFAR100') & 
           (df['optimizer'] != 'Adam') &
           (df['name'] == 'VGG19SparseFull'))
         
(df[filters]
  .sort_values('noise_accuracy_max', ascending=False)[tunable_params]
  .head(10)
  .apply(stats))


Out[222]:
learning_rate               [0.0521, 0.0688, 0.0855]
learning_rate_gamma         [0.0771, 0.1243, 0.1714]
weight_decay                [0.0004, 0.0007, 0.0009]
momentum                    [0.3749, 0.5167, 0.6585]
batch_size                [71.0465, 102.4, 133.7535]
batches_in_epoch         [381.7528, 469.4, 557.0472]
boost_strength              [1.1072, 1.4464, 1.7856]
boost_strength_factor       [0.5765, 0.7248, 0.8731]
k_inference_factor          [0.9118, 0.9959, 1.0801]
cnn_percent_on              [0.2197, 0.2663, 0.3128]
cnn_weight_sparsity          [0.5639, 0.777, 0.9901]
dtype: object

In [210]:
metrics = ['epochs', 'mean_accuracy_max', 'test_accuracy_max', 'noise_accuracy_max']
(df[df['name'].str.startswith('VGG19SparseFull')][metrics]
              .sort_values(['mean_accuracy_max'], ascending=False)
              .iloc[:10])


Out[210]:
epochs mean_accuracy_max test_accuracy_max noise_accuracy_max
259 164 0.48625 0.6923 0.2808
417 164 0.48490 0.6660 0.3203
403 164 0.48375 0.6757 0.3014
440 164 0.48325 0.6955 0.3005
456 164 0.48260 0.6984 0.2762
292 164 0.48010 0.6597 0.3216
349 90 0.47985 0.7005 0.2821
245 164 0.47960 0.6822 0.3143
307 164 0.47875 0.6954 0.2872
443 90 0.47870 0.6990 0.2932

In [211]:
metrics = ['epochs', 'mean_accuracy_max', 'test_accuracy_max', 'noise_accuracy_max']
(df[df['name'].str.startswith('C100_DenseSGD')][metrics]
              .sort_values(['mean_accuracy_max'], ascending=False)
              .iloc[:10])


Out[211]:
epochs mean_accuracy_max test_accuracy_max noise_accuracy_max
30 42 0.47185 0.6946 0.2594
28 51 0.47050 0.7124 0.2676
31 42 0.46985 0.7124 0.2433
23 91 0.46515 0.7024 0.2434
26 42 0.46320 0.7057 0.2495
27 43 0.46165 0.6996 0.2336
32 42 0.45705 0.6980 0.2203
20 164 0.44995 0.6766 0.2313
24 89 0.44970 0.6759 0.2442
22 95 0.44805 0.6798 0.2339

Learning Rate Decay Analysis


In [250]:
filters = (df['name'].str.startswith('C100_SparseAdam'))
         
(df[filters]
  [['start_learning_rate', 'end_learning_rate', 'early_stop', 'test_accuracy', 'epochs']]
  .head(20))


Out[250]:
start_learning_rate end_learning_rate early_stop test_accuracy epochs
35 0.0010 1.000000e-03 0 0.3723 164
36 0.0100 1.000000e-02 0 0.2326 164
37 0.0030 3.000000e-03 1 0.1720 99
38 0.0001 1.000000e-04 1 0.6136 96
39 0.0030 3.000000e-03 1 0.4469 96
40 0.1000 1.000000e-01 1 0.0546 82
41 0.0003 4.733613e-05 1 0.5884 49
42 0.0300 1.440304e-03 0 0.0384 20
43 0.1000 3.342565e-06 1 0.0100 192
44 0.0003 4.296152e-05 1 0.6153 49
45 0.0010 1.000000e-03 0 0.2946 20
46 0.0010 1.000000e-03 0 0.3830 20
47 0.1000 1.114699e-02 0 0.0247 20
48 0.0001 3.310691e-06 1 0.6552 51
49 0.0030 2.121552e-03 0 0.1680 20
50 0.0003 3.505160e-07 1 0.5390 48
51 0.1000 1.286253e-03 0 0.0100 20
52 0.0003 8.182201e-05 1 0.5712 57
53 0.0003 1.645728e-04 1 0.5942 51
54 0.1000 1.567386e-02 0 0.0100 20

In [251]:
filters = (df['name'].str.startswith('C100_SparseSGD'))
         
(df[filters]
  [['start_learning_rate', 'end_learning_rate', 'early_stop', 'test_accuracy', 'epochs']]
  .head(20))


Out[251]:
start_learning_rate end_learning_rate early_stop test_accuracy epochs
71 0.062032 0.062032 0 0.6722 164
72 0.073143 0.073143 1 0.6720 102
73 0.089823 0.089823 1 0.6507 102
74 0.094103 0.094103 1 0.6640 98
75 0.089010 0.089010 1 0.5900 84
76 0.084460 0.084460 1 0.4692 81
77 0.077336 0.001499 1 0.6255 42
78 0.083494 0.062876 0 0.2694 20
79 0.097652 0.024427 0 0.4170 20
80 0.074639 0.036643 0 0.3660 20
81 0.101649 0.101649 0 0.3083 20
82 0.101533 0.101533 0 0.2728 20
83 0.071089 0.000323 1 0.6411 54
84 0.089973 0.051376 0 0.3307 20
85 0.099974 0.051785 0 0.2041 20
86 0.103079 0.103079 0 0.1489 20
87 0.115307 0.077382 0 0.1639 20
88 0.079804 0.000004 1 0.6663 62
89 0.096964 0.000001 1 0.5393 54
90 0.071324 0.009044 0 0.5451 60
