Experiment:

Evaluate pruning by magnitude weighted by coactivations (a more thorough evaluation) and compare it to the baseline (SET).

Motivation.

Check if results are consistently above baseline.

Conclusion

  • No significant difference between both models
  • No support for early stopping

In [44]:
# Rich-display helpers plus live module reloading, so edits to the imported
# research framework code are picked up without restarting the kernel.
from IPython.display import Markdown, display
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [45]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
# NOTE(review): wildcard imports pull an unknown set of names into the
# namespace (e.g. `load_many` presumably comes from the browser module —
# confirm); kept as-is because reordering them could change shadowing.
from ray.tune.commands import *
from nupic.research.frameworks.dynamic_sparse.common.browser import *

import matplotlib.pyplot as plt
from matplotlib import rcParams

# Render inline figures at retina (2x) resolution.
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set(style="whitegrid")
sns.set_palette("colorblind")

Load and check data


In [46]:
# Collect the Ray Tune result directories for this experiment and load them
# into a single DataFrame (one row per trial).
experiment_base = os.path.join('gsc-smalldense-2019-10-11-exp1')
experiment_names = ['gsc-smalldense']
exps = [os.path.join(experiment_base, name) for name in experiment_names]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)


gsc-smalldense

In [51]:
# Display the full results table — 40 trials (5 density levels x 8 samples).
df


Out[51]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... lr_gamma lr_milestones lr_scheduler model momentum net_params network optim_alg test_noise weight_decay
0 0_equivalent_on_perc=0.02,model=BaseModel 0.722293 92 0.256079 0 0.705961 0.714383 0.792712 45 0.286050 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
1 1_equivalent_on_perc=0.04,model=BaseModel 0.836149 92 0.224685 0 0.826238 0.831169 0.905172 65 0.305643 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
2 2_equivalent_on_perc=0.06,model=BaseModel 0.874426 65 0.288595 0 0.864466 0.870813 0.936912 78 0.447492 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
3 3_equivalent_on_perc=0.08,model=BaseModel 0.894200 92 0.300410 0 0.880163 0.891710 0.942006 78 0.298197 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
4 4_equivalent_on_perc=0.1,model=BaseModel 0.894737 96 0.327995 0 0.887194 0.890831 0.949451 55 0.486677 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
5 5_equivalent_on_perc=0.02,model=BaseModel 0.714481 92 0.169905 0 0.698662 0.707450 0.781740 85 0.137539 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
6 6_equivalent_on_perc=0.04,model=BaseModel 0.836588 98 0.231081 0 0.824187 0.830876 0.907132 98 0.296630 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
7 7_equivalent_on_perc=0.06,model=BaseModel 0.872913 93 0.251343 0 0.862465 0.869544 0.928292 46 0.378918 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
8 8_equivalent_on_perc=0.08,model=BaseModel 0.895030 91 0.317547 0 0.882873 0.894444 0.943182 45 0.528997 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
9 9_equivalent_on_perc=0.1,model=BaseModel 0.901767 96 0.305146 0 0.890562 0.890196 0.949451 32 0.391066 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
10 10_equivalent_on_perc=0.02,model=BaseModel 0.706425 61 0.174202 0 0.522776 0.688849 0.789577 99 0.087774 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
11 11_equivalent_on_perc=0.04,model=BaseModel 0.838688 98 0.252856 0 0.827361 0.837076 0.909875 66 0.375784 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
12 12_equivalent_on_perc=0.06,model=BaseModel 0.873499 80 0.214530 1 0.863563 0.868519 0.928292 76 0.126567 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
13 13_equivalent_on_perc=0.08,model=BaseModel 0.897422 96 0.295821 0 0.886437 0.889806 0.943966 92 0.464342 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
14 14_equivalent_on_perc=0.1,model=BaseModel 0.901572 90 0.302217 0 0.890831 0.890587 0.951019 77 0.505486 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
15 15_equivalent_on_perc=0.02,model=BaseModel 0.711015 92 0.175471 0 0.701299 0.707206 0.793103 97 0.157524 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
16 16_equivalent_on_perc=0.04,model=BaseModel 0.836784 93 0.233083 0 0.826018 0.829948 0.913793 99 0.321317 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
17 17_equivalent_on_perc=0.06,model=BaseModel 0.873694 96 0.244751 0 0.857802 0.868909 0.929075 63 0.359326 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
18 18_equivalent_on_perc=0.08,model=BaseModel 0.898399 91 0.236989 0 0.886144 0.890440 0.941614 25 0.346003 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
19 19_equivalent_on_perc=0.1,model=BaseModel 0.905869 92 0.266771 0 0.895127 0.902402 0.950235 92 0.366379 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
20 20_equivalent_on_perc=0.02,model=BaseModel 0.717117 98 0.156625 0 0.705180 0.709403 0.784875 50 0.152038 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
21 21_equivalent_on_perc=0.04,model=BaseModel 0.835075 92 0.260668 0 0.820940 0.829704 0.911834 73 0.404781 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
22 22_equivalent_on_perc=0.06,model=BaseModel 0.873303 94 0.263988 0 0.863099 0.869007 0.929075 50 0.370298 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
23 23_equivalent_on_perc=0.08,model=BaseModel 0.893419 96 0.250952 0 0.880871 0.889220 0.938871 76 0.344828 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
24 24_equivalent_on_perc=0.1,model=BaseModel 0.903086 98 0.331511 0 0.892344 0.901621 0.949843 45 0.429075 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
25 25_equivalent_on_perc=0.02,model=BaseModel 0.716922 96 0.172542 0 0.702544 0.713505 0.792712 74 0.145376 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
26 26_equivalent_on_perc=0.04,model=BaseModel 0.834977 93 0.228347 0 0.824382 0.826433 0.911442 87 0.272335 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
27 27_equivalent_on_perc=0.06,model=BaseModel 0.874133 95 0.291964 0 0.863588 0.867493 0.929859 94 0.439655 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
28 28_equivalent_on_perc=0.08,model=BaseModel 0.896250 98 0.306171 0 0.880798 0.874573 0.947884 92 0.463166 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
29 29_equivalent_on_perc=0.1,model=BaseModel 0.899814 95 0.327702 0 0.884923 0.897422 0.947100 97 0.473354 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
30 30_equivalent_on_perc=0.02,model=BaseModel 0.718826 92 0.174397 0 0.704790 0.712772 0.784875 23 0.130486 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
31 31_equivalent_on_perc=0.04,model=BaseModel 0.837858 92 0.171712 0 0.822478 0.834098 0.907132 91 0.141066 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
32 32_equivalent_on_perc=0.06,model=BaseModel 0.875256 91 0.272190 0 0.865223 0.869007 0.929859 83 0.399295 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
33 33_equivalent_on_perc=0.08,model=BaseModel 0.893467 80 0.309833 0 0.883068 0.888195 0.945533 96 0.458856 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
34 34_equivalent_on_perc=0.1,model=BaseModel 0.899473 97 0.287374 0 0.886827 0.896494 0.951411 85 0.429467 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
35 35_equivalent_on_perc=0.02,model=BaseModel 0.717410 95 0.253198 0 0.705546 0.713456 0.783699 49 0.318574 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
36 36_equivalent_on_perc=0.04,model=BaseModel 0.839274 90 0.170735 0 0.824358 0.829509 0.907915 62 0.138323 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
37 37_equivalent_on_perc=0.06,model=BaseModel 0.873059 96 0.285324 0 0.863270 0.870228 0.936129 67 0.465909 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
38 38_equivalent_on_perc=0.08,model=BaseModel 0.896446 99 0.303877 0 0.882092 0.896446 0.947884 45 0.466301 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01
39 39_equivalent_on_perc=0.1,model=BaseModel 0.900303 93 0.294991 0 0.890880 0.897666 0.948276 96 0.493339 ... 0.9 60.0 MultiStepLR BaseModel 0 {'boost_strength': 1.5, 'boost_strength_factor... small_dense_gsc SGD False 0.01

40 rows × 40 columns


In [52]:
# List every available column: train/val accuracy summaries plus the
# experiment configuration parameters.
df.columns


Out[52]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'val_acc_all',
       'epochs', 'experiment_file_name', 'experiment_base_path', 'trial_time',
       'mean_epoch_time', 'scatter_plot_dicts', 'batch_size_test',
       'batch_size_train', 'data_dir', 'dataset_name', 'debug_small_dense',
       'debug_sparse', 'debug_weights', 'device', 'equivalent_on_perc',
       'learning_rate', 'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model',
       'momentum', 'net_params', 'network', 'optim_alg', 'test_noise',
       'weight_decay'],
      dtype='object')

In [53]:
# Sanity check: 40 trials x 40 columns.
df.shape


Out[53]:
(40, 40)

In [54]:
# Inspect every field of a single trial (row 1) as a Series.
df.iloc[1]


Out[54]:
Experiment Name                 1_equivalent_on_perc=0.04,model=BaseModel
train_acc_max                                                    0.836149
train_acc_max_epoch                                                    92
train_acc_min                                                    0.224685
train_acc_min_epoch                                                     0
train_acc_median                                                 0.826238
train_acc_last                                                   0.831169
val_acc_max                                                      0.905172
val_acc_max_epoch                                                      65
val_acc_min                                                      0.305643
val_acc_min_epoch                                                       0
val_acc_median                                                   0.881661
val_acc_last                                                      0.88127
val_acc_all             0     0.305643
1     0.498433
2     0.567790
3...
epochs                                                                100
experiment_file_name    /Users/mcaporale/nta/results/gsc-smalldense-20...
experiment_base_path                                       gsc-smalldense
trial_time                                                        13.2269
mean_epoch_time                                                  0.132269
scatter_plot_dicts                                                     {}
batch_size_test                                                      1000
batch_size_train                                                       10
data_dir                                               ~/nta/datasets/gsc
dataset_name                                              PreprocessedGSC
debug_small_dense                                                    True
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
equivalent_on_perc                                                   0.04
learning_rate                                                        0.01
lr_gamma                                                              0.9
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                           BaseModel
momentum                                                                0
net_params              {'boost_strength': 1.5, 'boost_strength_factor...
network                                                   small_dense_gsc
optim_alg                                                             SGD
test_noise                                                          False
weight_decay                                                         0.01
Name: 1, dtype: object

In [56]:
# Sanity check: the grid search ran 8 samples at each of the 5 density levels.
trials_per_density = df.groupby('equivalent_on_perc')['equivalent_on_perc'].count()
trials_per_density


Out[56]:
equivalent_on_perc
0.02    8
0.04    8
0.06    8
0.08    8
0.10    8
Name: equivalent_on_perc, dtype: int64

Analysis

Experiment Details

# experiment configurations base_exp_config = dict( device=("cuda" if torch.cuda.device_count() > 0 else "cpu"), # dataset related dataset_name="PreprocessedGSC", data_dir="~/nta/datasets/gsc", # batch_size_train=(4, 16), # batch_size_test=(1000), # # ----- Optimizer Related ---- # optim_alg="SGD", # momentum=0.0, # learning_rate=0.01, # weight_decay=1e-2, # # ----- LR Scheduler Related ---- # lr_scheduler="StepLR", # lr_step_size=1, # lr_gamma=0.9, batch_size_train=(4, 16), batch_size_test=1000, optim_alg="SGD", momentum=0, # 0.9, learning_rate=0.01, # 0.1, weight_decay=0.01, # 1e-4, lr_scheduler="MultiStepLR", lr_milestones=[30, 60, 90], lr_gamma=0.9, # 0.1, # additional validation test_noise=False, # debugging debug_weights=True, debug_sparse=True, ) # ray configurations experiment_name = "gsc-smalldense-2019-10-11-exp1" tune_config = dict( name=experiment_name, num_samples=8, local_dir=os.path.expanduser(os.path.join("~/nta/results", experiment_name)), checkpoint_freq=0, checkpoint_at_end=False, stop={"training_iteration": 100}, resources_per_trial={ "cpu": os.cpu_count() / 8, "gpu": 0.5, }, loggers=DEFAULT_LOGGERS, verbose=1, config=base_exp_config, ) # define experiments net_params = dict( boost_strength=1.5, boost_strength_factor=0.9, k_inference_factor=1.5, duty_cycle_period=1000 ) experiments = { "gsc-smalldense": dict( model=ray.tune.grid_search(["BaseModel"]), network="small_dense_gsc", net_params=net_params, equivalent_on_perc=ray.tune.grid_search([ 0.02, 0.04, 0.06, 0.08, 0.10, ]), debug_small_dense=True, ), }

In [57]:
# Did any trials fail (i.e. stop before the full 100 training epochs)?
(df["epochs"] < 100).sum()


Out[57]:
0

In [58]:
# Keep an untouched copy of the raw results, then retain only trials that
# ran at least 30 epochs (drops failed or incomplete runs).
df_origin = df.copy()
complete = df_origin["epochs"] >= 30
df = df_origin[complete]
df.shape


Out[58]:
(40, 40)

In [59]:
# Which trials were dropped? (failed outright, or perhaps still running)
df_origin['failed'] = df_origin["epochs"] < 30
df_origin.loc[df_origin['failed'], 'epochs']


Out[59]:
Series([], Name: epochs, dtype: int64)

In [60]:
# helper functions
def mean_and_std(s):
    """Format a numeric series as 'mean ± std' with 3 decimal places."""
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    """Format the mean of a numeric series rounded to the nearest integer."""
    return "{:.0f}".format(round(s.mean()))

# Summary statistics reported for val_acc_max in the aggregated tables.
stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    """Group the global `df` by `columns` and summarize validation accuracy.

    Args:
        columns: column name (or list of names) to group by.
        filter: optional boolean mask selecting a subset of `df` rows.
        round: number of decimals for the aggregated table.

    Returns:
        DataFrame with the mean epoch of peak validation accuracy,
        min/max/mean/std of `val_acc_max`, and the trial count per group.

    NOTE(review): `filter` and `round` shadow Python builtins; the names are
    kept for backward compatibility with existing call sites.
    """
    # Single aggregation path — the previous version duplicated the whole
    # aggregation spec in both branches of the filter check.
    data = df if filter is None else df[filter]
    return (data.groupby(columns)
            .agg({'val_acc_max_epoch': round_mean,
                  'val_acc_max': stats,
                  'model': ['count']})).round(round)
Does improved weight pruning outperform regular SET?

In [108]:
# Aggregate peak validation accuracy per density level (8 runs each).
df_agg = agg(['equivalent_on_perc'])
df_agg


Out[108]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
equivalent_on_perc
0.02 65 0.782 0.793 0.788 0.005 8
0.04 80 0.905 0.914 0.909 0.003 8
0.06 70 0.928 0.937 0.931 0.004 8
0.08 69 0.939 0.948 0.944 0.003 8
0.10 72 0.947 0.951 0.950 0.001 8

In [109]:
# Pull the per-density mean and std of peak validation accuracy out of the
# aggregated table as plain numpy arrays for plotting.
equivalent_on_percs = df_agg.index.values
val_means = np.array(df_agg['val_acc_max']['mean'])
val_stds = np.array(df_agg['val_acc_max']['std'])

In [110]:
# Quick sanity check on the plotting inputs.
for values in (equivalent_on_percs, val_means, val_stds):
    print(values)


[0.02 0.04 0.06 0.08 0.1 ]
[0.788 0.909 0.931 0.944 0.95 ]
[0.005 0.003 0.004 0.003 0.001]

In [39]:
# Use a wide default figure size for the error-bar plot below.
# (Removed the commented-out model-name translation block — dead code left
# over from an earlier multi-model comparison that this notebook never runs.)
rcParams['figure.figsize'] = 16, 8

In [117]:
# Peak validation accuracy vs. network density for the small-dense baselines.
plt.errorbar(
    x=equivalent_on_percs,
    y=val_means,
    yerr=val_stds,
    color='k',
    marker='*',
    lw=0,           # markers only — no connecting line
    elinewidth=2,
    capsize=2,
    markersize=10,
    label="Small-Dense Equivalents"
)
plt.xlabel("equivalent_on_perc (fraction of weights on)")
plt.ylabel("val_acc_max")
# The label above is invisible without an explicit legend call.
plt.legend()
# plt.show() also suppresses the bare ErrorbarContainer repr as cell output.
plt.show()


Out[117]:
<ErrorbarContainer object of 3 artists>