In [1]:

    
%load_ext autoreload
%autoreload 2



In [2]:

    
import sys
sys.path.append("../../")



In [28]:

    
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

Load and check data



In [29]:

    
# Names of the experiments to analyse; each one is formatted into a path
# under ~/nta/results and the leading "~" is expanded to the home directory.
exps = ['neurips_1_eval1']
paths = [os.path.expanduser(p) for p in map("~/nta/results/{}".format, exps)]
# load_many is not defined in this notebook; presumably it is provided by the
# star import of dynamic_sparse.common.browser (TODO confirm).
df = load_many(paths)



In [30]:

    
df.head(5)









    Out[30]:







  
    
      
      Experiment Name
      train_acc_max
      train_acc_max_epoch
      train_acc_min
      train_acc_min_epoch
      train_acc_median
      train_acc_last
      val_acc_max
      val_acc_max_epoch
      val_acc_min
      ...
      lr_scheduler
      model
      momentum
      network
      num_classes
      on_perc
      optim_alg
      pruning_early_stop
      test_noise
      weight_decay
    
  
  
    
      0
      0_pruning_early_stop=0
      0.998533
      73
      0.910383
      0
      0.997575
      0.998417
      0.9807
      86
      0.9454
      ...
      MultiStepLR
      SET
      0.9
      MLP
      10
      0.1
      SGD
      0
      False
      0.0001
    
    
      1
      1_pruning_early_stop=1
      0.999000
      96
      0.909583
      0
      0.997783
      0.998367
      0.9814
      69
      0.9513
      ...
      MultiStepLR
      SET
      0.9
      MLP
      10
      0.1
      SGD
      1
      False
      0.0001
    
    
      2
      2_pruning_early_stop=2
      0.998033
      99
      0.907067
      0
      0.997092
      0.998033
      0.9795
      35
      0.9560
      ...
      MultiStepLR
      SET
      0.9
      MLP
      10
      0.1
      SGD
      2
      False
      0.0001
    
    
      3
      3_pruning_early_stop=3
      0.998467
      74
      0.911750
      0
      0.996508
      0.996817
      0.9828
      35
      0.9480
      ...
      MultiStepLR
      SET
      0.9
      MLP
      10
      0.1
      SGD
      3
      False
      0.0001
    
    
      4
      4_pruning_early_stop=0
      0.998350
      78
      0.911233
      0
      0.997158
      0.997400
      0.9816
      37
      0.9531
      ...
      MultiStepLR
      SET
      0.9
      MLP
      10
      0.1
      SGD
      0
      False
      0.0001
    
  

5 rows × 41 columns



In [31]:

    
df.columns









    Out[31]:





Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'data_dir', 'dataset_name', 'debug_sparse', 'debug_weights', 'device',
       'hebbian_grow', 'hebbian_prune_perc', 'hidden_sizes', 'input_size',
       'kwinners', 'learning_rate', 'lr_gamma', 'lr_milestones',
       'lr_scheduler', 'model', 'momentum', 'network', 'num_classes',
       'on_perc', 'optim_alg', 'pruning_early_stop', 'test_noise',
       'weight_decay'],
      dtype='object')



In [32]:

    
df.shape









    Out[32]:





(10, 41)



In [33]:

    
df.iloc[1]









    Out[33]:





Experiment Name                                    1_pruning_early_stop=1
train_acc_max                                                       0.999
train_acc_max_epoch                                                    96
train_acc_min                                                    0.909583
train_acc_min_epoch                                                     0
train_acc_median                                                 0.997783
train_acc_last                                                   0.998367
val_acc_max                                                        0.9814
val_acc_max_epoch                                                      69
val_acc_min                                                        0.9513
val_acc_min_epoch                                                       0
val_acc_median                                                     0.9803
val_acc_last                                                       0.9805
epochs                                                                100
experiment_file_name    /Users/lsouza/nta/results/neurips_1_eval1/expe...
trial_time                                                        20.0659
mean_epoch_time                                                  0.200659
batch_norm                                                           True
data_dir                                        /home/ubuntu/nta/datasets
dataset_name                                                        MNIST
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_grow                                                        False
hebbian_prune_perc                                                    0.3
hidden_sizes                                                          100
input_size                                                            784
kwinners                                                            False
learning_rate                                                         0.1
lr_gamma                                                              0.1
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                                 SET
momentum                                                              0.9
network                                                               MLP
num_classes                                                            10
on_perc                                                               0.1
optim_alg                                                             SGD
pruning_early_stop                                                      1
test_noise                                                          False
weight_decay                                                       0.0001
Name: 1, dtype: object



In [39]:

    
df.groupby('model')['model'].count()









    Out[39]:





model
SET    10
Name: model, dtype: int64

Analysis

Experiment Details

```python
base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="MNIST",
    data_dir=os.path.expanduser("~/nta/datasets"),
    input_size=784,
    num_classes=10,
    # network related
    network="MLP",  # "MLPHeb",
    hidden_sizes=[100, 100, 100],
    batch_norm=True,
    kwinners=False,
    # model related
    model="SET",  # "DSNNMixedHeb",
    on_perc=0.1,
    optim_alg="SGD",
    momentum=0.9,
    weight_decay=1e-4,
    learning_rate=0.1,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.1,
    # sparse related
    hebbian_prune_perc=0.3,
    pruning_early_stop=1,  # tune.grid_search([None, 1, 2, 3]),
    hebbian_grow=False,
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
    stop={"training_iteration": 100},
)
```



In [40]:

    
# Did any trials fail? Count the trials that ran for fewer than 30 epochs.
(df["epochs"] < 30).sum()









    Out[40]:





0



In [41]:

    
# Snapshot the current frame as df_origin, then keep only the trials that ran
# for at least 30 epochs in the working frame `df`.
# NOTE: `df` is rebound here, so re-running this cell makes df_origin a copy
# of the already-filtered frame.
df_origin = df.copy()
df = df_origin.loc[df_origin["epochs"].ge(30)]
df.shape









    Out[41]:





(10, 41)



In [42]:

    
# Which trials stopped before epoch 100? They either failed or were still
# running when the results were collected.
# Adds a boolean 'failed' column to df_origin, then shows the epoch count of
# every flagged trial.
df_origin['failed'] = df_origin["epochs"].lt(100)
df_origin.loc[df_origin['failed'], 'epochs']









    Out[42]:





8    80
9    63
Name: epochs, dtype: int64



In [43]:

    
# helper functions
def mean_and_std(s):
    """Format the mean and standard deviation of ``s`` as "mean ± std".

    Both values are rendered with three decimals. ``s`` must provide
    ``.mean()`` and ``.std()`` (e.g. a pandas Series); a NaN standard
    deviation is rendered as "nan".
    """
    template = "{:.3f} ± {:.3f}"
    center = s.mean()
    spread = s.std()
    return template.format(center, spread)

def round_mean(s):
    """Return the mean of ``s``, rounded with the builtin ``round``, as a string.

    The result is formatted without decimals (e.g. "20"), which makes it
    usable as a groupby aggregation for epoch counts.
    """
    rounded = round(s.mean())
    return "{:.0f}".format(rounded)

# Aggregations reported for the best validation accuracy of each group.
stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3, data=None):
    """Summarise validation accuracy per group of trials.

    Parameters
    ----------
    columns : str or list of str
        Column name(s) to group the trials by.
    filter : boolean mask, optional
        Row selector applied to the frame before grouping. ``None`` (the
        default) keeps every row.
    round : int, optional
        Number of decimals the numeric result columns are rounded to.
    data : pandas.DataFrame, optional
        Frame to aggregate. When omitted, the notebook-level ``df`` is looked
        up at call time, which is what the original implementation always did.

    Returns
    -------
    pandas.DataFrame
        One row per group with: ``round_mean`` of 'val_acc_max_epoch', the
        aggregations listed in ``stats`` for 'val_acc_max', and the count of
        the 'model' column.
    """
    # `filter` and `round` shadow the builtins only inside this function; the
    # names are kept so existing keyword calls continue to work.
    frame = df if data is None else data
    if filter is not None:
        frame = frame[filter]
    summary = frame.groupby(columns).agg({
        'val_acc_max_epoch': round_mean,
        'val_acc_max': stats,
        'model': ['count'],
    })
    return summary.round(round)

Did Hebbian perform better than SET?



In [44]:

    
agg(['model'])









    Out[44]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      count
    
    
      model
      
      
      
      
      
      
    
  
  
    
      SET
      56
      0.98
      0.983
      0.981
      0.001
      10



In [12]:

    
# Boolean row masks selecting trials by their 'on_perc' value, for use as the
# `filter` argument of agg(). The notes in this notebook refer to
# on_perc == 0.05 as 95% sparse and on_perc == 0.2 as 80% sparse.
high_sparsity = df['on_perc'].eq(0.05)
avg_sparsity = df['on_perc'].eq(0.1)
low_sparsity = df['on_perc'].eq(0.2)



In [13]:

    
agg(['kwinners'], low_sparsity)









    Out[13]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      kwinners
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      False
      21
      0.932
      0.978
      0.963
      0.009
      19
      0.908
      0.963
      0.948
      0.011
      69
    
    
      True
      22
      0.938
      0.976
      0.964
      0.008
      20
      0.911
      0.963
      0.949
      0.010
      68



In [14]:

    
agg(['kwinners'], high_sparsity)









    Out[14]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      kwinners
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      False
      24
      0.398
      0.969
      0.904
      0.128
      22
      0.392
      0.941
      0.872
      0.121
      37
    
    
      True
      23
      0.295
      0.967
      0.882
      0.166
      22
      0.292
      0.937
      0.851
      0.157
      39

No evidence of a significant difference. In networks with high sparsity, the impact of kWinners is worse, which is expected since kWinners (at 30%) will make the activations more sparse than ReLU (which is 50% sparse on average).

What is the optimal level of weight sparsity?



In [15]:

    
agg(['on_perc'])









    Out[15]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      on_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.05
      24
      0.295
      0.969
      0.893
      0.148
      22
      0.292
      0.941
      0.861
      0.140
      76
    
    
      0.10
      22
      0.888
      0.974
      0.952
      0.020
      21
      0.851
      0.954
      0.929
      0.024
      130
    
    
      0.20
      22
      0.932
      0.978
      0.963
      0.009
      19
      0.908
      0.963
      0.948
      0.010
      137

Sparsity at the 80% and 90% levels seems more or less equivalent; the difference is 1 point in accuracy. The jump from 90% to 95% sparsity shows a drastic decrease in accuracy, of 6 points.

Hebbian grow helps learning?



In [16]:

    
agg(['hebbian_grow'])









    Out[16]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      hebbian_grow
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      False
      22
      0.888
      0.978
      0.956
      0.017
      21
      0.851
      0.963
      0.932
      0.022
      176
    
    
      True
      22
      0.295
      0.972
      0.930
      0.106
      20
      0.292
      0.960
      0.911
      0.105
      167



In [17]:

    
agg(['hebbian_grow'], low_sparsity)









    Out[17]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      hebbian_grow
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      False
      20
      0.932
      0.978
      0.962
      0.011
      18
      0.908
      0.963
      0.945
      0.013
      65
    
    
      True
      23
      0.938
      0.972
      0.965
      0.005
      21
      0.914
      0.960
      0.951
      0.006
      72



In [18]:

    
agg(['hebbian_grow'], high_sparsity)









    Out[18]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      hebbian_grow
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      False
      25
      0.905
      0.969
      0.951
      0.016
      24
      0.853
      0.941
      0.916
      0.019
      48
    
    
      True
      22
      0.295
      0.954
      0.793
      0.210
      19
      0.292
      0.923
      0.769
      0.200
      28

No strong evidence that it helps in the low sparsity case. In the high sparsity case (95%), it seems very harmful.

Hebbian pruning helps learning?



In [19]:

    
agg(['hebbian_prune_perc'])









    Out[19]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      hebbian_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      16
      0.888
      0.971
      0.943
      0.024
      16
      0.851
      0.959
      0.923
      0.033
      31
    
    
      0.1
      20
      0.900
      0.969
      0.942
      0.019
      20
      0.868
      0.957
      0.922
      0.026
      45
    
    
      0.2
      23
      0.402
      0.970
      0.928
      0.102
      22
      0.396
      0.960
      0.907
      0.101
      64
    
    
      0.3
      23
      0.538
      0.968
      0.939
      0.078
      22
      0.530
      0.952
      0.918
      0.077
      67
    
    
      0.4
      24
      0.398
      0.974
      0.953
      0.073
      22
      0.392
      0.960
      0.931
      0.072
      67
    
    
      0.5
      23
      0.295
      0.978
      0.953
      0.085
      19
      0.292
      0.963
      0.930
      0.084
      69



In [20]:

    
agg(['hebbian_prune_perc'], low_sparsity)









    Out[20]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      hebbian_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      18
      0.932
      0.971
      0.956
      0.011
      18
      0.908
      0.959
      0.941
      0.016
      17
    
    
      0.1
      22
      0.945
      0.969
      0.955
      0.008
      22
      0.925
      0.957
      0.940
      0.011
      24
    
    
      0.2
      22
      0.938
      0.970
      0.961
      0.006
      21
      0.914
      0.960
      0.946
      0.009
      24
    
    
      0.3
      23
      0.960
      0.968
      0.964
      0.002
      21
      0.945
      0.952
      0.950
      0.002
      24
    
    
      0.4
      23
      0.964
      0.974
      0.969
      0.003
      20
      0.948
      0.960
      0.954
      0.003
      24
    
    
      0.5
      21
      0.967
      0.978
      0.972
      0.003
      15
      0.950
      0.963
      0.957
      0.004
      24



In [21]:

    
agg(['hebbian_prune_perc'], high_sparsity)









    Out[21]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      hebbian_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.1
      29
      0.900
      0.900
      0.900
      NaN
      29
      0.879
      0.879
      0.879
      NaN
      1
    
    
      0.2
      25
      0.402
      0.952
      0.847
      0.185
      24
      0.396
      0.920
      0.815
      0.174
      16
    
    
      0.3
      21
      0.538
      0.952
      0.882
      0.131
      21
      0.530
      0.926
      0.852
      0.124
      19
    
    
      0.4
      25
      0.398
      0.966
      0.918
      0.133
      23
      0.392
      0.937
      0.886
      0.126
      19
    
    
      0.5
      24
      0.295
      0.969
      0.914
      0.149
      21
      0.292
      0.941
      0.882
      0.142
      21

There is good evidence it helps. The trend is very clear in the low sparsity (80% sparse) cases.



In [22]:

    
# Restrict the Hebbian-pruning summary to trials with no magnitude pruning
# (weight_prune_perc equal to 0).
# NOTE(review): 'weight_prune_perc' is not among the columns printed by
# df.columns earlier in this notebook; this cell appears to come from a run on
# a different result set — confirm before re-running.
no_magnitude = df['weight_prune_perc'].eq(0)
agg(['hebbian_prune_perc'], no_magnitude)









    Out[22]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      hebbian_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      15
      0.895
      0.971
      0.939
      0.032
      15
      0.853
      0.959
      0.915
      0.047
      5
    
    
      0.1
      18
      0.905
      0.965
      0.940
      0.022
      17
      0.880
      0.953
      0.920
      0.028
      8
    
    
      0.2
      23
      0.402
      0.970
      0.905
      0.167
      22
      0.396
      0.960
      0.885
      0.163
      11
    
    
      0.3
      21
      0.561
      0.964
      0.906
      0.124
      21
      0.549
      0.951
      0.886
      0.122
      12
    
    
      0.4
      24
      0.938
      0.968
      0.960
      0.010
      22
      0.903
      0.954
      0.934
      0.017
      12
    
    
      0.5
      22
      0.942
      0.973
      0.963
      0.009
      16
      0.901
      0.954
      0.935
      0.017
      12



In [23]:

    
# Hebbian-pruning summary for trials that have no magnitude pruning
# (weight_prune_perc equal to 0) and also match the low_sparsity mask.
# NOTE(review): 'weight_prune_perc' is not among the columns printed by
# df.columns earlier in this notebook — confirm the data source before
# re-running.
no_magnitude = df['weight_prune_perc'].eq(0)
agg(['hebbian_prune_perc'], no_magnitude & low_sparsity)









    Out[23]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      hebbian_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      24
      0.959
      0.971
      0.965
      0.009
      29
      0.947
      0.959
      0.953
      0.008
      2
    
    
      0.1
      23
      0.947
      0.965
      0.956
      0.009
      21
      0.931
      0.953
      0.942
      0.012
      4
    
    
      0.2
      21
      0.958
      0.970
      0.963
      0.005
      20
      0.941
      0.960
      0.948
      0.009
      4
    
    
      0.3
      24
      0.963
      0.964
      0.964
      0.001
      24
      0.945
      0.951
      0.948
      0.003
      4
    
    
      0.4
      22
      0.966
      0.968
      0.967
      0.001
      24
      0.948
      0.954
      0.950
      0.003
      4
    
    
      0.5
      20
      0.968
      0.973
      0.971
      0.002
      10
      0.950
      0.954
      0.952
      0.002
      4

Results seem similar even when no magnitude pruning is involved, only hebbian pruning

Magnitude pruning helps learning?



In [24]:

    
agg(['weight_prune_perc'])









    Out[24]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      weight_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      21
      0.402
      0.973
      0.935
      0.091
      19
      0.396
      0.960
      0.912
      0.090
      60
    
    
      0.1
      22
      0.585
      0.976
      0.942
      0.060
      21
      0.573
      0.960
      0.919
      0.061
      62
    
    
      0.2
      24
      0.862
      0.976
      0.957
      0.018
      23
      0.836
      0.960
      0.936
      0.022
      56
    
    
      0.3
      22
      0.458
      0.976
      0.946
      0.073
      20
      0.452
      0.961
      0.925
      0.073
      55
    
    
      0.4
      22
      0.398
      0.978
      0.938
      0.094
      22
      0.392
      0.963
      0.918
      0.093
      56
    
    
      0.5
      22
      0.295
      0.976
      0.943
      0.092
      19
      0.292
      0.963
      0.921
      0.091
      54



In [25]:

    
agg(['weight_prune_perc'], low_sparsity)









    Out[25]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      weight_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      22
      0.947
      0.973
      0.964
      0.007
      21
      0.931
      0.960
      0.949
      0.007
      22
    
    
      0.1
      20
      0.932
      0.976
      0.961
      0.011
      19
      0.908
      0.960
      0.946
      0.013
      23
    
    
      0.2
      22
      0.938
      0.976
      0.964
      0.008
      19
      0.911
      0.960
      0.949
      0.011
      23
    
    
      0.3
      21
      0.940
      0.976
      0.964
      0.009
      18
      0.920
      0.961
      0.949
      0.010
      23
    
    
      0.4
      22
      0.943
      0.978
      0.964
      0.009
      22
      0.921
      0.963
      0.949
      0.010
      23
    
    
      0.5
      22
      0.946
      0.976
      0.964
      0.008
      18
      0.925
      0.963
      0.949
      0.010
      23



In [26]:

    
agg(['weight_prune_perc'], high_sparsity)









    Out[26]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      weight_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      23
      0.402
      0.962
      0.874
      0.170
      19
      0.396
      0.925
      0.843
      0.161
      15
    
    
      0.1
      27
      0.585
      0.968
      0.904
      0.110
      24
      0.573
      0.941
      0.874
      0.105
      15
    
    
      0.2
      25
      0.862
      0.969
      0.943
      0.029
      24
      0.836
      0.940
      0.911
      0.027
      13
    
    
      0.3
      24
      0.458
      0.966
      0.894
      0.148
      21
      0.452
      0.937
      0.863
      0.140
      12
    
    
      0.4
      22
      0.398
      0.967
      0.864
      0.189
      23
      0.392
      0.936
      0.834
      0.178
      12
    
    
      0.5
      21
      0.295
      0.964
      0.870
      0.217
      21
      0.292
      0.931
      0.833
      0.205
      9



In [27]:

    
agg(['weight_prune_perc'], avg_sparsity)









    Out[27]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      weight_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      19
      0.895
      0.967
      0.948
      0.021
      18
      0.853
      0.943
      0.922
      0.025
      23
    
    
      0.1
      20
      0.888
      0.970
      0.947
      0.025
      21
      0.851
      0.948
      0.922
      0.032
      24
    
    
      0.2
      27
      0.932
      0.974
      0.959
      0.011
      26
      0.909
      0.954
      0.938
      0.011
      20
    
    
      0.3
      22
      0.912
      0.974
      0.956
      0.015
      22
      0.884
      0.953
      0.935
      0.018
      20
    
    
      0.4
      22
      0.902
      0.973
      0.953
      0.021
      21
      0.873
      0.952
      0.931
      0.024
      21
    
    
      0.5
      22
      0.901
      0.974
      0.951
      0.021
      19
      0.871
      0.950
      0.929
      0.025
      22

In low sparsity cases, results are the same for any amount of pruning. In average and high sparsity, there is a gaussian like curve, with the peak at around 0.2 (maybe extending to 0.3).
Results are consistent with what has been seen in previous experiments and in related papers.
Worth note that although results are better at 0.2, it also takes slightly longer to achieve better results compared to m



In [28]:

    
# Magnitude-pruning summary restricted to trials without Hebbian pruning
# (hebbian_prune_perc equal to 0).
# NOTE(review): 'weight_prune_perc' is not among the columns printed by
# df.columns earlier in this notebook — confirm the data source before
# re-running.
no_hebbian = df['hebbian_prune_perc'].eq(0)
agg(['weight_prune_perc'], no_hebbian)









    Out[28]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      noise_acc_max_epoch
      noise_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      round_mean
      min
      max
      mean
      std
      count
    
    
      weight_prune_perc
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0.0
      15
      0.895
      0.971
      0.939
      0.032
      15
      0.853
      0.959
      0.915
      0.047
      5
    
    
      0.1
      10
      0.888
      0.960
      0.928
      0.030
      10
      0.851
      0.951
      0.903
      0.042
      7
    
    
      0.2
      22
      0.938
      0.966
      0.953
      0.011
      21
      0.911
      0.953
      0.937
      0.016
      5
    
    
      0.3
      13
      0.912
      0.965
      0.944
      0.025
      16
      0.884
      0.953
      0.926
      0.031
      4
    
    
      0.4
      15
      0.902
      0.964
      0.944
      0.025
      16
      0.876
      0.953
      0.926
      0.031
      5
    
    
      0.5
      22
      0.943
      0.963
      0.955
      0.009
      21
      0.929
      0.950
      0.940
      0.011
      5

Somewhat inconsistent results when looking at cases where there is no Hebbian learning, only pruning by magnitude. There is an anomaly at the last entry, where 50% of the weights are pruned: results are similar to 20%.
The number of samples averaged over is a lot lower in this pivot.

What is the optimal combination of Hebbian and magnitude pruning?



In [29]:

    
# "mean ± std" of the best validation accuracy for every combination of
# hebbian_prune_perc (rows) and weight_prune_perc (columns).
df.pivot_table(
    index='hebbian_prune_perc',
    columns='weight_prune_perc',
    values='val_acc_max',
    aggfunc=mean_and_std,
)









    Out[29]:







  
    
      weight_prune_perc
      0.0
      0.1
      0.2
      0.3
      0.4
      0.5
    
    
      hebbian_prune_perc
      
      
      
      
      
      
    
  
  
    
      0.0
      0.939 ± 0.032
      0.928 ± 0.030
      0.953 ± 0.011
      0.944 ± 0.025
      0.944 ± 0.025
      0.955 ± 0.009
    
    
      0.1
      0.940 ± 0.022
      0.944 ± 0.017
      0.950 ± 0.012
      0.948 ± 0.011
      0.935 ± 0.025
      0.940 ± 0.024
    
    
      0.2
      0.905 ± 0.167
      0.914 ± 0.110
      0.951 ± 0.015
      0.908 ± 0.150
      0.951 ± 0.019
      0.943 ± 0.022
    
    
      0.3
      0.906 ± 0.124
      0.942 ± 0.058
      0.957 ± 0.009
      0.957 ± 0.012
      0.921 ± 0.127
      0.955 ± 0.015
    
    
      0.4
      0.960 ± 0.010
      0.949 ± 0.054
      0.966 ± 0.007
      0.968 ± 0.004
      0.918 ± 0.164
      0.966 ± 0.005
    
    
      0.5
      0.963 ± 0.009
      0.967 ± 0.007
      0.960 ± 0.032
      0.952 ± 0.058
      0.970 ± 0.004
      0.908 ± 0.204



In [40]:

    
# Same pivot, restricted to rows matching the low_sparsity mask and further
# split by the kwinners flag on the row index.
df[low_sparsity].pivot_table(
    index=['kwinners', 'hebbian_prune_perc'],
    columns='weight_prune_perc',
    values='val_acc_max',
    aggfunc=mean_and_std,
)









    Out[40]:







  
    
      
      weight_prune_perc
      0.0
      0.1
      0.2
      0.3
      0.4
      0.5
    
    
      kwinners
      hebbian_prune_perc
      
      
      
      
      
      
    
  
  
    
      False
      0.0
      0.971 ± nan
      0.946 ± 0.020
      0.966 ± nan
      0.951 ± 0.016
      0.954 ± 0.015
      0.963 ± nan
    
    
      0.1
      0.957 ± 0.011
      0.952 ± 0.008
      0.959 ± 0.003
      0.955 ± 0.011
      0.955 ± 0.014
      0.954 ± 0.008
    
    
      0.2
      0.960 ± 0.004
      0.948 ± 0.014
      0.960 ± 0.003
      0.963 ± 0.006
      0.963 ± 0.009
      0.960 ± 0.003
    
    
      0.3
      0.963 ± 0.001
      0.965 ± 0.001
      0.963 ± 0.002
      0.966 ± 0.001
      0.964 ± 0.002
      0.964 ± 0.000
    
    
      0.4
      0.967 ± 0.002
      0.968 ± 0.004
      0.969 ± 0.004
      0.970 ± 0.002
      0.970 ± 0.004
      0.969 ± 0.004
    
    
      0.5
      0.970 ± 0.003
      0.971 ± 0.005
      0.971 ± 0.005
      0.972 ± 0.003
      0.974 ± 0.006
      0.973 ± 0.004
    
    
      True
      0.0
      0.959 ± nan
      0.959 ± nan
      0.950 ± 0.017
      0.965 ± nan
      0.963 ± nan
      0.956 ± 0.010
    
    
      0.1
      0.956 ± 0.012
      0.954 ± 0.008
      0.955 ± 0.005
      0.955 ± 0.009
      0.955 ± 0.011
      0.958 ± 0.016
    
    
      0.2
      0.966 ± 0.006
      0.962 ± 0.002
      0.964 ± 0.007
      0.962 ± 0.007
      0.961 ± 0.003
      0.962 ± 0.007
    
    
      0.3
      0.964 ± 0.000
      0.964 ± 0.002
      0.964 ± 0.002
      0.967 ± 0.002
      0.964 ± 0.005
      0.965 ± 0.002
    
    
      0.4
      0.967 ± 0.000
      0.970 ± 0.003
      0.969 ± 0.004
      0.971 ± 0.002
      0.970 ± 0.003
      0.969 ± 0.007
    
    
      0.5
      0.972 ± 0.002
      0.973 ± 0.004
      0.973 ± 0.004
      0.973 ± 0.004
      0.973 ± 0.003
      0.974 ± 0.003



In [41]:

    
# Same pivot, restricted to rows matching the avg_sparsity mask and further
# split by the kwinners flag on the row index.
df[avg_sparsity].pivot_table(
    index=['kwinners', 'hebbian_prune_perc'],
    columns='weight_prune_perc',
    values='val_acc_max',
    aggfunc=mean_and_std,
)









    Out[41]:







  
    
      
      weight_prune_perc
      0.0
      0.1
      0.2
      0.3
      0.4
      0.5
    
    
      kwinners
      hebbian_prune_perc
      
      
      
      
      
      
    
  
  
    
      False
      0.0
      0.925 ± 0.042
      0.903 ± 0.000
      0.950 ± nan
      0.912 ± nan
      0.902 ± nan
      0.956 ± nan
    
    
      0.1
      0.924 ± 0.026
      0.943 ± 0.011
      0.931 ± nan
      0.936 ± nan
      0.915 ± 0.016
      0.911 ± 0.014
    
    
      0.2
      0.955 ± 0.004
      0.930 ± 0.034
      0.955 ± 0.003
      0.950 ± 0.005
      0.953 ± 0.007
      0.950 ± 0.007
    
    
      0.3
      0.954 ± 0.011
      0.958 ± 0.005
      0.959 ± 0.003
      0.960 ± 0.002
      0.961 ± 0.002
      0.961 ± 0.001
    
    
      0.4
      0.963 ± 0.003
      0.965 ± 0.006
      0.969 ± 0.005
      0.968 ± 0.006
      0.968 ± 0.002
      0.967 ± 0.004
    
    
      0.5
      0.965 ± 0.002
      0.968 ± 0.003
      0.970 ± 0.005
      0.969 ± 0.004
      0.970 ± 0.003
      0.969 ± 0.007
    
    
      True
      0.0
      0.915 ± nan
      0.919 ± 0.044
      0.950 ± nan
      NaN
      0.946 ± nan
      0.943 ± nan
    
    
      0.1
      0.923 ± 0.018
      0.926 ± 0.027
      0.942 ± nan
      0.940 ± 0.004
      0.929 ± nan
      0.937 ± 0.029
    
    
      0.2
      0.949 ± 0.008
      0.954 ± 0.001
      0.950 ± 0.001
      0.956 ± 0.006
      0.957 ± 0.007
      0.928 ± 0.027
    
    
      0.3
      0.954 ± 0.009
      0.961 ± 0.004
      0.959 ± 0.001
      0.962 ± 0.004
      0.960 ± 0.004
      0.958 ± 0.003
    
    
      0.4
      0.964 ± 0.005
      0.966 ± 0.006
      0.967 ± 0.007
      0.964 ± 0.006
      0.964 ± 0.008
      0.966 ± 0.007
    
    
      0.5
      0.965 ± 0.004
      0.968 ± 0.003
      0.970 ± 0.004
      0.970 ± 0.005
      0.970 ± 0.004
      0.968 ± 0.006

There is a clearer trend in the low-sparsity case. Results from high sparsity are inconclusive, with several runs failing to "converge".
Weight (magnitude) pruning alone improves the model by up to 0.7% when going from 10% to 50% pruning.
Hebbian pruning alone improves the model by 1.5%
Combining both raises the improvement from the 1.5% seen with Hebbian pruning only to 1.8%.
The comparisons above are from 0.1 to 0.5 pruning. An open question is why the case with no pruning on either axis - the (0,0) point - is an anomaly relative to the trend shown in the pivot table.



In [39]:

    
# Best validation accuracy for the `avg_sparsity` selection of `df`:
# Hebbian pruning percentage (rows) against weight pruning percentage
# (columns), with each cell aggregated by `mean_and_std`.
df[avg_sparsity].pivot_table(
    values='val_acc_max',
    index='hebbian_prune_perc',
    columns='weight_prune_perc',
    aggfunc=mean_and_std,
)









    Out[39]:







  
    
      weight_prune_perc
      0.0
      0.1
      0.2
      0.3
      0.4
      0.5
    
    
      hebbian_prune_perc
      
      
      
      
      
      
    
  
  
    
      0.0
      0.921 ± 0.030
      0.911 ± 0.027
      0.950 ± 0.000
      0.912 ± nan
      0.924 ± 0.031
      0.950 ± 0.009
    
    
      0.1
      0.923 ± 0.018
      0.934 ± 0.019
      0.937 ± 0.007
      0.939 ± 0.003
      0.919 ± 0.014
      0.924 ± 0.024
    
    
      0.2
      0.952 ± 0.006
      0.942 ± 0.024
      0.953 ± 0.003
      0.953 ± 0.006
      0.955 ± 0.006
      0.939 ± 0.020
    
    
      0.3
      0.954 ± 0.008
      0.960 ± 0.004
      0.959 ± 0.002
      0.961 ± 0.003
      0.961 ± 0.003
      0.959 ± 0.002
    
    
      0.4
      0.964 ± 0.004
      0.965 ± 0.005
      0.968 ± 0.005
      0.966 ± 0.005
      0.966 ± 0.005
      0.967 ± 0.004
    
    
      0.5
      0.965 ± 0.003
      0.968 ± 0.002
      0.970 ± 0.004
      0.970 ± 0.004
      0.970 ± 0.003
      0.969 ± 0.005



In [31]:

    
# Best validation accuracy for the `high_sparsity` selection of `df`:
# Hebbian pruning percentage (rows) against weight pruning percentage
# (columns), with each cell aggregated by `mean_and_std`.
df[high_sparsity].pivot_table(
    values='val_acc_max',
    index='hebbian_prune_perc',
    columns='weight_prune_perc',
    aggfunc=mean_and_std,
)









    Out[31]:







  
    
      weight_prune_perc
      0.0
      0.1
      0.2
      0.3
      0.4
      0.5
    
    
      hebbian_prune_perc
      
      
      
      
      
      
    
  
  
    
      0.1
      NaN
      NaN
      NaN
      NaN
      0.900 ± nan
      NaN
    
    
      0.2
      0.764 ± 0.313
      0.821 ± 0.205
      0.932 ± 0.017
      0.776 ± 0.276
      0.919 ± 0.020
      0.916 ± 0.011
    
    
      0.3
      0.802 ± 0.185
      0.901 ± 0.095
      0.945 ± 0.007
      0.940 ± 0.007
      0.811 ± 0.237
      0.927 ± 0.008
    
    
      0.4
      0.948 ± 0.010
      0.912 ± 0.090
      0.959 ± 0.008
      0.965 ± 0.001
      0.817 ± 0.280
      0.960 ± 0.003
    
    
      0.5
      0.953 ± 0.009
      0.960 ± 0.009
      0.937 ± 0.051
      0.912 ± 0.096
      0.965 ± 0.002
      0.740 ± 0.386



In [ ]:

	Experiment Name	train_acc_max	train_acc_max_epoch	train_acc_min	train_acc_median	train_acc_last	val_acc_max	val_acc_max_epoch	val_acc_min	...	lr_scheduler	model	momentum	network	num_classes	on_perc	optim_alg	pruning_early_stop	test_noise	weight_decay
0	0_pruning_early_stop=0	0.998533	73	0.910383	0.997575	0.998417	0.9807	86	0.9454	...	MultiStepLR	SET	0.9	MLP	10	0.1	SGD	0	False	0.0001
1	1_pruning_early_stop=1	0.999000	96	0.909583	0.997783	0.998367	0.9814	69	0.9513	...	MultiStepLR	SET	0.9	MLP	10	0.1	SGD	1	False	0.0001
2	2_pruning_early_stop=2	0.998033	99	0.907067	0.997092	0.998033	0.9795	35	0.9560	...	MultiStepLR	SET	0.9	MLP	10	0.1	SGD	2	False	0.0001
3	3_pruning_early_stop=3	0.998467	74	0.911750	0.996508	0.996817	0.9828	35	0.9480	...	MultiStepLR	SET	0.9	MLP	10	0.1	SGD	3	False	0.0001
4	4_pruning_early_stop=0	0.998350	78	0.911233	0.997158	0.997400	0.9816	37	0.9531	...	MultiStepLR	SET	0.9	MLP	10	0.1	SGD	0	False	0.0001

	val_acc_max_epoch	val_acc_max				noise_acc_max_epoch	noise_acc_max				model
	round_mean	min	max	mean	std	round_mean	min	max	mean	std	count
kwinners
False	21	0.932	0.978	0.963	0.009	19	0.908	0.963	0.948	0.011	69
True	22	0.938	0.976	0.964	0.008	20	0.911	0.963	0.949	0.010	68

weight_prune_perc	0.0	0.1	0.2	0.3	0.4	0.5
hebbian_prune_perc
0.0	0.939 ± 0.032	0.928 ± 0.030	0.953 ± 0.011	0.944 ± 0.025	0.944 ± 0.025	0.955 ± 0.009
0.1	0.940 ± 0.022	0.944 ± 0.017	0.950 ± 0.012	0.948 ± 0.011	0.935 ± 0.025	0.940 ± 0.024
0.2	0.905 ± 0.167	0.914 ± 0.110	0.951 ± 0.015	0.908 ± 0.150	0.951 ± 0.019	0.943 ± 0.022
0.3	0.906 ± 0.124	0.942 ± 0.058	0.957 ± 0.009	0.957 ± 0.012	0.921 ± 0.127	0.955 ± 0.015
0.4	0.960 ± 0.010	0.949 ± 0.054	0.966 ± 0.007	0.968 ± 0.004	0.918 ± 0.164	0.966 ± 0.005
0.5	0.963 ± 0.009	0.967 ± 0.007	0.960 ± 0.032	0.952 ± 0.058	0.970 ± 0.004	0.908 ± 0.204

weight_prune_perc	0.0	0.1	0.2	0.3	0.4	0.5
hebbian_prune_perc
0.0	0.921 ± 0.030	0.911 ± 0.027	0.950 ± 0.000	0.912 ± nan	0.924 ± 0.031	0.950 ± 0.009
0.1	0.923 ± 0.018	0.934 ± 0.019	0.937 ± 0.007	0.939 ± 0.003	0.919 ± 0.014	0.924 ± 0.024
0.2	0.952 ± 0.006	0.942 ± 0.024	0.953 ± 0.003	0.953 ± 0.006	0.955 ± 0.006	0.939 ± 0.020
0.3	0.954 ± 0.008	0.960 ± 0.004	0.959 ± 0.002	0.961 ± 0.003	0.961 ± 0.003	0.959 ± 0.002
0.4	0.964 ± 0.004	0.965 ± 0.005	0.968 ± 0.005	0.966 ± 0.005	0.966 ± 0.005	0.967 ± 0.004
0.5	0.965 ± 0.003	0.968 ± 0.002	0.970 ± 0.004	0.970 ± 0.004	0.970 ± 0.003	0.969 ± 0.005

weight_prune_perc	0.0	0.1	0.2	0.3	0.4	0.5
hebbian_prune_perc
0.1	NaN	NaN	NaN	NaN	0.900 ± nan	NaN
0.2	0.764 ± 0.313	0.821 ± 0.205	0.932 ± 0.017	0.776 ± 0.276	0.919 ± 0.020	0.916 ± 0.011
0.3	0.802 ± 0.185	0.901 ± 0.095	0.945 ± 0.007	0.940 ± 0.007	0.811 ± 0.237	0.927 ± 0.008
0.4	0.948 ± 0.010	0.912 ± 0.090	0.959 ± 0.008	0.965 ± 0.001	0.817 ± 0.280	0.960 ± 0.003
0.5	0.953 ± 0.009	0.960 ± 0.009	0.937 ± 0.051	0.912 ± 0.096	0.965 ± 0.002	0.740 ± 0.386