Experiment:

Evaluate pruning by magnitude weighted by coactivations (a more thorough evaluation than previous runs) and compare it to the SET baseline on the Google Speech Commands (GSC) dataset. Pruning is applied only to the linear layers.
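For reference, a minimal sketch of the pruning criterion under evaluation. SET ranks weights by magnitude |w| alone; here the magnitude is additionally weighted by how often the weight's input and output units coactivate. Tensor names and the scoring below are illustrative assumptions, not the exact DSNNWeightedMag implementation.

import torch

def weighted_magnitude_keep_mask(weight, coacts, weight_prune_perc=0.3):
    # weight: (out_features, in_features) weights of a linear layer
    # coacts: same shape; running coactivation counts for each (post, pre)
    #         unit pair (illustrative; assumed to be tracked during training)
    score = weight.abs() * coacts
    active = weight != 0  # only currently active weights are prune candidates
    num_prune = int(weight_prune_perc * active.sum().item())
    if num_prune == 0:
        return active
    # drop the num_prune active weights with the lowest combined score
    cutoff = score[active].kthvalue(num_prune).values
    return active & (score > cutoff)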

Motivation:

Check whether the results are consistently above the SET baseline.

Conclusion:

# experiment configurations
base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="PreprocessedGSC",
    data_dir=os.path.expanduser("~/nta/datasets/gsc"),
    batch_size_train=(4, 16),
    batch_size_test=1000,
    # network related
    network="GSCHeb",
    optim_alg="SGD",
    momentum=0,
    learning_rate=0.01,
    weight_decay=0.01,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.90,
    use_kwinners=True,
    # model related
    model=tune.grid_search(["DSNNWeightedMag", "DSNNMixedHeb", "SparseModel"]),
    on_perc=tune.grid_search(list(np.arange(0, 0.101, 0.005))),
    # sparse related
    hebbian_prune_perc=None,
    hebbian_grow=False,
    weight_prune_perc=0.3,
    pruning_early_stop=2,
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
tune_config = dict(
    name=__file__.replace(".py", "") + "_eval3",
    num_samples=1,
    # num_samples=3,
    local_dir=os.path.expanduser("~/nta/results"),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 100},
    resources_per_trial={"cpu": 1, "gpu": .25},
    loggers=DEFAULT_LOGGERS,
    verbose=0,
)
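A quick sanity check on the sweep size implied by the two grid_search parameters (everything else is fixed):

import numpy as np

models = ["DSNNWeightedMag", "DSNNMixedHeb", "SparseModel"]
on_percs = list(np.arange(0, 0.101, 0.005))  # 0.000, 0.005, ..., 0.100
print(len(on_percs))                 # 21 density levels
print(len(models) * len(on_percs))   # 63 trials per sample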

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from nupic.research.frameworks.dynamic_sparse.common.browser import *

import matplotlib.pyplot as plt
from matplotlib import rcParams

%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set(style="whitegrid")
sns.set_palette("colorblind")

Load and check data


In [3]:
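# combine trials from both eval runs into a single DataFrame (one row per trial)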
exps = ['improved_mag_gsc_eval3', 'improved_mag_gsc_eval20']
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)

In [4]:
df.head(5)


Out[4]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... momentum network on_perc optim_alg pruning_early_stop test_noise use_kwinners weight_decay weight_prune_perc sparse_linear_only
0 0_model=DSNNWeightedMag,on_perc=0.0 0.092960 19 0.087052 47 0.089689 0.089542 0.103448 22 0.000000 ... 0.0 GSCHeb 0.000 SGD 2 False True 0.01 0.3 NaN
1 1_model=DSNNMixedHeb,on_perc=0.0 0.091934 24 0.085880 33 0.089591 0.089542 0.103448 10 0.000000 ... 0.0 GSCHeb 0.000 SGD 2 False True 0.01 0.3 NaN
2 2_model=SparseModel,on_perc=0.0 0.092325 38 0.085587 99 0.089322 0.085587 0.103448 5 0.000000 ... 0.0 GSCHeb 0.000 SGD 2 False True 0.01 0.3 NaN
3 3_model=DSNNWeightedMag,on_perc=0.005 0.399522 7 0.156821 0 0.199736 0.196026 0.394592 8 0.145376 ... 0.0 GSCHeb 0.005 SGD 2 False True 0.01 0.3 NaN
4 4_model=DSNNMixedHeb,on_perc=0.005 0.365931 11 0.181330 0 0.296016 0.296065 0.362461 11 0.087382 ... 0.0 GSCHeb 0.005 SGD 2 False True 0.01 0.3 NaN

5 rows × 41 columns


In [5]:
# replace NaN pruning percentages with 0.0
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].replace(np.nan, 0.0, regex=True)
df['weight_prune_perc'] = df['weight_prune_perc'].replace(np.nan, 0.0, regex=True)

In [6]:
df.columns


Out[6]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time',
       'batch_size_test', 'batch_size_train', 'data_dir', 'dataset_name',
       'debug_sparse', 'debug_weights', 'device', 'hebbian_grow',
       'hebbian_prune_perc', 'learning_rate', 'lr_gamma', 'lr_milestones',
       'lr_scheduler', 'model', 'momentum', 'network', 'on_perc', 'optim_alg',
       'pruning_early_stop', 'test_noise', 'use_kwinners', 'weight_decay',
       'weight_prune_perc', 'sparse_linear_only'],
      dtype='object')

In [7]:
df.shape


Out[7]:
(423, 41)

In [8]:
df.iloc[1]


Out[8]:
Experiment Name                          1_model=DSNNMixedHeb,on_perc=0.0
train_acc_max                                                   0.0919344
train_acc_max_epoch                                                    24
train_acc_min                                                   0.0858803
train_acc_min_epoch                                                    33
train_acc_median                                                0.0895909
train_acc_last                                                   0.089542
val_acc_max                                                      0.103448
val_acc_max_epoch                                                      10
val_acc_min                                                             0
val_acc_min_epoch                                                       0
val_acc_median                                                  0.0971787
val_acc_last                                                    0.0971787
epochs                                                                100
experiment_file_name    /Users/lsouza/nta/results/improved_mag_gsc_eva...
trial_time                                                        26.1431
mean_epoch_time                                                  0.261431
batch_size_test                                                      1000
batch_size_train                                                       10
data_dir                                    /home/ubuntu/nta/datasets/gsc
dataset_name                                              PreprocessedGSC
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_grow                                                        False
hebbian_prune_perc                                                      0
learning_rate                                                        0.01
lr_gamma                                                              0.9
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                        DSNNMixedHeb
momentum                                                                0
network                                                            GSCHeb
on_perc                                                                 0
optim_alg                                                             SGD
pruning_early_stop                                                      2
test_noise                                                          False
use_kwinners                                                         True
weight_decay                                                         0.01
weight_prune_perc                                                     0.3
sparse_linear_only                                                    NaN
Name: 1, dtype: object

In [9]:
df.groupby('model')['model'].count()


Out[9]:
model
DSNNMixedHeb       141
DSNNWeightedMag    141
SparseModel        141
Name: model, dtype: int64

Analysis

Experiment Details


In [10]:
# Did any trials fail?
df[df["epochs"]<30]["epochs"].count()


Out[10]:
6

In [11]:
# Removing failed or incomplete trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=30]
df.shape


Out[11]:
(417, 41)

In [12]:
# Which ones failed?
# (failed outright, or still ongoing?)
df_origin['failed'] = df_origin["epochs"]<30
df_origin[df_origin['failed']]['epochs']


Out[12]:
417    20
418    20
419    21
420     8
421     7
422     6
Name: epochs, dtype: int64

In [13]:
# helper functions
def mean_and_std(s):
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    if filter is None:
        return (df.groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,                
                   'model': ['count']})).round(round)
    else:
        return (df[filter].groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,                
                   'model': ['count']})).round(round)
Does improved weight pruning outperforms regular SET
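A quick head-to-head summary before the detailed tables (a sketch reusing the df loaded above; this summary is illustrative and was not part of the original analysis):

# mean best validation accuracy per (density, model) pair
pivot = (df.groupby(['on_perc', 'model'])['val_acc_max']
           .mean()
           .unstack('model'))
# fraction of density levels at which each dynamic model beats the static baseline
print((pivot['DSNNWeightedMag'] > pivot['SparseModel']).mean())
print((pivot['DSNNMixedHeb'] > pivot['SparseModel']).mean())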

In [14]:
agg(['model'])


Out[14]:
                 val_acc_max_epoch  val_acc_max                      model
                        round_mean    min    max   mean    std       count
model
DSNNMixedHeb                    43  0.103  0.951  0.817  0.180         139
DSNNWeightedMag                 40  0.103  0.952  0.818  0.180         139
SparseModel                     66  0.103  0.948  0.825  0.183         139

In [15]:
agg(['on_perc'])


Out[15]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
on_perc
0.000 20 0.103 0.106 0.106 0.001 21
0.005 25 0.218 0.738 0.628 0.136 21
0.010 27 0.609 0.817 0.762 0.056 21
0.015 31 0.743 0.845 0.811 0.027 21
0.020 33 0.772 0.863 0.832 0.028 21
0.025 39 0.793 0.905 0.842 0.031 21
0.030 48 0.811 0.915 0.862 0.025 21
0.035 49 0.840 0.918 0.868 0.018 21
0.040 60 0.842 0.926 0.879 0.019 21
0.045 63 0.850 0.934 0.880 0.022 21
0.050 51 0.864 0.938 0.886 0.020 21
0.055 55 0.861 0.937 0.885 0.023 21
0.060 51 0.862 0.935 0.886 0.022 21
0.065 65 0.866 0.945 0.892 0.023 18
0.070 62 0.865 0.943 0.894 0.023 18
0.075 48 0.870 0.949 0.897 0.024 18
0.080 62 0.875 0.948 0.896 0.023 18
0.085 64 0.866 0.947 0.902 0.022 18
0.090 76 0.878 0.951 0.900 0.023 18
0.095 66 0.879 0.949 0.901 0.023 18
0.100 71 0.878 0.952 0.901 0.023 18

In [16]:
agg(['on_perc', 'model'])


Out[16]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
on_perc model
0.000 DSNNMixedHeb 27 0.103 0.106 0.106 0.001 7
DSNNWeightedMag 16 0.103 0.106 0.106 0.001 7
SparseModel 16 0.103 0.106 0.106 0.001 7
0.005 DSNNMixedHeb 7 0.362 0.726 0.641 0.126 7
DSNNWeightedMag 5 0.395 0.721 0.623 0.110 7
SparseModel 64 0.218 0.738 0.619 0.182 7
0.010 DSNNMixedHeb 7 0.639 0.809 0.745 0.058 7
DSNNWeightedMag 8 0.723 0.793 0.763 0.028 7
SparseModel 66 0.609 0.817 0.778 0.075 7
0.015 DSNNMixedHeb 9 0.787 0.845 0.807 0.021 7
DSNNWeightedMag 10 0.743 0.837 0.803 0.030 7
SparseModel 75 0.761 0.844 0.823 0.028 7
0.020 DSNNMixedHeb 9 0.772 0.852 0.823 0.025 7
DSNNWeightedMag 14 0.777 0.860 0.819 0.029 7
SparseModel 75 0.819 0.863 0.854 0.016 7
0.025 DSNNMixedHeb 23 0.793 0.873 0.822 0.029 7
DSNNWeightedMag 15 0.808 0.905 0.839 0.032 7
SparseModel 80 0.841 0.882 0.865 0.013 7
0.030 DSNNMixedHeb 32 0.811 0.890 0.849 0.026 7
DSNNWeightedMag 33 0.831 0.915 0.862 0.028 7
SparseModel 78 0.858 0.891 0.875 0.013 7
0.035 DSNNMixedHeb 31 0.845 0.894 0.861 0.016 7
DSNNWeightedMag 47 0.840 0.918 0.866 0.026 7
SparseModel 69 0.871 0.883 0.877 0.005 7
0.040 DSNNMixedHeb 63 0.842 0.916 0.872 0.024 7
DSNNWeightedMag 49 0.865 0.926 0.882 0.022 7
SparseModel 69 0.866 0.895 0.882 0.010 7
0.045 DSNNMixedHeb 52 0.859 0.930 0.878 0.024 7
DSNNWeightedMag 72 0.850 0.934 0.877 0.028 7
SparseModel 64 0.866 0.910 0.886 0.016 7
... ... ... ... ... ... ... ...
0.055 DSNNMixedHeb 71 0.861 0.937 0.880 0.026 7
DSNNWeightedMag 28 0.861 0.936 0.889 0.026 7
SparseModel 67 0.863 0.928 0.887 0.022 7
0.060 DSNNMixedHeb 62 0.862 0.935 0.884 0.025 7
DSNNWeightedMag 38 0.863 0.935 0.881 0.025 7
SparseModel 55 0.877 0.934 0.892 0.019 7
0.065 DSNNMixedHeb 64 0.884 0.945 0.898 0.024 6
DSNNWeightedMag 58 0.866 0.943 0.886 0.029 6
SparseModel 74 0.880 0.929 0.892 0.019 6
0.070 DSNNMixedHeb 64 0.871 0.936 0.897 0.023 6
DSNNWeightedMag 64 0.874 0.943 0.890 0.026 6
SparseModel 60 0.865 0.935 0.894 0.024 6
0.075 DSNNMixedHeb 47 0.885 0.949 0.901 0.024 6
DSNNWeightedMag 49 0.870 0.948 0.893 0.028 6
SparseModel 47 0.879 0.944 0.897 0.024 6
0.080 DSNNMixedHeb 60 0.885 0.944 0.900 0.022 6
DSNNWeightedMag 53 0.878 0.948 0.893 0.027 6
SparseModel 72 0.875 0.938 0.894 0.023 6
0.085 DSNNMixedHeb 54 0.888 0.947 0.902 0.022 6
DSNNWeightedMag 58 0.866 0.940 0.903 0.026 6
SparseModel 79 0.880 0.946 0.901 0.024 6
0.090 DSNNMixedHeb 73 0.887 0.951 0.905 0.023 6
DSNNWeightedMag 82 0.880 0.949 0.898 0.026 6
SparseModel 74 0.878 0.944 0.897 0.024 6
0.095 DSNNMixedHeb 77 0.885 0.947 0.903 0.022 6
DSNNWeightedMag 61 0.890 0.949 0.904 0.023 6
SparseModel 59 0.879 0.948 0.896 0.026 6
0.100 DSNNMixedHeb 73 0.886 0.946 0.905 0.023 6
DSNNWeightedMag 70 0.878 0.952 0.902 0.026 6
SparseModel 69 0.880 0.946 0.897 0.025 6

63 rows × 6 columns


In [17]:
# translate model names
rcParams['figure.figsize'] = 16, 8
d = {
    'DSNNWeightedMag': 'WeightedMagnitude',
    'DSNNMixedHeb': 'SET',
    'SparseModel': 'Static',        
}
df_plot = df.copy()
df_plot['model'] = df_plot['model'].apply(lambda x: d[x])

In [18]:
# sns.scatterplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a289d1b38>

In [19]:
# sns.scatterplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
plt.ylim(0.8,0.98)


Out[19]:
(0.8, 0.98)

In [20]:
rcParams['figure.figsize'] = 16, 8
mask = df_plot['model'] != 'Static'
sns.lineplot(data=df_plot[mask], x='on_perc', y='val_acc_max_epoch', hue='model')


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2875bda0>

In [21]:
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_last', hue='model')


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a286d2080>