Experiment:

Evaluate pruning by magnitude weighted by coactivations (a more thorough evaluation) and compare it to the baseline (SET) on GSC. Pruning is applied only to the linear layers.

Motivation.

Check if results are consistently above baseline.

Conclusion

```python
# experiment configurations
base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="PreprocessedGSC",
    data_dir=os.path.expanduser("~/nta/datasets/gsc"),
    batch_size_train=(4, 16),
    batch_size_test=1000,
    # network related
    network="GSCHeb",
    optim_alg="SGD",
    momentum=0,
    learning_rate=0.01,
    weight_decay=0.01,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.90,
    use_kwinners=True,
    # model related
    model=tune.grid_search(["DSNNWeightedMag", "DSNNMixedHeb", "SparseModel"]),
    on_perc=tune.grid_search(list(np.arange(0, 0.101, 0.005))),
    # sparse related
    hebbian_prune_perc=None,
    hebbian_grow=False,
    weight_prune_perc=0.3,
    pruning_early_stop=2,
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
tune_config = dict(
    name=__file__.replace(".py", "") + "_eval3",
    num_samples=1,
    # num_samples=3,
    local_dir=os.path.expanduser("~/nta/results"),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 100},
    resources_per_trial={"cpu": 1, "gpu": .25},
    loggers=DEFAULT_LOGGERS,
    verbose=0,
)
```



In [1]:

    
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2



In [2]:

    
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from nupic.research.frameworks.dynamic_sparse.common.browser import *

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams

%config InlineBackend.figure_format = 'retina'

import seaborn as sns
# Global seaborn styling applied to every figure in this notebook:
# white-grid background and the "colorblind" colour palette.
sns.set(style="whitegrid")
sns.set_palette("colorblind")

Load and check data



In [3]:

    
# Names of the result folders to analyse; each one is resolved under
# ~/nta/results and all of them are loaded into a single DataFrame.
# NOTE(review): `load_many` presumably comes from the dynamic_sparse browser
# star-import above — confirm.
exps = ['improved_mag_gsc_eval3', 'improved_mag_gsc_eval4']
paths = list(
    map(lambda name: os.path.expanduser("~/nta/results/{}".format(name)), exps)
)
df = load_many(paths)



In [4]:

    
# Preview the first five loaded trials.
df.head(5)









    Out[4]:







  
    
      
      Experiment Name
      train_acc_max
      train_acc_max_epoch
      train_acc_min
      train_acc_min_epoch
      train_acc_median
      train_acc_last
      val_acc_max
      val_acc_max_epoch
      val_acc_min
      ...
      model
      momentum
      network
      on_perc
      optim_alg
      pruning_early_stop
      test_noise
      use_kwinners
      weight_decay
      weight_prune_perc
    
  
  
    
      0
      0_model=DSNNWeightedMag,on_perc=0.0
      0.092960
      19
      0.087052
      47
      0.089689
      0.089542
      0.103448
      22
      0.000000
      ...
      DSNNWeightedMag
      0
      GSCHeb
      0.000
      SGD
      2
      False
      True
      0.01
      0.3
    
    
      1
      1_model=DSNNMixedHeb,on_perc=0.0
      0.091934
      24
      0.085880
      33
      0.089591
      0.089542
      0.103448
      10
      0.000000
      ...
      DSNNMixedHeb
      0
      GSCHeb
      0.000
      SGD
      2
      False
      True
      0.01
      0.3
    
    
      2
      2_model=SparseModel,on_perc=0.0
      0.092325
      38
      0.085587
      99
      0.089322
      0.085587
      0.103448
      5
      0.000000
      ...
      SparseModel
      0
      GSCHeb
      0.000
      SGD
      2
      False
      True
      0.01
      0.3
    
    
      3
      3_model=DSNNWeightedMag,on_perc=0.005
      0.399522
      7
      0.156821
      0
      0.199736
      0.196026
      0.394592
      8
      0.145376
      ...
      DSNNWeightedMag
      0
      GSCHeb
      0.005
      SGD
      2
      False
      True
      0.01
      0.3
    
    
      4
      4_model=DSNNMixedHeb,on_perc=0.005
      0.365931
      11
      0.181330
      0
      0.296016
      0.296065
      0.362461
      11
      0.087382
      ...
      DSNNMixedHeb
      0
      GSCHeb
      0.005
      SGD
      2
      False
      True
      0.01
      0.3
    
  

5 rows × 41 columns



In [5]:

    
# Replace missing pruning percentages with 0.0.
# `fillna` is the direct way to say this; the previous
# `replace(np.nan, 0.0, regex=True)` passed a regex flag that has no meaning
# for a non-string target.
for prune_col in ('hebbian_prune_perc', 'weight_prune_perc'):
    df[prune_col] = df[prune_col].fillna(0.0)



In [6]:

    
# List every column available in the loaded results.
df.columns









    Out[6]:





Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'val_acc_all',
       'epochs', 'experiment_file_name', 'trial_time', 'mean_epoch_time',
       'batch_size_test', 'batch_size_train', 'data_dir', 'dataset_name',
       'debug_sparse', 'debug_weights', 'device', 'hebbian_grow',
       'hebbian_prune_perc', 'learning_rate', 'lr_gamma', 'lr_milestones',
       'lr_scheduler', 'model', 'momentum', 'network', 'on_perc', 'optim_alg',
       'pruning_early_stop', 'test_noise', 'use_kwinners', 'weight_decay',
       'weight_prune_perc'],
      dtype='object')



In [7]:

    
# (rows, columns) of the loaded results, before any filtering.
df.shape









    Out[7]:





(269, 41)



In [8]:

    
# Show every field of a single row (position 1) to sanity-check the loaded values.
df.iloc[1]









    Out[8]:





Experiment Name                          1_model=DSNNMixedHeb,on_perc=0.0
train_acc_max                                                   0.0919344
train_acc_max_epoch                                                    24
train_acc_min                                                   0.0858803
train_acc_min_epoch                                                    33
train_acc_median                                                0.0895909
train_acc_last                                                   0.089542
val_acc_max                                                      0.103448
val_acc_max_epoch                                                      10
val_acc_min                                                             0
val_acc_min_epoch                                                       0
val_acc_median                                                  0.0971787
val_acc_last                                                    0.0971787
val_acc_all             0     0.000000
1     0.101489
2     0.097179
3...
epochs                                                                100
experiment_file_name    /Users/lsouza/nta/results/improved_mag_gsc_eva...
trial_time                                                        26.1431
mean_epoch_time                                                  0.261431
batch_size_test                                                      1000
batch_size_train                                                       10
data_dir                                    /home/ubuntu/nta/datasets/gsc
dataset_name                                              PreprocessedGSC
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_grow                                                        False
hebbian_prune_perc                                                      0
learning_rate                                                        0.01
lr_gamma                                                              0.9
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                        DSNNMixedHeb
momentum                                                                0
network                                                            GSCHeb
on_perc                                                                 0
optim_alg                                                             SGD
pruning_early_stop                                                      2
test_noise                                                          False
use_kwinners                                                         True
weight_decay                                                         0.01
weight_prune_perc                                                     0.3
Name: 1, dtype: object



In [9]:

    
# Number of rows per model, to check that the grid is balanced.
df.groupby('model')['model'].count()









    Out[9]:





model
DSNNMixedHeb       90
DSNNWeightedMag    90
SparseModel        89
Name: model, dtype: int64

Analysis

Experiment Details



In [10]:

    
# Did any trials fail? Count the rows that logged fewer than 30 epochs.
df.loc[df["epochs"] < 30, "epochs"].count()









    Out[10]:





5



In [11]:

    
# Keep a full copy of the results in `df_origin`, then continue the analysis
# with only the rows that logged at least 30 epochs.
df_origin = df.copy()
df = df_origin.loc[df_origin["epochs"] >= 30]
df.shape









    Out[11]:





(264, 41)



In [12]:

    
# Which trials failed (or are still running)?
# Flag rows with fewer than 30 epochs on the unfiltered copy and show how far
# each of them got.
df_origin['failed'] = df_origin["epochs"].lt(30)
df_origin.loc[df_origin['failed'], 'epochs']









    Out[12]:





264    26
265    23
266    26
267    11
268     1
Name: epochs, dtype: int64



In [13]:

    
# helper functions
def mean_and_std(s):
    """Format a numeric series as ``"<mean> ± <std>"``, three decimals each."""
    center, spread = s.mean(), s.std()
    return " ± ".join(format(value, ".3f") for value in (center, spread))

def round_mean(s):
    """Return the mean of a series, rounded to an integer, as a string."""
    nearest = round(s.mean())
    return format(nearest, ".0f")

# Summary statistics reported for `val_acc_max` within each group.
stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    """Summarise the global results frame ``df`` grouped by ``columns``.

    For every group this reports the rounded mean of ``val_acc_max_epoch``
    (via ``round_mean``), the min / max / mean / std of ``val_acc_max``, and
    the number of rows in the group.

    Parameters
    ----------
    columns : list of str
        Column names to group by.
    filter : boolean mask, optional
        Row selector applied as ``df[filter]`` before grouping. When ``None``
        every row of ``df`` is used.
    round : int, default 3
        Number of decimals the numeric summary columns are rounded to.

    Returns
    -------
    pandas.DataFrame
        One row per group, with two-level column labels.

    Notes
    -----
    ``filter`` and ``round`` shadow the builtins of the same name inside this
    function; the names are kept so existing keyword callers keep working.
    """
    # Select the rows once so the aggregation spec is written a single time.
    trials = df if filter is None else df[filter]
    summary = trials.groupby(columns).agg({
        'val_acc_max_epoch': round_mean,
        'val_acc_max': stats,
        'model': ['count'],
    })
    return summary.round(round)

Does improved weight pruning outperform regular SET?



In [14]:

    
# Summary per model, pooled over every on_perc value.
agg(['model'])









    Out[14]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      count
    
    
      model
      
      
      
      
      
      
    
  
  
    
      DSNNMixedHeb
      54
      0.103
      0.954
      0.823
      0.228
      88
    
    
      DSNNWeightedMag
      63
      0.103
      0.952
      0.834
      0.223
      88
    
    
      SparseModel
      74
      0.103
      0.949
      0.808
      0.238
      88



In [15]:

    
# Summary per on_perc value, pooled over the three models.
agg(['on_perc'])









    Out[15]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      count
    
    
      on_perc
      
      
      
      
      
      
    
  
  
    
      0.000
      10
      0.103
      0.106
      0.104
      0.001
      15
    
    
      0.005
      25
      0.218
      0.425
      0.352
      0.059
      15
    
    
      0.010
      35
      0.609
      0.732
      0.666
      0.044
      15
    
    
      0.015
      40
      0.735
      0.822
      0.779
      0.028
      15
    
    
      0.020
      50
      0.807
      0.880
      0.836
      0.025
      12
    
    
      0.025
      56
      0.841
      0.905
      0.875
      0.021
      12
    
    
      0.030
      73
      0.865
      0.915
      0.891
      0.016
      12
    
    
      0.035
      69
      0.878
      0.922
      0.900
      0.015
      12
    
    
      0.040
      70
      0.895
      0.927
      0.914
      0.010
      12
    
    
      0.045
      77
      0.910
      0.942
      0.926
      0.009
      12
    
    
      0.050
      78
      0.915
      0.938
      0.927
      0.006
      12
    
    
      0.055
      73
      0.928
      0.942
      0.933
      0.004
      12
    
    
      0.060
      73
      0.931
      0.947
      0.936
      0.005
      12
    
    
      0.065
      82
      0.929
      0.947
      0.940
      0.006
      12
    
    
      0.070
      74
      0.924
      0.947
      0.938
      0.006
      12
    
    
      0.075
      84
      0.939
      0.949
      0.944
      0.003
      12
    
    
      0.080
      80
      0.938
      0.952
      0.944
      0.004
      12
    
    
      0.085
      82
      0.938
      0.951
      0.945
      0.004
      12
    
    
      0.090
      83
      0.943
      0.951
      0.948
      0.002
      12
    
    
      0.095
      74
      0.942
      0.954
      0.947
      0.003
      12
    
    
      0.100
      83
      0.946
      0.954
      0.949
      0.002
      12



In [24]:

    
# Compare the three models at a single setting, on_perc = 0.02.
# on_perc values come from a floating-point grid (np.arange in the experiment
# config), so select the level with a tolerance instead of exact `==`, which
# can silently match nothing for other grid values.
agg(['on_perc', 'model'], np.isclose(df['on_perc'], 0.02))









    Out[24]:







  
    
      
      
      val_acc_max_epoch
      val_acc_max
      model
    
    
      
      
      round_mean
      min
      max
      mean
      std
      count
    
    
      on_perc
      model
      
      
      
      
      
      
    
  
  
    
      0.02
      DSNNMixedHeb
      39
      0.819
      0.852
      0.836
      0.015
      4
    
    
      DSNNWeightedMag
      42
      0.830
      0.880
      0.859
      0.025
      4
    
    
      SparseModel
      70
      0.807
      0.820
      0.814
      0.007
      4



In [17]:

    
# Translate internal model identifiers into the labels shown in the figures.
rcParams['figure.figsize'] = 16, 8
d = dict(
    DSNNWeightedMag='Dynamic Sparse Neural Network (DSNN)',
    DSNNMixedHeb='Sparse Evolutionary Training (SET)',
    SparseModel='Static Sparse',
)
# Work on a copy so the analysis frame keeps the original identifiers.
df_plot = df.copy()
df_plot['model'] = df_plot['model'].map(lambda model_name: d[model_name])



In [18]:

    
# Best validation accuracy against on_perc, one line per model.
# Disabled alternative: sns.scatterplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')









    Out[18]:





<matplotlib.axes._subplots.AxesSubplot at 0x1a20bb9f60>



In [19]:

    
def plot_for_paper():
    """Apply the matplotlib settings used for the publication figure.

    Mutates the global matplotlib rcParams: 10x6 figure size, 14-pt tick
    labels, axis labels, legend and base font, legend placed at the lower
    right, and a thin grid. Returns nothing.
    """
    rcParams['figure.figsize'] = 10, 6
    # The original set the 'ytick' label size twice; once per axis is enough.
    for tick_group in ('xtick', 'ytick'):
        matplotlib.rc(tick_group, labelsize=14)
    plt.rcParams.update({
        'axes.labelsize': 14,
        'legend.fontsize': 14,
        'legend.loc': 'lower right',
        'font.size': 14,
        'axes.grid': True,
        'grid.linewidth': 0.5,
    })
    

plot_for_paper()

# Draw on an explicit Axes and save through the Figure object.
fig, ax = plt.subplots()
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model', ax=ax)
ax.set_xlabel("% of active weights (σ)")
ax.set_ylabel("test accuracy")
ax.set_ylim((0.4, 1.0))
# Rebuild the legend without its first entry.
# NOTE(review): presumably that entry is the hue title seaborn inserts — confirm
# against the installed seaborn version.
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles=handles[1:], labels=labels[1:])
fig.savefig('active_weights.png', dpi=300, bbox_inches='tight')



In [20]:

    
# Alternative rcParams preset, kept disabled for reference.
# `font_size` is the only live statement. It used to be indented under a
# column-0 comment, which raises IndentationError when the cell runs.
font_size = 12
# plt.rcParams['figure.figsize'] = (8, 3)
# plt.rcParams.update({'font.size': font_size})
# plt.rcParams.update({'font.family': 'Times New Roman'})
# plt.rcParams.update({'axes.labelsize': font_size})
# plt.rcParams.update({'axes.titlesize': 1.5*font_size})
# plt.rcParams.update({'legend.fontsize': font_size})
# plt.rcParams.update({'xtick.labelsize': font_size})
# plt.rcParams.update({'ytick.labelsize': font_size})
# plt.rcParams.update({'savefig.dpi': 2*plt.rcParams['savefig.dpi']})
# plt.rcParams.update({'xtick.major.size': 3})
# plt.rcParams.update({'xtick.minor.size': 3})
# plt.rcParams.update({'xtick.major.width': 1})
# plt.rcParams.update({'xtick.minor.width': 1})
# plt.rcParams.update({'ytick.major.size': 3})
# plt.rcParams.update({'ytick.minor.size': 3})
# plt.rcParams.update({'ytick.major.width': 1})
# plt.rcParams.update({'ytick.minor.width': 1 })
# plt.rcParams.update({})



In [21]:

    
# Same accuracy-vs-on_perc plot, with the y-axis restricted to 0.80–0.98.
# Disabled alternative: sns.scatterplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
plt.ylim(0.8,0.98)









    Out[21]:





(0.8, 0.98)



In [22]:

    
# Epoch at which each model reached its best validation accuracy, excluding
# the static baseline.
rcParams['figure.figsize'] = 16, 8
# The static model is labelled 'Static Sparse' in df_plot. The previous
# comparison against 'Static' matched no row, so nothing was filtered out.
filter = df_plot['model'] != 'Static Sparse'
sns.lineplot(data=df_plot[filter], x='on_perc', y='val_acc_max_epoch', hue='model')









    Out[22]:





<matplotlib.axes._subplots.AxesSubplot at 0x1a20115128>



In [23]:

    
# Validation accuracy at the last recorded epoch (val_acc_last) against on_perc, per model.
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_last', hue='model')









    Out[23]:





<matplotlib.axes._subplots.AxesSubplot at 0x1a20aa4668>



In [ ]:

	Experiment Name	train_acc_max	train_acc_max_epoch	train_acc_min	train_acc_min_epoch	train_acc_median	train_acc_last	val_acc_max	val_acc_max_epoch	val_acc_min	...	model	network	on_perc	optim_alg	pruning_early_stop	test_noise	use_kwinners	weight_decay	weight_prune_perc
0	0_model=DSNNWeightedMag,on_perc=0.0	0.092960	19	0.087052	47	0.089689	0.089542	0.103448	22	0.000000	...	DSNNWeightedMag	GSCHeb	0.000	SGD	2	False	True	0.01	0.3
1	1_model=DSNNMixedHeb,on_perc=0.0	0.091934	24	0.085880	33	0.089591	0.089542	0.103448	10	0.000000	...	DSNNMixedHeb	GSCHeb	0.000	SGD	2	False	True	0.01	0.3
2	2_model=SparseModel,on_perc=0.0	0.092325	38	0.085587	99	0.089322	0.085587	0.103448	5	0.000000	...	SparseModel	GSCHeb	0.000	SGD	2	False	True	0.01	0.3
3	3_model=DSNNWeightedMag,on_perc=0.005	0.399522	7	0.156821	0	0.199736	0.196026	0.394592	8	0.145376	...	DSNNWeightedMag	GSCHeb	0.005	SGD	2	False	True	0.01	0.3
4	4_model=DSNNMixedHeb,on_perc=0.005	0.365931	11	0.181330	0	0.296016	0.296065	0.362461	11	0.087382	...	DSNNMixedHeb	GSCHeb	0.005	SGD	2	False	True	0.01	0.3

	val_acc_max_epoch	val_acc_max				model
	round_mean	min	max	mean	std	count
model
DSNNMixedHeb	54	0.103	0.954	0.823	0.228	88
DSNNWeightedMag	63	0.103	0.952	0.834	0.223	88
SparseModel	74	0.103	0.949	0.808	0.238	88