Experiment:

Evaluate pruning by magnitude weighted by coactivations.

Motivation:

Test the newly proposed method.
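For reference, a minimal sketch of the idea under test (function name, tensor layout, and the exact scoring are assumptions; the actual implementation is the DSNNWeightedMag model in this repo): each active weight is scored by its magnitude scaled by the coactivation of its pre- and post-synaptic units, and the lowest-scoring fraction of active connections is pruned.

import torch

def prune_by_weighted_magnitude(weight, coacts, prune_perc):
    # Hypothetical sketch: score = |weight| * coactivation, then drop the
    # `prune_perc` fraction of currently-active connections with the lowest
    # scores. Returns the boolean mask of connections to keep.
    score = weight.abs() * coacts
    on_mask = weight != 0
    num_prune = int(prune_perc * on_mask.sum().item())
    if num_prune == 0:
        return on_mask
    threshold = score[on_mask].kthvalue(num_prune).values
    return on_mask & (score > threshold)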


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../../")

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

Load and check data


In [4]:
exps = ['improved_magpruning_test1', ]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)
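load_many comes from the project's browser utilities and returns one summary row per trial. A rough, hypothetical stand-in for what it presumably does (directory layout and column names are assumptions, not the actual implementation):

import glob
import os
import pandas as pd

def load_many_sketch(paths):
    # Hypothetical: read each ray tune trial's progress.csv under every
    # experiment folder and reduce it to one summary row per trial.
    rows = []
    for path in paths:
        for progress in glob.glob(os.path.join(path, "*", "progress.csv")):
            trial = pd.read_csv(progress)
            rows.append({
                "experiment_file_name": progress,
                "epochs": len(trial),
                "val_acc_max": trial["val_acc"].max(),     # assumed column name
                "val_acc_last": trial["val_acc"].iloc[-1],
            })
    return pd.DataFrame(rows)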

In [5]:
df.head(5)


Out[5]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... momentum network num_classes on_perc optim_alg pruning_early_stop test_noise use_kwinners weight_decay weight_prune_perc
0 0_weight_prune_perc=None 0.988317 28 0.925067 0 0.985508 0.987333 0.9761 17 0.9628 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 NaN
1 1_weight_prune_perc=0.1 0.992317 29 0.926417 0 0.988500 0.992317 0.9803 25 0.9562 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.1
2 2_weight_prune_perc=0.2 0.992067 24 0.925400 0 0.988567 0.991500 0.9793 28 0.9598 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.2
3 3_weight_prune_perc=0.3 0.991183 29 0.927700 0 0.987950 0.991183 0.9814 24 0.9612 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.3
4 4_weight_prune_perc=0.4 0.990383 24 0.925767 0 0.986525 0.990117 0.9787 21 0.9632 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.4

5 rows × 41 columns


In [6]:
# replace NaN prune percentages (None in the grid search) with 0.0
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].replace(np.nan, 0.0, regex=True)
df['weight_prune_perc'] = df['weight_prune_perc'].replace(np.nan, 0.0, regex=True)

In [7]:
df.columns


Out[7]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'data_dir', 'dataset_name', 'debug_sparse', 'debug_weights', 'device',
       'hebbian_prune_perc', 'hidden_sizes', 'input_size', 'learning_rate',
       'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model', 'momentum',
       'network', 'num_classes', 'on_perc', 'optim_alg', 'pruning_early_stop',
       'test_noise', 'use_kwinners', 'weight_decay', 'weight_prune_perc'],
      dtype='object')

In [8]:
df.shape


Out[8]:
(24, 41)

In [9]:
df.iloc[1]


Out[9]:
Experiment Name                                   1_weight_prune_perc=0.1
train_acc_max                                                    0.992317
train_acc_max_epoch                                                    29
train_acc_min                                                    0.926417
train_acc_min_epoch                                                     0
train_acc_median                                                   0.9885
train_acc_last                                                   0.992317
val_acc_max                                                        0.9803
val_acc_max_epoch                                                      25
val_acc_min                                                        0.9562
val_acc_min_epoch                                                       0
val_acc_median                                                     0.9769
val_acc_last                                                       0.9764
epochs                                                                 30
experiment_file_name    /Users/lsouza/nta/results/improved_magpruning_...
trial_time                                                        17.3449
mean_epoch_time                                                  0.578165
batch_norm                                                           True
data_dir                                        /home/ubuntu/nta/datasets
dataset_name                                                        MNIST
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_prune_perc                                                      0
hidden_sizes                                                          100
input_size                                                            784
learning_rate                                                         0.1
lr_gamma                                                              0.1
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                     DSNNWeightedMag
momentum                                                              0.9
network                                                            MLPHeb
num_classes                                                            10
on_perc                                                               0.2
optim_alg                                                             SGD
pruning_early_stop                                                      0
test_noise                                                          False
use_kwinners                                                        False
weight_decay                                                       0.0001
weight_prune_perc                                                     0.1
Name: 1, dtype: object

In [10]:
df.groupby('model')['model'].count()


Out[10]:
model
DSNNWeightedMag    24
Name: model, dtype: int64

Analysis

Experiment Details

base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="MNIST",
    data_dir=os.path.expanduser("~/nta/datasets"),
    input_size=784,
    num_classes=10,
    # network related
    network="MLPHeb",
    hidden_sizes=[100, 100, 100],
    batch_norm=True,
    use_kwinners=False,
    # model related
    model="DSNNWeightedMag",
    on_perc=0.2,
    optim_alg="SGD",
    momentum=0.9,
    weight_decay=1e-4,
    learning_rate=0.1,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.1,
    # sparse related
    hebbian_prune_perc=None,
    weight_prune_perc=tune.grid_search([None, 0.1, 0.2, 0.3, 0.4, 0.5]),
    pruning_early_stop=0,
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
tune_config = dict(
    name=__file__.replace(".py", "") + "_test1",
    num_samples=4,
    local_dir=os.path.expanduser("~/nta/results"),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 30},
    resources_per_trial={"cpu": 1, "gpu": 0.165},
    loggers=DEFAULT_LOGGERS,
    verbose=0,
)
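The config above is presumably launched with ray.tune roughly as sketched below; base_experiment is a hypothetical stand-in for the project's actual training function, not an API shown in this notebook.

import ray
from ray import tune
from ray.tune.logger import DEFAULT_LOGGERS

def base_experiment(config, reporter):
    # Hypothetical stand-in: the real trainable builds the MLPHeb network with
    # the DSNNWeightedMag model from `config`, trains it, and reports
    # validation accuracy after each epoch.
    for epoch in range(30):
        reporter(val_acc=0.0, epochs=epoch + 1)  # placeholder metrics

ray.init()
tune.run(
    base_experiment,
    config=base_exp_config,  # grid over weight_prune_perc defined above
    **tune_config,           # name, num_samples, stop criteria, resources, loggers
)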

In [11]:
# Did any trials fail?
df[df["epochs"]<30]["epochs"].count()


Out[11]:
0

In [12]:
# Removing failed or incomplete trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=30]
df.shape


Out[12]:
(24, 41)

In [13]:
# which trials failed, or are still ongoing?
df_origin['failed'] = df_origin["epochs"]<30
df_origin[df_origin['failed']]['epochs']


Out[13]:
Series([], Name: epochs, dtype: int64)

In [14]:
# helper functions
def mean_and_std(s):
    """Format a series as 'mean ± std'."""
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    """Format the mean of a series rounded to the nearest integer."""
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    """Group df by `columns` (optionally restricted to rows where `filter` is True)
    and summarize validation accuracy and trial counts."""
    if filter is None:
        return (df.groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,
                   'model': ['count']})).round(round)
    else:
        return (df[filter].groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,
                   'model': ['count']})).round(round)
What are the optimal levels of hebbian and weight pruning?

In [21]:
agg(['weight_prune_perc'])


Out[21]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
weight_prune_perc
0.0 20 0.976 0.977 0.977 0.000 4
0.1 22 0.979 0.982 0.980 0.001 4
0.2 27 0.978 0.979 0.979 0.001 4
0.3 24 0.980 0.981 0.980 0.001 4
0.4 24 0.979 0.981 0.980 0.001 4
0.5 23 0.978 0.980 0.979 0.001 4

In [22]:
multi2 = (df['weight_prune_perc'] % 0.2 == 0)
agg(['weight_prune_perc'], multi2)


Out[22]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
weight_prune_perc
0.0 20 0.976 0.977 0.977 0.000 4
0.2 27 0.978 0.979 0.979 0.001 4
0.4 24 0.979 0.981 0.980 0.001 4
  • No relevant difference
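Side note: the modulo filter above relies on exact floating-point representation of the pruning levels; an equivalent selection that is robust to float error (a sketch, not what was run here) compares against the target values with a tolerance:

import numpy as np

# Select the 0.0 / 0.2 / 0.4 pruning levels without relying on float modulo.
targets = np.array([0.0, 0.2, 0.4])
multi2 = df['weight_prune_perc'].apply(lambda p: np.isclose(p, targets).any())
agg(['weight_prune_perc'], multi2)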

In [23]:
pd.pivot_table(df[filter], 
              index='hebbian_prune_perc',
              columns='weight_prune_perc',
              values='val_acc_max',
              aggfunc=mean_and_std)


Out[23]:
weight_prune_perc 0.0 0.2 0.4 0.6 0.8
hebbian_prune_perc
0.0 0.976 ± 0.000 0.981 ± 0.001 0.980 ± 0.000 0.980 ± 0.001 0.979 ± 0.001
0.2 0.974 ± 0.001 0.979 ± 0.001 0.979 ± 0.003 0.979 ± 0.002 0.977 ± 0.001
0.4 0.971 ± 0.001 0.980 ± 0.001 0.979 ± 0.001 0.979 ± 0.001 0.978 ± 0.001
0.6 0.969 ± 0.001 0.979 ± 0.001 0.980 ± 0.001 0.979 ± 0.001 0.978 ± 0.001
0.8 0.966 ± 0.001 0.980 ± 0.001 0.980 ± 0.002 0.980 ± 0.001 0.978 ± 0.000
1.0 0.964 ± 0.001 0.981 ± 0.001 0.981 ± 0.001 0.980 ± 0.001 0.980 ± 0.001

In [19]:
df.shape


Out[19]:
(108, 42)

Conclusions:

  • No pruning, i.e. (hebbian, weight) = (0, 0), leads to an accuracy of 0.976
  • Pruning all connections at every epoch (1, 0) leads to an accuracy of 0.964
  • The best performing model is still no hebbian pruning with weight pruning set to 0.2 (0.981)
  • Pruning by hebbian learning alone decreases accuracy
  • Combining hebbian and weight-magnitude pruning is not an improvement over simple weight-magnitude pruning

In [ ]: