Experiment:

Analyze Hebbian learning with different choices of hyperparameters:

  • Different pruning percentages (hebbian_prune_perc)
  • Hebbian grow enabled or not
  • KWinners or ReLU

Motivation

  • Verify whether pruning by coactivations is better than pruning by magnitude
  • Verify whether adding weights by coactivations is better than adding them at random

Conclusion

  • Accuracy is negatively correlated with the Hebbian pruning percentage: there is a 0.4% difference in accuracy between the base model (0.976) and pruning 30% every epoch via Hebbian learning (0.972). This is in contrast to what is seen with magnitude-based pruning.
  • Random growth outperforms Hebbian growth by ~0.2%
  • ReLU is better than KWinners (with 25% on), by ~0.2%

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../../")

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

Load and check data


In [4]:
exps = ['neurips_debug_test2', ]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)

In [5]:
df.head(5)


Out[5]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... model momentum network num_classes on_perc optim_alg pruning_early_stop test_noise use_kwinners weight_decay
0 0_hebbian_grow=True,hebbian_prune_perc=0,use_k... 0.982033 29 0.908617 0 0.978142 0.982033 0.9725 18 0.9545 ... DSNNMixedHeb 0.9 MLPHeb 10 0.2 SGD 0 False True 0.0001
1 1_hebbian_grow=False,hebbian_prune_perc=0,use_... 0.983183 27 0.913283 0 0.979750 0.982017 0.9749 11 0.9602 ... DSNNMixedHeb 0.9 MLPHeb 10 0.2 SGD 0 False True 0.0001
2 2_hebbian_grow=True,hebbian_prune_perc=0.1,use... 0.971917 29 0.910133 0 0.965208 0.971917 0.9699 23 0.9523 ... DSNNMixedHeb 0.9 MLPHeb 10 0.2 SGD 0 False True 0.0001
3 3_hebbian_grow=False,hebbian_prune_perc=0.1,us... 0.978083 14 0.908817 0 0.976125 0.974350 0.9741 11 0.9527 ... DSNNMixedHeb 0.9 MLPHeb 10 0.2 SGD 0 False True 0.0001
4 4_hebbian_grow=True,hebbian_prune_perc=0.2,use... 0.967267 28 0.906567 0 0.960475 0.967167 0.9707 29 0.9514 ... DSNNMixedHeb 0.9 MLPHeb 10 0.2 SGD 0 False True 0.0001

5 rows × 41 columns


In [6]:
df.columns


Out[6]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'data_dir', 'dataset_name', 'debug_sparse', 'debug_weights', 'device',
       'hebbian_grow', 'hebbian_prune_perc', 'hidden_sizes', 'input_size',
       'learning_rate', 'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model',
       'momentum', 'network', 'num_classes', 'on_perc', 'optim_alg',
       'pruning_early_stop', 'test_noise', 'use_kwinners', 'weight_decay'],
      dtype='object')

In [7]:
df.shape


Out[7]:
(72, 41)

In [8]:
df.iloc[1]


Out[8]:
Experiment Name         1_hebbian_grow=False,hebbian_prune_perc=0,use_...
train_acc_max                                                    0.983183
train_acc_max_epoch                                                    27
train_acc_min                                                    0.913283
train_acc_min_epoch                                                     0
train_acc_median                                                  0.97975
train_acc_last                                                   0.982017
val_acc_max                                                        0.9749
val_acc_max_epoch                                                      11
val_acc_min                                                        0.9602
val_acc_min_epoch                                                       0
val_acc_median                                                     0.9713
val_acc_last                                                       0.9736
epochs                                                                 30
experiment_file_name    /Users/lsouza/nta/results/neurips_debug_test2/...
trial_time                                                        18.5101
mean_epoch_time                                                  0.617004
batch_norm                                                           True
data_dir                                        /home/ubuntu/nta/datasets
dataset_name                                                        MNIST
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_grow                                                        False
hebbian_prune_perc                                                      0
hidden_sizes                                                          100
input_size                                                            784
learning_rate                                                         0.1
lr_gamma                                                              0.1
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                        DSNNMixedHeb
momentum                                                              0.9
network                                                            MLPHeb
num_classes                                                            10
on_perc                                                               0.2
optim_alg                                                             SGD
pruning_early_stop                                                      0
test_noise                                                          False
use_kwinners                                                         True
weight_decay                                                       0.0001
Name: 1, dtype: object

In [9]:
df.groupby('model')['model'].count()


Out[9]:
model
DSNNMixedHeb    72
Name: model, dtype: int64

Analysis

Experiment Details

base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="MNIST",
    data_dir=os.path.expanduser("~/nta/datasets"),
    input_size=784,
    num_classes=10,
    # network related
    network="MLPHeb",
    hidden_sizes=[100, 100, 100],
    batch_norm=True,
    use_kwinners=tune.grid_search([True, False]),
    # model related
    model="DSNNMixedHeb",
    on_perc=0.2,
    optim_alg="SGD",
    momentum=0.9,
    weight_decay=1e-4,
    learning_rate=0.1,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.1,
    # sparse related
    hebbian_prune_perc=tune.grid_search([0, 0.1, 0.2, 0.3, 0.4, 0.5]),
    pruning_early_stop=0,
    hebbian_grow=tune.grid_search([True, False]),
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
    stop={"training_iteration": 30},
)
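For reference, the grid search above spans 6 × 2 × 2 = 24 configurations, and the 72 trials loaded earlier are consistent with 3 runs per configuration. A quick sanity check of that count (a sketch using only columns already in df):

# Count trials per grid-search configuration; each group is expected to
# contain 72 / (6 * 2 * 2) = 3 trials.
trials_per_config = df.groupby(
    ['hebbian_prune_perc', 'hebbian_grow', 'use_kwinners']).size()
print(trials_per_config.unique())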

In [10]:
# Did any trials fail?
df[df["epochs"]<30]["epochs"].count()


Out[10]:
0

In [11]:
# Removing failed or incomplete trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=30]
df.shape


Out[11]:
(72, 41)

In [12]:
# Which trials failed, or are still ongoing?
df_origin['failed'] = df_origin["epochs"]<30
df_origin[df_origin['failed']]['epochs']


Out[12]:
Series([], Name: epochs, dtype: int64)

In [13]:
# helper functions
def mean_and_std(s):
    """Format a numeric series as 'mean ± std'."""
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    """Return the mean of a numeric series, rounded to the nearest integer."""
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    """Group df by `columns` (optionally restricted by a boolean mask `filter`)
    and summarize the best validation accuracy and the trial count."""
    if filter is None:
        return (df.groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,                
                   'model': ['count']})).round(round)
    else:
        return (df[filter].groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,                
                   'model': ['count']})).round(round)
Which level of hebbian pruning was better?

In [14]:
agg(['hebbian_prune_perc'])


Out[14]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
hebbian_prune_perc
0.0 17 0.972 0.978 0.976 0.002 12
0.1 14 0.970 0.978 0.973 0.002 12
0.2 13 0.968 0.975 0.972 0.002 12
0.3 24 0.968 0.975 0.971 0.002 12
0.4 23 0.967 0.973 0.970 0.002 12
0.5 23 0.967 0.973 0.970 0.002 12

In [19]:
relu_only = (df['use_kwinners'] == False)
agg(['hebbian_prune_perc'], relu_only)


Out[19]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
hebbian_prune_perc
0.0 19 0.976 0.978 0.977 0.000 6
0.1 8 0.971 0.978 0.974 0.002 6
0.2 9 0.971 0.975 0.973 0.002 6
0.3 26 0.971 0.975 0.972 0.001 6
0.4 23 0.970 0.973 0.971 0.001 6
0.5 21 0.969 0.973 0.971 0.001 6

In [20]:
kwinners_only = (df['use_kwinners'] == True)
agg(['hebbian_prune_perc'], kwinners_only)


Out[20]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
hebbian_prune_perc
0.0 16 0.972 0.976 0.975 0.001 6
0.1 21 0.970 0.976 0.973 0.003 6
0.2 16 0.968 0.972 0.970 0.001 6
0.3 21 0.968 0.970 0.969 0.001 6
0.4 24 0.967 0.970 0.969 0.001 6
0.5 25 0.967 0.970 0.968 0.001 6
  • No evidence of Hebbian learning improving performance. In fact, the opposite: there is a clear reduction in accuracy as the Hebbian prune percentage increases from 0 to 0.5 (a quick correlation check follows below).
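A minimal sketch to quantify the negative-correlation claim from the conclusion, using only columns already present in df (the exact correlation value is not reported in the original notebook):

# Pearson correlation between the Hebbian pruning percentage and the best
# validation accuracy across all trials; a negative value supports the bullet above.
corr = df['hebbian_prune_perc'].astype(float).corr(df['val_acc_max'])
print("correlation(hebbian_prune_perc, val_acc_max) = {:.3f}".format(corr))
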
Did hebbian grow help?

In [15]:
with_pruning = (df['hebbian_prune_perc'] > 0)
agg(['hebbian_grow'], with_pruning)


Out[15]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
hebbian_grow
False 17 0.967 0.978 0.972 0.003 30
True 22 0.968 0.973 0.970 0.001 30
  • Random growth outperforms growing connections by coactivations (Hebbian grow) by ~0.2% (a rough significance check is sketched below)
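Since the ~0.2% gap is of the same order as the spread across trials, a rough significance check helps put it in context. A minimal sketch, assuming scipy is installed (it is not imported in the original notebook):

# Welch's t-test comparing best validation accuracy with and without Hebbian
# growth, restricted to runs that actually prune (hebbian_prune_perc > 0).
from scipy import stats as scipy_stats

pruned = df[df['hebbian_prune_perc'] > 0]
grow = pruned[pruned['hebbian_grow'] == True]['val_acc_max']
no_grow = pruned[pruned['hebbian_grow'] == False]['val_acc_max']
t_stat, p_value = scipy_stats.ttest_ind(no_grow, grow, equal_var=False)
print("random - hebbian grow: diff={:.4f}, p={:.3f}".format(
    no_grow.mean() - grow.mean(), p_value))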

In [16]:
with_pruning = (df['hebbian_prune_perc'] > 0)
# note: the filter is not applied here, so prune_perc = 0 rows are included for reference
agg(['hebbian_prune_perc', 'hebbian_grow'])


Out[16]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
hebbian_prune_perc hebbian_grow
0.0 False 16 0.974 0.978 0.976 0.001 6
True 19 0.972 0.977 0.976 0.002 6
0.1 False 13 0.974 0.978 0.975 0.001 6
True 16 0.970 0.973 0.971 0.001 6
0.2 False 6 0.970 0.975 0.973 0.002 6
True 20 0.968 0.972 0.970 0.001 6
0.3 False 23 0.968 0.975 0.971 0.002 6
True 25 0.969 0.972 0.971 0.001 6
0.4 False 22 0.967 0.972 0.970 0.002 6
True 25 0.968 0.973 0.970 0.002 6
0.5 False 20 0.967 0.973 0.969 0.002 6
True 26 0.968 0.971 0.970 0.001 6


Which is better, KWinners or ReLU?

In [17]:
agg(['use_kwinners'])


Out[17]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
use_kwinners
False 18 0.969 0.978 0.973 0.003 36
True 20 0.967 0.976 0.971 0.003 36

In [18]:
agg(['hebbian_prune_perc', 'use_kwinners'])


Out[18]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
hebbian_prune_perc use_kwinners
0.0 False 19 0.976 0.978 0.977 0.000 6
True 16 0.972 0.976 0.975 0.001 6
0.1 False 8 0.971 0.978 0.974 0.002 6
True 21 0.970 0.976 0.973 0.003 6
0.2 False 9 0.971 0.975 0.973 0.002 6
True 16 0.968 0.972 0.970 0.001 6
0.3 False 26 0.971 0.975 0.972 0.001 6
True 21 0.968 0.970 0.969 0.001 6
0.4 False 23 0.970 0.973 0.971 0.001 6
True 24 0.967 0.970 0.969 0.001 6
0.5 False 21 0.969 0.973 0.971 0.001 6
True 25 0.967 0.970 0.968 0.001 6
  • ReLU outperforms KWinners (with 25% on) in all scenarios, by about 0.2% (the per-pruning-level gap is computed in the sketch below)
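To make the per-pruning-level gap explicit, a small pivot over the same data (a sketch using only columns already in df):

# Mean best validation accuracy per pruning level for ReLU (use_kwinners=False)
# vs. KWinners (use_kwinners=True), plus the ReLU-minus-KWinners gap.
pivot = df.pivot_table(index='hebbian_prune_perc',
                       columns='use_kwinners',
                       values='val_acc_max',
                       aggfunc='mean')
pivot['relu_minus_kwinners'] = pivot[False] - pivot[True]
pivot.round(3)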
