In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../../")

In [28]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

Load and check data


In [29]:
# Experiments to analyze; each name is a sub-directory of ~/nta/results.
exps = ["neurips_1_eval1"]
paths = [
    os.path.expanduser("~/nta/results/{}".format(name))
    for name in exps
]
# load_many is brought in by one of the star imports above
# (presumably dynamic_sparse.common.browser -- TODO confirm).
df = load_many(paths)

In [30]:
# Peek at the first five trials.
df.head(n=5)


Out[30]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... lr_scheduler model momentum network num_classes on_perc optim_alg pruning_early_stop test_noise weight_decay
0 0_pruning_early_stop=0 0.998533 73 0.910383 0 0.997575 0.998417 0.9807 86 0.9454 ... MultiStepLR SET 0.9 MLP 10 0.1 SGD 0 False 0.0001
1 1_pruning_early_stop=1 0.999000 96 0.909583 0 0.997783 0.998367 0.9814 69 0.9513 ... MultiStepLR SET 0.9 MLP 10 0.1 SGD 1 False 0.0001
2 2_pruning_early_stop=2 0.998033 99 0.907067 0 0.997092 0.998033 0.9795 35 0.9560 ... MultiStepLR SET 0.9 MLP 10 0.1 SGD 2 False 0.0001
3 3_pruning_early_stop=3 0.998467 74 0.911750 0 0.996508 0.996817 0.9828 35 0.9480 ... MultiStepLR SET 0.9 MLP 10 0.1 SGD 3 False 0.0001
4 4_pruning_early_stop=0 0.998350 78 0.911233 0 0.997158 0.997400 0.9816 37 0.9531 ... MultiStepLR SET 0.9 MLP 10 0.1 SGD 0 False 0.0001

5 rows × 41 columns


In [31]:
# List the column names available for grouping and filtering.
df.columns


Out[31]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'data_dir', 'dataset_name', 'debug_sparse', 'debug_weights', 'device',
       'hebbian_grow', 'hebbian_prune_perc', 'hidden_sizes', 'input_size',
       'kwinners', 'learning_rate', 'lr_gamma', 'lr_milestones',
       'lr_scheduler', 'model', 'momentum', 'network', 'num_classes',
       'on_perc', 'optim_alg', 'pruning_early_stop', 'test_noise',
       'weight_decay'],
      dtype='object')

In [32]:
# (number of trials, number of columns) in the loaded table.
df.shape


Out[32]:
(10, 41)

In [33]:
# Show every field of a single trial (the second row by position).
df.iloc[1]


Out[33]:
Experiment Name                                    1_pruning_early_stop=1
train_acc_max                                                       0.999
train_acc_max_epoch                                                    96
train_acc_min                                                    0.909583
train_acc_min_epoch                                                     0
train_acc_median                                                 0.997783
train_acc_last                                                   0.998367
val_acc_max                                                        0.9814
val_acc_max_epoch                                                      69
val_acc_min                                                        0.9513
val_acc_min_epoch                                                       0
val_acc_median                                                     0.9803
val_acc_last                                                       0.9805
epochs                                                                100
experiment_file_name    /Users/lsouza/nta/results/neurips_1_eval1/expe...
trial_time                                                        20.0659
mean_epoch_time                                                  0.200659
batch_norm                                                           True
data_dir                                        /home/ubuntu/nta/datasets
dataset_name                                                        MNIST
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_grow                                                        False
hebbian_prune_perc                                                    0.3
hidden_sizes                                                          100
input_size                                                            784
kwinners                                                            False
learning_rate                                                         0.1
lr_gamma                                                              0.1
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                                 SET
momentum                                                              0.9
network                                                               MLP
num_classes                                                            10
on_perc                                                               0.1
optim_alg                                                             SGD
pruning_early_stop                                                      1
test_noise                                                          False
weight_decay                                                       0.0001
Name: 1, dtype: object

In [39]:
# Number of trials available for each model type.
df['model'].groupby(df['model']).count()


Out[39]:
model
SET    10
Name: model, dtype: int64

Analysis

Experiment Details

```python
base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="MNIST",
    data_dir=os.path.expanduser("~/nta/datasets"),
    input_size=784,
    num_classes=10,
    # network related
    network="MLP",  # "MLPHeb",
    hidden_sizes=[100, 100, 100],
    batch_norm=True,
    kwinners=False,
    # model related
    model="SET",  # "DSNNMixedHeb",
    on_perc=0.1,
    optim_alg="SGD",
    momentum=0.9,
    weight_decay=1e-4,
    learning_rate=0.1,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.1,
    # sparse related
    hebbian_prune_perc=0.3,
    pruning_early_stop=1,  # tune.grid_search([None, 1, 2, 3]),
    hebbian_grow=False,
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
    stop={"training_iteration": 100},
)
```

In [40]:
# Did any trials fail? Count the trials that stopped before epoch 30.
df.loc[df["epochs"] < 30, "epochs"].count()


Out[40]:
0

In [41]:
# Keep the untouched table in df_origin; df retains only the trials that
# ran for at least 30 epochs.
df_origin = df.copy()
df = df_origin.loc[df_origin["epochs"].ge(30)]
df.shape


Out[41]:
(10, 41)

In [42]:
# Flag trials with fewer than 100 epochs: they either failed or were still
# running when the results were loaded. Then list their epoch counts.
df_origin['failed'] = df_origin["epochs"].lt(100)
df_origin.loc[df_origin['failed'], 'epochs']


Out[42]:
8    80
9    63
Name: epochs, dtype: int64

In [43]:
# helper functions
def mean_and_std(s):
    """Render ``s`` as the text ``"<mean> ± <std>"`` with three decimals.

    ``s`` must provide ``mean()`` and ``std()`` (e.g. a pandas Series).
    """
    template = "{:.3f} ± {:.3f}"
    center = s.mean()
    spread = s.std()
    return template.format(center, spread)

def round_mean(s):
    """Return the mean of ``s``, rounded to a whole number, as a string."""
    nearest = round(s.mean())
    return "{:.0f}".format(nearest)

# Summary statistics reported for the best validation accuracy.
stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3, data=None):
    """Summarize the best validation accuracy per group of trials.

    Args:
        columns: column name(s) to group by, passed to ``DataFrame.groupby``.
        filter: optional boolean mask selecting the rows to include; when
            ``None`` every row is used. (The name shadows the builtin; it is
            kept so existing calls keep working.)
        round: number of decimals passed to ``DataFrame.round`` on the
            result. (Also shadows the builtin inside this function only.)
        data: table to aggregate. Defaults to the notebook-level ``df``,
            looked up when the function is called.

    Returns:
        A DataFrame indexed by ``columns`` holding the rounded mean of
        ``val_acc_max_epoch``, the ``stats`` of ``val_acc_max`` and the
        number of trials per group (count of the ``model`` column).
    """
    frame = df if data is None else data
    if filter is not None:
        frame = frame[filter]
    # A single aggregation spec is shared by the filtered and unfiltered paths.
    summary = frame.groupby(columns).agg({
        'val_acc_max_epoch': round_mean,
        'val_acc_max': stats,
        'model': ['count'],
    })
    return summary.round(round)
Did Hebbian perform better than SET?

In [44]:
# Validation-accuracy summary for each model type.
agg(columns=['model'])


Out[44]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
model
SET 56 0.98 0.983 0.981 0.001 10

In [12]:
# Boolean row masks, one per on_perc level; a lower on_perc means a
# sparser network.
# NOTE(review): the rows displayed earlier in this notebook all have
# on_perc == 0.1, so the 0.05 and 0.2 masks may select nothing for the
# currently loaded experiment -- confirm.
high_sparsity = df['on_perc'].eq(0.05)
avg_sparsity = df['on_perc'].eq(0.1)
low_sparsity = df['on_perc'].eq(0.2)

In [13]:
# kWinners on vs. off, restricted to the low-sparsity trials.
agg(columns=['kwinners'], filter=low_sparsity)


Out[13]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
kwinners
False 21 0.932 0.978 0.963 0.009 19 0.908 0.963 0.948 0.011 69
True 22 0.938 0.976 0.964 0.008 20 0.911 0.963 0.949 0.010 68

In [14]:
# kWinners on vs. off, restricted to the high-sparsity trials.
agg(columns=['kwinners'], filter=high_sparsity)


Out[14]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
kwinners
False 24 0.398 0.969 0.904 0.128 22 0.392 0.941 0.872 0.121 37
True 23 0.295 0.967 0.882 0.166 22 0.292 0.937 0.851 0.157 39
  • No evidence of a significant difference. In networks with high sparsity, the impact of kWinners is worse, which is expected since kWinners (at 30%) will make the activations more sparse than ReLU (which is 50% sparse on average).
What is the optimal level of weight sparsity?

In [15]:
# Accuracy summary for each on_perc (weight density) level.
agg(columns=['on_perc'])


Out[15]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
on_perc
0.05 24 0.295 0.969 0.893 0.148 22 0.292 0.941 0.861 0.140 76
0.10 22 0.888 0.974 0.952 0.020 21 0.851 0.954 0.929 0.024 130
0.20 22 0.932 0.978 0.963 0.009 19 0.908 0.963 0.948 0.010 137
  • Sparsity at the 80% and 90% levels seems more or less equivalent; the difference is 1 point in accuracy. The jump from 90% to 95% sparsity shows a drastic drop in accuracy, of 6 points.
Hebbian grow helps learning?

In [16]:
# Hebbian grow on vs. off, across all trials.
agg(columns=['hebbian_grow'])


Out[16]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_grow
False 22 0.888 0.978 0.956 0.017 21 0.851 0.963 0.932 0.022 176
True 22 0.295 0.972 0.930 0.106 20 0.292 0.960 0.911 0.105 167

In [17]:
# Hebbian grow on vs. off, restricted to the low-sparsity trials.
agg(columns=['hebbian_grow'], filter=low_sparsity)


Out[17]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_grow
False 20 0.932 0.978 0.962 0.011 18 0.908 0.963 0.945 0.013 65
True 23 0.938 0.972 0.965 0.005 21 0.914 0.960 0.951 0.006 72

In [18]:
# Hebbian grow on vs. off, restricted to the high-sparsity trials.
agg(columns=['hebbian_grow'], filter=high_sparsity)


Out[18]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_grow
False 25 0.905 0.969 0.951 0.016 24 0.853 0.941 0.916 0.019 48
True 22 0.295 0.954 0.793 0.210 19 0.292 0.923 0.769 0.200 28
  • No strong evidence that it helps in the low sparsity case. In the high sparsity case (95%), it seems very harmful.
Hebbian pruning helps learning?

In [19]:
# Accuracy summary for each Hebbian pruning percentage, across all trials.
agg(columns=['hebbian_prune_perc'])


Out[19]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.0 16 0.888 0.971 0.943 0.024 16 0.851 0.959 0.923 0.033 31
0.1 20 0.900 0.969 0.942 0.019 20 0.868 0.957 0.922 0.026 45
0.2 23 0.402 0.970 0.928 0.102 22 0.396 0.960 0.907 0.101 64
0.3 23 0.538 0.968 0.939 0.078 22 0.530 0.952 0.918 0.077 67
0.4 24 0.398 0.974 0.953 0.073 22 0.392 0.960 0.931 0.072 67
0.5 23 0.295 0.978 0.953 0.085 19 0.292 0.963 0.930 0.084 69

In [20]:
# Hebbian pruning percentage, restricted to the low-sparsity trials.
agg(columns=['hebbian_prune_perc'], filter=low_sparsity)


Out[20]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.0 18 0.932 0.971 0.956 0.011 18 0.908 0.959 0.941 0.016 17
0.1 22 0.945 0.969 0.955 0.008 22 0.925 0.957 0.940 0.011 24
0.2 22 0.938 0.970 0.961 0.006 21 0.914 0.960 0.946 0.009 24
0.3 23 0.960 0.968 0.964 0.002 21 0.945 0.952 0.950 0.002 24
0.4 23 0.964 0.974 0.969 0.003 20 0.948 0.960 0.954 0.003 24
0.5 21 0.967 0.978 0.972 0.003 15 0.950 0.963 0.957 0.004 24

In [21]:
# Hebbian pruning percentage, restricted to the high-sparsity trials.
agg(columns=['hebbian_prune_perc'], filter=high_sparsity)


Out[21]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.1 29 0.900 0.900 0.900 NaN 29 0.879 0.879 0.879 NaN 1
0.2 25 0.402 0.952 0.847 0.185 24 0.396 0.920 0.815 0.174 16
0.3 21 0.538 0.952 0.882 0.131 21 0.530 0.926 0.852 0.124 19
0.4 25 0.398 0.966 0.918 0.133 23 0.392 0.937 0.886 0.126 19
0.5 24 0.295 0.969 0.914 0.149 21 0.292 0.941 0.882 0.142 21
  • There is good evidence it helps. The trend is very clear in the low sparsity (80% sparse) cases.

In [22]:
# Hebbian pruning levels among trials that use no magnitude pruning.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell? Confirm the expected results set.
no_magnitude = df['weight_prune_perc'].eq(0)
agg(columns=['hebbian_prune_perc'], filter=no_magnitude)


Out[22]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.0 15 0.895 0.971 0.939 0.032 15 0.853 0.959 0.915 0.047 5
0.1 18 0.905 0.965 0.940 0.022 17 0.880 0.953 0.920 0.028 8
0.2 23 0.402 0.970 0.905 0.167 22 0.396 0.960 0.885 0.163 11
0.3 21 0.561 0.964 0.906 0.124 21 0.549 0.951 0.886 0.122 12
0.4 24 0.938 0.968 0.960 0.010 22 0.903 0.954 0.934 0.017 12
0.5 22 0.942 0.973 0.963 0.009 16 0.901 0.954 0.935 0.017 12

In [23]:
# Same comparison, further restricted to the low-sparsity trials.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell? Confirm the expected results set.
no_magnitude = df['weight_prune_perc'].eq(0)
agg(columns=['hebbian_prune_perc'], filter=no_magnitude & low_sparsity)


Out[23]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.0 24 0.959 0.971 0.965 0.009 29 0.947 0.959 0.953 0.008 2
0.1 23 0.947 0.965 0.956 0.009 21 0.931 0.953 0.942 0.012 4
0.2 21 0.958 0.970 0.963 0.005 20 0.941 0.960 0.948 0.009 4
0.3 24 0.963 0.964 0.964 0.001 24 0.945 0.951 0.948 0.003 4
0.4 22 0.966 0.968 0.967 0.001 24 0.948 0.954 0.950 0.003 4
0.5 20 0.968 0.973 0.971 0.002 10 0.950 0.954 0.952 0.002 4
  • Results seem similar even when no magnitude pruning is involved, only hebbian pruning
Magnitude pruning helps learning?

In [24]:
# Accuracy summary for each magnitude pruning percentage, across all trials.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
agg(columns=['weight_prune_perc'])


Out[24]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 21 0.402 0.973 0.935 0.091 19 0.396 0.960 0.912 0.090 60
0.1 22 0.585 0.976 0.942 0.060 21 0.573 0.960 0.919 0.061 62
0.2 24 0.862 0.976 0.957 0.018 23 0.836 0.960 0.936 0.022 56
0.3 22 0.458 0.976 0.946 0.073 20 0.452 0.961 0.925 0.073 55
0.4 22 0.398 0.978 0.938 0.094 22 0.392 0.963 0.918 0.093 56
0.5 22 0.295 0.976 0.943 0.092 19 0.292 0.963 0.921 0.091 54

In [25]:
# Magnitude pruning percentage, restricted to the low-sparsity trials.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
agg(columns=['weight_prune_perc'], filter=low_sparsity)


Out[25]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 22 0.947 0.973 0.964 0.007 21 0.931 0.960 0.949 0.007 22
0.1 20 0.932 0.976 0.961 0.011 19 0.908 0.960 0.946 0.013 23
0.2 22 0.938 0.976 0.964 0.008 19 0.911 0.960 0.949 0.011 23
0.3 21 0.940 0.976 0.964 0.009 18 0.920 0.961 0.949 0.010 23
0.4 22 0.943 0.978 0.964 0.009 22 0.921 0.963 0.949 0.010 23
0.5 22 0.946 0.976 0.964 0.008 18 0.925 0.963 0.949 0.010 23

In [26]:
# Magnitude pruning percentage, restricted to the high-sparsity trials.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
agg(columns=['weight_prune_perc'], filter=high_sparsity)


Out[26]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 23 0.402 0.962 0.874 0.170 19 0.396 0.925 0.843 0.161 15
0.1 27 0.585 0.968 0.904 0.110 24 0.573 0.941 0.874 0.105 15
0.2 25 0.862 0.969 0.943 0.029 24 0.836 0.940 0.911 0.027 13
0.3 24 0.458 0.966 0.894 0.148 21 0.452 0.937 0.863 0.140 12
0.4 22 0.398 0.967 0.864 0.189 23 0.392 0.936 0.834 0.178 12
0.5 21 0.295 0.964 0.870 0.217 21 0.292 0.931 0.833 0.205 9

In [27]:
# Magnitude pruning percentage, restricted to the average-sparsity trials.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
agg(columns=['weight_prune_perc'], filter=avg_sparsity)


Out[27]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 19 0.895 0.967 0.948 0.021 18 0.853 0.943 0.922 0.025 23
0.1 20 0.888 0.970 0.947 0.025 21 0.851 0.948 0.922 0.032 24
0.2 27 0.932 0.974 0.959 0.011 26 0.909 0.954 0.938 0.011 20
0.3 22 0.912 0.974 0.956 0.015 22 0.884 0.953 0.935 0.018 20
0.4 22 0.902 0.973 0.953 0.021 21 0.873 0.952 0.931 0.024 21
0.5 22 0.901 0.974 0.951 0.021 19 0.871 0.950 0.929 0.025 22
  • In low sparsity cases, results are the same for any amount of pruning. In average and high sparsity, there is a Gaussian-like curve, with the peak at around 0.2 (maybe extending to 0.3).
  • Results are consistent with what has been seen in previous experiments and in related papers.
  • Worth noting that although results are better at 0.2, it also takes slightly longer to reach the best result (see val_acc_max_epoch) compared to the other magnitude pruning levels.

In [28]:
# Magnitude pruning levels among trials that use no Hebbian pruning.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
no_hebbian = df['hebbian_prune_perc'].eq(0)
agg(columns=['weight_prune_perc'], filter=no_hebbian)


Out[28]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 15 0.895 0.971 0.939 0.032 15 0.853 0.959 0.915 0.047 5
0.1 10 0.888 0.960 0.928 0.030 10 0.851 0.951 0.903 0.042 7
0.2 22 0.938 0.966 0.953 0.011 21 0.911 0.953 0.937 0.016 5
0.3 13 0.912 0.965 0.944 0.025 16 0.884 0.953 0.926 0.031 4
0.4 15 0.902 0.964 0.944 0.025 16 0.876 0.953 0.926 0.031 5
0.5 22 0.943 0.963 0.955 0.009 21 0.929 0.950 0.940 0.011 5
  • Somewhat inconsistent results when looking at cases where there is no Hebbian learning, only pruning by magnitude. There is an anomaly at the last entry, where 50% of the weights are pruned: results are similar to those at 20%.
  • The number of samples averaged over is much lower in this pivot.
What is the optimal combination of Hebbian and magnitude pruning?

In [29]:
# Mean ± std of the best validation accuracy for every combination of
# Hebbian (rows) and magnitude (columns) pruning percentage.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
df.pivot_table(
    values='val_acc_max',
    index='hebbian_prune_perc',
    columns='weight_prune_perc',
    aggfunc=mean_and_std,
)


Out[29]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
hebbian_prune_perc
0.0 0.939 ± 0.032 0.928 ± 0.030 0.953 ± 0.011 0.944 ± 0.025 0.944 ± 0.025 0.955 ± 0.009
0.1 0.940 ± 0.022 0.944 ± 0.017 0.950 ± 0.012 0.948 ± 0.011 0.935 ± 0.025 0.940 ± 0.024
0.2 0.905 ± 0.167 0.914 ± 0.110 0.951 ± 0.015 0.908 ± 0.150 0.951 ± 0.019 0.943 ± 0.022
0.3 0.906 ± 0.124 0.942 ± 0.058 0.957 ± 0.009 0.957 ± 0.012 0.921 ± 0.127 0.955 ± 0.015
0.4 0.960 ± 0.010 0.949 ± 0.054 0.966 ± 0.007 0.968 ± 0.004 0.918 ± 0.164 0.966 ± 0.005
0.5 0.963 ± 0.009 0.967 ± 0.007 0.960 ± 0.032 0.952 ± 0.058 0.970 ± 0.004 0.908 ± 0.204

In [40]:
# Low-sparsity trials only: pruning combinations split by kWinners.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
df[low_sparsity].pivot_table(
    values='val_acc_max',
    index=['kwinners', 'hebbian_prune_perc'],
    columns='weight_prune_perc',
    aggfunc=mean_and_std,
)


Out[40]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
kwinners hebbian_prune_perc
False 0.0 0.971 ± nan 0.946 ± 0.020 0.966 ± nan 0.951 ± 0.016 0.954 ± 0.015 0.963 ± nan
0.1 0.957 ± 0.011 0.952 ± 0.008 0.959 ± 0.003 0.955 ± 0.011 0.955 ± 0.014 0.954 ± 0.008
0.2 0.960 ± 0.004 0.948 ± 0.014 0.960 ± 0.003 0.963 ± 0.006 0.963 ± 0.009 0.960 ± 0.003
0.3 0.963 ± 0.001 0.965 ± 0.001 0.963 ± 0.002 0.966 ± 0.001 0.964 ± 0.002 0.964 ± 0.000
0.4 0.967 ± 0.002 0.968 ± 0.004 0.969 ± 0.004 0.970 ± 0.002 0.970 ± 0.004 0.969 ± 0.004
0.5 0.970 ± 0.003 0.971 ± 0.005 0.971 ± 0.005 0.972 ± 0.003 0.974 ± 0.006 0.973 ± 0.004
True 0.0 0.959 ± nan 0.959 ± nan 0.950 ± 0.017 0.965 ± nan 0.963 ± nan 0.956 ± 0.010
0.1 0.956 ± 0.012 0.954 ± 0.008 0.955 ± 0.005 0.955 ± 0.009 0.955 ± 0.011 0.958 ± 0.016
0.2 0.966 ± 0.006 0.962 ± 0.002 0.964 ± 0.007 0.962 ± 0.007 0.961 ± 0.003 0.962 ± 0.007
0.3 0.964 ± 0.000 0.964 ± 0.002 0.964 ± 0.002 0.967 ± 0.002 0.964 ± 0.005 0.965 ± 0.002
0.4 0.967 ± 0.000 0.970 ± 0.003 0.969 ± 0.004 0.971 ± 0.002 0.970 ± 0.003 0.969 ± 0.007
0.5 0.972 ± 0.002 0.973 ± 0.004 0.973 ± 0.004 0.973 ± 0.004 0.973 ± 0.003 0.974 ± 0.003

In [41]:
# Average-sparsity trials only: pruning combinations split by kWinners.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
df[avg_sparsity].pivot_table(
    values='val_acc_max',
    index=['kwinners', 'hebbian_prune_perc'],
    columns='weight_prune_perc',
    aggfunc=mean_and_std,
)


Out[41]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
kwinners hebbian_prune_perc
False 0.0 0.925 ± 0.042 0.903 ± 0.000 0.950 ± nan 0.912 ± nan 0.902 ± nan 0.956 ± nan
0.1 0.924 ± 0.026 0.943 ± 0.011 0.931 ± nan 0.936 ± nan 0.915 ± 0.016 0.911 ± 0.014
0.2 0.955 ± 0.004 0.930 ± 0.034 0.955 ± 0.003 0.950 ± 0.005 0.953 ± 0.007 0.950 ± 0.007
0.3 0.954 ± 0.011 0.958 ± 0.005 0.959 ± 0.003 0.960 ± 0.002 0.961 ± 0.002 0.961 ± 0.001
0.4 0.963 ± 0.003 0.965 ± 0.006 0.969 ± 0.005 0.968 ± 0.006 0.968 ± 0.002 0.967 ± 0.004
0.5 0.965 ± 0.002 0.968 ± 0.003 0.970 ± 0.005 0.969 ± 0.004 0.970 ± 0.003 0.969 ± 0.007
True 0.0 0.915 ± nan 0.919 ± 0.044 0.950 ± nan NaN 0.946 ± nan 0.943 ± nan
0.1 0.923 ± 0.018 0.926 ± 0.027 0.942 ± nan 0.940 ± 0.004 0.929 ± nan 0.937 ± 0.029
0.2 0.949 ± 0.008 0.954 ± 0.001 0.950 ± 0.001 0.956 ± 0.006 0.957 ± 0.007 0.928 ± 0.027
0.3 0.954 ± 0.009 0.961 ± 0.004 0.959 ± 0.001 0.962 ± 0.004 0.960 ± 0.004 0.958 ± 0.003
0.4 0.964 ± 0.005 0.966 ± 0.006 0.967 ± 0.007 0.964 ± 0.006 0.964 ± 0.008 0.966 ± 0.007
0.5 0.965 ± 0.004 0.968 ± 0.003 0.970 ± 0.004 0.970 ± 0.005 0.970 ± 0.004 0.968 ± 0.006
  • There is a clearer trend in the low sparsity case. Results from high sparsity are inconclusive, with several runs failing to "converge".
  • Weight pruning alone improves the model by up to 0.7% when going from 10% to 50% magnitude pruning.
  • Hebbian pruning alone improves the model by 1.5%.
  • Combining both raises the improvement from the 1.5% seen with Hebbian pruning only to 1.8%.
  • The comparisons above are from 0.1 to 0.5 pruning. An open question is why no pruning on both sides — the (0, 0) point — is an anomaly relative to the trend shown in the pivot.

In [39]:
# Average-sparsity trials only: Hebbian (rows) vs. magnitude (columns)
# pruning percentage.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
df[avg_sparsity].pivot_table(
    values='val_acc_max',
    index='hebbian_prune_perc',
    columns='weight_prune_perc',
    aggfunc=mean_and_std,
)


Out[39]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
hebbian_prune_perc
0.0 0.921 ± 0.030 0.911 ± 0.027 0.950 ± 0.000 0.912 ± nan 0.924 ± 0.031 0.950 ± 0.009
0.1 0.923 ± 0.018 0.934 ± 0.019 0.937 ± 0.007 0.939 ± 0.003 0.919 ± 0.014 0.924 ± 0.024
0.2 0.952 ± 0.006 0.942 ± 0.024 0.953 ± 0.003 0.953 ± 0.006 0.955 ± 0.006 0.939 ± 0.020
0.3 0.954 ± 0.008 0.960 ± 0.004 0.959 ± 0.002 0.961 ± 0.003 0.961 ± 0.003 0.959 ± 0.002
0.4 0.964 ± 0.004 0.965 ± 0.005 0.968 ± 0.005 0.966 ± 0.005 0.966 ± 0.005 0.967 ± 0.004
0.5 0.965 ± 0.003 0.968 ± 0.002 0.970 ± 0.004 0.970 ± 0.004 0.970 ± 0.003 0.969 ± 0.005

In [31]:
# High-sparsity trials only: Hebbian (rows) vs. magnitude (columns)
# pruning percentage.
# NOTE(review): weight_prune_perc is not in the df.columns output shown
# earlier in this notebook -- stale cell?
df[high_sparsity].pivot_table(
    values='val_acc_max',
    index='hebbian_prune_perc',
    columns='weight_prune_perc',
    aggfunc=mean_and_std,
)


Out[31]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
hebbian_prune_perc
0.1 NaN NaN NaN NaN 0.900 ± nan NaN
0.2 0.764 ± 0.313 0.821 ± 0.205 0.932 ± 0.017 0.776 ± 0.276 0.919 ± 0.020 0.916 ± 0.011
0.3 0.802 ± 0.185 0.901 ± 0.095 0.945 ± 0.007 0.940 ± 0.007 0.811 ± 0.237 0.927 ± 0.008
0.4 0.948 ± 0.010 0.912 ± 0.090 0.959 ± 0.008 0.965 ± 0.001 0.817 ± 0.280 0.960 ± 0.003
0.5 0.953 ± 0.009 0.960 ± 0.009 0.937 ± 0.051 0.912 ± 0.096 0.965 ± 0.002 0.740 ± 0.386

In [ ]: