In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../../")

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

Load and check data


In [4]:
exps = ['mlp_heb.py_eval2', ]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)

In [5]:
df.head(5)


Out[5]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... model network noise_level num_classes on_perc percent_on pruning_active pruning_es test_noise weight_prune_perc
0 0_hebbian_grow=True,hebbian_prune_perc=0,kwinn... 0.14510 0 0.145100 0 0.145100 0.145100 0.3787 0 0.3787 ... DSNNMixedHeb MLPHeb 0.15 10 0.05 0.3 True False True 0.0
1 1_hebbian_grow=False,hebbian_prune_perc=0,kwin... 0.10900 2 0.108100 3 0.108675 0.108100 0.1776 0 0.0958 ... DSNNMixedHeb MLPHeb 0.15 10 0.05 0.3 True False True 0.0
2 2_hebbian_grow=True,hebbian_prune_perc=0.1,kwi... 0.11015 0 0.110150 0 0.110150 0.110150 0.2563 0 0.2563 ... DSNNMixedHeb MLPHeb 0.15 10 0.05 0.3 True False True 0.0
3 3_hebbian_grow=False,hebbian_prune_perc=0.1,kw... 0.15345 1 0.105250 4 0.107833 0.105250 0.2067 1 0.0958 ... DSNNMixedHeb MLPHeb 0.15 10 0.05 0.3 True False True 0.0
4 4_hebbian_grow=True,hebbian_prune_perc=0.2,kwi... 0.39985 26 0.126217 0 0.391150 0.399183 0.4022 29 0.2342 ... DSNNMixedHeb MLPHeb 0.15 10 0.05 0.3 True False True 0.0

5 rows × 43 columns


In [6]:
df.columns


Out[6]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'noise_acc_max',
       'noise_acc_max_epoch', 'noise_acc_min', 'noise_acc_min_epoch',
       'noise_acc_median', 'noise_acc_last', 'epochs', 'experiment_file_name',
       'trial_time', 'mean_epoch_time', 'data_dir', 'dataset_name',
       'debug_sparse', 'debug_weights', 'device', 'hebbian_grow',
       'hebbian_prune_perc', 'hidden_sizes', 'input_size', 'kwinners', 'model',
       'network', 'noise_level', 'num_classes', 'on_perc', 'percent_on',
       'pruning_active', 'pruning_es', 'test_noise', 'weight_prune_perc'],
      dtype='object')

In [7]:
df.iloc[10]


Out[7]:
Experiment Name         10_hebbian_grow=True,hebbian_prune_perc=0.5,kw...
train_acc_max                                                       0.952
train_acc_max_epoch                                                    29
train_acc_min                                                     0.10955
train_acc_min_epoch                                                     0
train_acc_median                                                 0.943842
train_acc_last                                                      0.952
val_acc_max                                                        0.9494
val_acc_max_epoch                                                      20
val_acc_min                                                        0.1135
val_acc_min_epoch                                                       0
val_acc_median                                                    0.93935
val_acc_last                                                       0.9474
noise_acc_max                                                      0.9137
noise_acc_max_epoch                                                    27
noise_acc_min                                                      0.1135
noise_acc_min_epoch                                                     0
noise_acc_median                                                  0.89965
noise_acc_last                                                     0.9079
epochs                                                                 30
experiment_file_name    /Users/lsouza/nta/results/mlp_heb.py_eval2/exp...
trial_time                                                        13.1363
mean_epoch_time                                                  0.437876
data_dir                                        /home/ubuntu/nta/datasets
dataset_name                                                        MNIST
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_grow                                                         True
hebbian_prune_perc                                                    0.5
hidden_sizes                                                          100
input_size                                                            784
kwinners                                                             True
model                                                        DSNNMixedHeb
network                                                            MLPHeb
noise_level                                                          0.15
num_classes                                                            10
on_perc                                                              0.05
percent_on                                                            0.3
pruning_active                                                       True
pruning_es                                                          False
test_noise                                                           True
weight_prune_perc                                                       0
Name: 10, dtype: object

Analysis

Experiment Details

base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="MNIST",
    input_size=784,
    num_classes=10,
    hidden_sizes=[100, 100, 100],
    data_dir=os.path.expanduser("~/nta/datasets"),
    # model related
    model="DSNNMixedHeb",
    network="MLPHeb",
    kwinners=tune.grid_search([True, False]),  # 2
    percent_on=0.3,
    on_perc=tune.grid_search([0.05, 0.1, 0.2]),  # 3
    # sparse related
    hebbian_prune_perc=tune.grid_search([0, 0.1, 0.2, 0.3, 0.4, 0.5]),  # 6
    weight_prune_perc=tune.grid_search([0, 0.1, 0.2, 0.3, 0.4, 0.5]),  # 6
    pruning_es=False,
    pruning_active=True,
    hebbian_grow=tune.grid_search([True, False]),  # 2
    # additional validation
    test_noise=True,
    noise_level=0.15,  # test with more aggressive noise
    # debugging
    debug_weights=True,
    debug_sparse=True,
    # define this above
    stop={"training_iteration": 30},
    num_samples=1,
)
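
For reference, the grid above implies 2 × 3 × 6 × 6 × 2 = 432 configurations (kwinners × on_perc × hebbian_prune_perc × weight_prune_perc × hebbian_grow, one sample each), so df should hold 432 rows before any filtering. A minimal sanity-check sketch:

n_grid = 2 * 3 * 6 * 6 * 2  # kwinners x on_perc x hebbian_prune_perc x weight_prune_perc x hebbian_grow
n_grid, len(df)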

In [8]:
# Did any trials fail?
df[df["epochs"]<30]["epochs"].count()


Out[8]:
89

In [9]:
# Removing failed trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=30]
df.shape


Out[9]:
(343, 43)

In [38]:
# which ones failed?
df_origin['failed'] = df_origin["epochs"]<30
df_origin.groupby(['on_perc'])['failed'].agg('sum')


Out[38]:
on_perc
0.05    68.0
0.10    14.0
0.20     7.0
Name: failed, dtype: float64
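
The same breakdown as a failure rate rather than a raw count can be easier to read; a minimal sketch reusing the df_origin copy and the failed column created above:

df_origin.groupby('on_perc')['failed'].mean()  # fraction of failed trials per sparsity level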

In [10]:
# helper functions
def mean_and_std(s):
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    # aggregate accuracy statistics, optionally over a filtered subset of trials
    data = df if filter is None else df[filter]
    return (data.groupby(columns)
            .agg({'val_acc_max_epoch': round_mean,
                  'val_acc_max': stats,
                  'noise_acc_max_epoch': round_mean,
                  'noise_acc_max': stats,
                  'model': ['count']})).round(round)
Did kWinners perform better than ReLU?

In [11]:
agg(['kwinners'])


Out[11]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
kwinners
False 22 0.398 0.978 0.945 0.064 20 0.392 0.963 0.924 0.065 172
True 23 0.295 0.976 0.941 0.086 21 0.292 0.963 0.920 0.085 171

In [12]:
high_sparsity = (df['on_perc']==0.05)
avg_sparsity = (df['on_perc']==0.1)
low_sparsity = (df['on_perc']==0.2)

In [13]:
agg(['kwinners'], low_sparsity)


Out[13]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
kwinners
False 21 0.932 0.978 0.963 0.009 19 0.908 0.963 0.948 0.011 69
True 22 0.938 0.976 0.964 0.008 20 0.911 0.963 0.949 0.010 68

In [14]:
agg(['kwinners'], high_sparsity)


Out[14]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
kwinners
False 24 0.398 0.969 0.904 0.128 22 0.392 0.941 0.872 0.121 37
True 23 0.295 0.967 0.882 0.166 22 0.292 0.937 0.851 0.157 39
  • No evidence of a significant difference (see the sketch below for one quick check). In networks with high sparsity, the impact of kWinners is worse, which is expected since kWinners (at 30% on) makes the activations sparser than ReLU (which is roughly 50% sparse on average).
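
As a rough way to probe the "no significant difference" claim, a Welch t-test on val_acc_max could be run (a sketch only, assuming scipy is available; it treats every grid point as an independent sample):

from scipy.stats import ttest_ind  # avoid rebinding the notebook's `stats` list

kw_acc = df[df['kwinners'] == True]['val_acc_max']
relu_acc = df[df['kwinners'] == False]['val_acc_max']
ttest_ind(kw_acc, relu_acc, equal_var=False)  # Welch's t-test
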
What is the optimal level of weight sparsity?

In [15]:
agg(['on_perc'])


Out[15]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
on_perc
0.05 24 0.295 0.969 0.893 0.148 22 0.292 0.941 0.861 0.140 76
0.10 22 0.888 0.974 0.952 0.020 21 0.851 0.954 0.929 0.024 130
0.20 22 0.932 0.978 0.963 0.009 19 0.908 0.963 0.948 0.010 137
  • Sparsity at the 80% and 90% levels seems more or less equivalent, with a difference of about 1 point in accuracy. The jump from 90% to 95% sparsity shows a drastic drop in accuracy of about 6 points (see the sketch below).
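
The gaps quoted above can be read directly off the mean peak validation accuracy per on_perc level; a minimal sketch:

df.groupby('on_perc')['val_acc_max'].agg(['mean', 'std'])
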
Does Hebbian grow help learning?

In [16]:
agg(['hebbian_grow'])


Out[16]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_grow
False 22 0.888 0.978 0.956 0.017 21 0.851 0.963 0.932 0.022 176
True 22 0.295 0.972 0.930 0.106 20 0.292 0.960 0.911 0.105 167

In [17]:
agg(['hebbian_grow'], low_sparsity)


Out[17]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_grow
False 20 0.932 0.978 0.962 0.011 18 0.908 0.963 0.945 0.013 65
True 23 0.938 0.972 0.965 0.005 21 0.914 0.960 0.951 0.006 72

In [18]:
agg(['hebbian_grow'], high_sparsity)


Out[18]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_grow
False 25 0.905 0.969 0.951 0.016 24 0.853 0.941 0.916 0.019 48
True 22 0.295 0.954 0.793 0.210 19 0.292 0.923 0.769 0.200 28
  • No strong evidence it helps in the low sparsity case. In the high sparsity case (95%), it seems very harmful (see the pivot sketch below).
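
One way to see the interaction between hebbian_grow and sparsity at a glance; a sketch reusing the mean_and_std helper defined above:

pd.pivot_table(df,
               index='hebbian_grow',
               columns='on_perc',
               values='val_acc_max',
               aggfunc=mean_and_std)
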
Does Hebbian pruning help learning?

In [19]:
agg(['hebbian_prune_perc'])


Out[19]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.0 16 0.888 0.971 0.943 0.024 16 0.851 0.959 0.923 0.033 31
0.1 20 0.900 0.969 0.942 0.019 20 0.868 0.957 0.922 0.026 45
0.2 23 0.402 0.970 0.928 0.102 22 0.396 0.960 0.907 0.101 64
0.3 23 0.538 0.968 0.939 0.078 22 0.530 0.952 0.918 0.077 67
0.4 24 0.398 0.974 0.953 0.073 22 0.392 0.960 0.931 0.072 67
0.5 23 0.295 0.978 0.953 0.085 19 0.292 0.963 0.930 0.084 69

In [20]:
agg(['hebbian_prune_perc'], low_sparsity)


Out[20]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.0 18 0.932 0.971 0.956 0.011 18 0.908 0.959 0.941 0.016 17
0.1 22 0.945 0.969 0.955 0.008 22 0.925 0.957 0.940 0.011 24
0.2 22 0.938 0.970 0.961 0.006 21 0.914 0.960 0.946 0.009 24
0.3 23 0.960 0.968 0.964 0.002 21 0.945 0.952 0.950 0.002 24
0.4 23 0.964 0.974 0.969 0.003 20 0.948 0.960 0.954 0.003 24
0.5 21 0.967 0.978 0.972 0.003 15 0.950 0.963 0.957 0.004 24

In [21]:
agg(['hebbian_prune_perc'], high_sparsity)


Out[21]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.1 29 0.900 0.900 0.900 NaN 29 0.879 0.879 0.879 NaN 1
0.2 25 0.402 0.952 0.847 0.185 24 0.396 0.920 0.815 0.174 16
0.3 21 0.538 0.952 0.882 0.131 21 0.530 0.926 0.852 0.124 19
0.4 25 0.398 0.966 0.918 0.133 23 0.392 0.937 0.886 0.126 19
0.5 24 0.295 0.969 0.914 0.149 21 0.292 0.941 0.882 0.142 21
  • There is good evidence it helps. The trend is very clear in the low sparsity (80% sparse) cases.

In [22]:
no_magnitude = (df['weight_prune_perc'] == 0)
agg(['hebbian_prune_perc'], no_magnitude)


Out[22]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.0 15 0.895 0.971 0.939 0.032 15 0.853 0.959 0.915 0.047 5
0.1 18 0.905 0.965 0.940 0.022 17 0.880 0.953 0.920 0.028 8
0.2 23 0.402 0.970 0.905 0.167 22 0.396 0.960 0.885 0.163 11
0.3 21 0.561 0.964 0.906 0.124 21 0.549 0.951 0.886 0.122 12
0.4 24 0.938 0.968 0.960 0.010 22 0.903 0.954 0.934 0.017 12
0.5 22 0.942 0.973 0.963 0.009 16 0.901 0.954 0.935 0.017 12

In [23]:
no_magnitude = (df['weight_prune_perc'] == 0)
agg(['hebbian_prune_perc'], (no_magnitude & low_sparsity))


Out[23]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
hebbian_prune_perc
0.0 24 0.959 0.971 0.965 0.009 29 0.947 0.959 0.953 0.008 2
0.1 23 0.947 0.965 0.956 0.009 21 0.931 0.953 0.942 0.012 4
0.2 21 0.958 0.970 0.963 0.005 20 0.941 0.960 0.948 0.009 4
0.3 24 0.963 0.964 0.964 0.001 24 0.945 0.951 0.948 0.003 4
0.4 22 0.966 0.968 0.967 0.001 24 0.948 0.954 0.950 0.003 4
0.5 20 0.968 0.973 0.971 0.002 10 0.950 0.954 0.952 0.002 4
  • Results look similar even when no magnitude pruning is involved, only Hebbian pruning.
Does magnitude pruning help learning?

In [24]:
agg(['weight_prune_perc'])


Out[24]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 21 0.402 0.973 0.935 0.091 19 0.396 0.960 0.912 0.090 60
0.1 22 0.585 0.976 0.942 0.060 21 0.573 0.960 0.919 0.061 62
0.2 24 0.862 0.976 0.957 0.018 23 0.836 0.960 0.936 0.022 56
0.3 22 0.458 0.976 0.946 0.073 20 0.452 0.961 0.925 0.073 55
0.4 22 0.398 0.978 0.938 0.094 22 0.392 0.963 0.918 0.093 56
0.5 22 0.295 0.976 0.943 0.092 19 0.292 0.963 0.921 0.091 54

In [25]:
agg(['weight_prune_perc'], low_sparsity)


Out[25]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 22 0.947 0.973 0.964 0.007 21 0.931 0.960 0.949 0.007 22
0.1 20 0.932 0.976 0.961 0.011 19 0.908 0.960 0.946 0.013 23
0.2 22 0.938 0.976 0.964 0.008 19 0.911 0.960 0.949 0.011 23
0.3 21 0.940 0.976 0.964 0.009 18 0.920 0.961 0.949 0.010 23
0.4 22 0.943 0.978 0.964 0.009 22 0.921 0.963 0.949 0.010 23
0.5 22 0.946 0.976 0.964 0.008 18 0.925 0.963 0.949 0.010 23

In [26]:
agg(['weight_prune_perc'], high_sparsity)


Out[26]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 23 0.402 0.962 0.874 0.170 19 0.396 0.925 0.843 0.161 15
0.1 27 0.585 0.968 0.904 0.110 24 0.573 0.941 0.874 0.105 15
0.2 25 0.862 0.969 0.943 0.029 24 0.836 0.940 0.911 0.027 13
0.3 24 0.458 0.966 0.894 0.148 21 0.452 0.937 0.863 0.140 12
0.4 22 0.398 0.967 0.864 0.189 23 0.392 0.936 0.834 0.178 12
0.5 21 0.295 0.964 0.870 0.217 21 0.292 0.931 0.833 0.205 9

In [27]:
agg(['weight_prune_perc'], avg_sparsity)


Out[27]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 19 0.895 0.967 0.948 0.021 18 0.853 0.943 0.922 0.025 23
0.1 20 0.888 0.970 0.947 0.025 21 0.851 0.948 0.922 0.032 24
0.2 27 0.932 0.974 0.959 0.011 26 0.909 0.954 0.938 0.011 20
0.3 22 0.912 0.974 0.956 0.015 22 0.884 0.953 0.935 0.018 20
0.4 22 0.902 0.973 0.953 0.021 21 0.873 0.952 0.931 0.024 21
0.5 22 0.901 0.974 0.951 0.021 19 0.871 0.950 0.929 0.025 22
  • In low sparsity cases, results are the same for any amount of pruning. In the average and high sparsity cases there is a Gaussian-like curve, with the peak at around 0.2 (maybe extending to 0.3).
  • Results are consistent with what has been seen in previous experiments and in related papers.
  • Worth noting that although results are better at 0.2, it also takes slightly longer to reach the best accuracy than at other pruning levels (see the sketch below).
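
The "takes slightly longer" observation can be checked against the epoch at which the best validation accuracy is reached; a minimal sketch:

df.groupby('weight_prune_perc')['val_acc_max_epoch'].mean()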

In [28]:
no_hebbian = (df['hebbian_prune_perc'] == 0)
agg(['weight_prune_perc'], no_hebbian)


Out[28]:
val_acc_max_epoch val_acc_max noise_acc_max_epoch noise_acc_max model
round_mean min max mean std round_mean min max mean std count
weight_prune_perc
0.0 15 0.895 0.971 0.939 0.032 15 0.853 0.959 0.915 0.047 5
0.1 10 0.888 0.960 0.928 0.030 10 0.851 0.951 0.903 0.042 7
0.2 22 0.938 0.966 0.953 0.011 21 0.911 0.953 0.937 0.016 5
0.3 13 0.912 0.965 0.944 0.025 16 0.884 0.953 0.926 0.031 4
0.4 15 0.902 0.964 0.944 0.025 16 0.876 0.953 0.926 0.031 5
0.5 22 0.943 0.963 0.955 0.009 21 0.929 0.950 0.940 0.011 5
  • Somewhat inconsistent results when looking at cases with no Hebbian pruning, only pruning by magnitude. There is an anomaly at the last entry, where 50% of the weights are pruned: results are similar to 20%.
  • The number of samples averaged over is a lot lower in this pivot.
What is the optimal combination of Hebbian and magnitude pruning?

In [29]:
pd.pivot_table(df, 
              index='hebbian_prune_perc',
              columns='weight_prune_perc',
              values='val_acc_max',
              aggfunc=mean_and_std)


Out[29]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
hebbian_prune_perc
0.0 0.939 ± 0.032 0.928 ± 0.030 0.953 ± 0.011 0.944 ± 0.025 0.944 ± 0.025 0.955 ± 0.009
0.1 0.940 ± 0.022 0.944 ± 0.017 0.950 ± 0.012 0.948 ± 0.011 0.935 ± 0.025 0.940 ± 0.024
0.2 0.905 ± 0.167 0.914 ± 0.110 0.951 ± 0.015 0.908 ± 0.150 0.951 ± 0.019 0.943 ± 0.022
0.3 0.906 ± 0.124 0.942 ± 0.058 0.957 ± 0.009 0.957 ± 0.012 0.921 ± 0.127 0.955 ± 0.015
0.4 0.960 ± 0.010 0.949 ± 0.054 0.966 ± 0.007 0.968 ± 0.004 0.918 ± 0.164 0.966 ± 0.005
0.5 0.963 ± 0.009 0.967 ± 0.007 0.960 ± 0.032 0.952 ± 0.058 0.970 ± 0.004 0.908 ± 0.204

In [40]:
pd.pivot_table(df[low_sparsity], 
              index=['kwinners','hebbian_prune_perc'],
              columns='weight_prune_perc',
              values='val_acc_max',
              aggfunc=mean_and_std)


Out[40]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
kwinners hebbian_prune_perc
False 0.0 0.971 ± nan 0.946 ± 0.020 0.966 ± nan 0.951 ± 0.016 0.954 ± 0.015 0.963 ± nan
0.1 0.957 ± 0.011 0.952 ± 0.008 0.959 ± 0.003 0.955 ± 0.011 0.955 ± 0.014 0.954 ± 0.008
0.2 0.960 ± 0.004 0.948 ± 0.014 0.960 ± 0.003 0.963 ± 0.006 0.963 ± 0.009 0.960 ± 0.003
0.3 0.963 ± 0.001 0.965 ± 0.001 0.963 ± 0.002 0.966 ± 0.001 0.964 ± 0.002 0.964 ± 0.000
0.4 0.967 ± 0.002 0.968 ± 0.004 0.969 ± 0.004 0.970 ± 0.002 0.970 ± 0.004 0.969 ± 0.004
0.5 0.970 ± 0.003 0.971 ± 0.005 0.971 ± 0.005 0.972 ± 0.003 0.974 ± 0.006 0.973 ± 0.004
True 0.0 0.959 ± nan 0.959 ± nan 0.950 ± 0.017 0.965 ± nan 0.963 ± nan 0.956 ± 0.010
0.1 0.956 ± 0.012 0.954 ± 0.008 0.955 ± 0.005 0.955 ± 0.009 0.955 ± 0.011 0.958 ± 0.016
0.2 0.966 ± 0.006 0.962 ± 0.002 0.964 ± 0.007 0.962 ± 0.007 0.961 ± 0.003 0.962 ± 0.007
0.3 0.964 ± 0.000 0.964 ± 0.002 0.964 ± 0.002 0.967 ± 0.002 0.964 ± 0.005 0.965 ± 0.002
0.4 0.967 ± 0.000 0.970 ± 0.003 0.969 ± 0.004 0.971 ± 0.002 0.970 ± 0.003 0.969 ± 0.007
0.5 0.972 ± 0.002 0.973 ± 0.004 0.973 ± 0.004 0.973 ± 0.004 0.973 ± 0.003 0.974 ± 0.003

In [41]:
pd.pivot_table(df[avg_sparsity], 
              index=['kwinners','hebbian_prune_perc'],
              columns='weight_prune_perc',
              values='val_acc_max',
              aggfunc=mean_and_std)


Out[41]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
kwinners hebbian_prune_perc
False 0.0 0.925 ± 0.042 0.903 ± 0.000 0.950 ± nan 0.912 ± nan 0.902 ± nan 0.956 ± nan
0.1 0.924 ± 0.026 0.943 ± 0.011 0.931 ± nan 0.936 ± nan 0.915 ± 0.016 0.911 ± 0.014
0.2 0.955 ± 0.004 0.930 ± 0.034 0.955 ± 0.003 0.950 ± 0.005 0.953 ± 0.007 0.950 ± 0.007
0.3 0.954 ± 0.011 0.958 ± 0.005 0.959 ± 0.003 0.960 ± 0.002 0.961 ± 0.002 0.961 ± 0.001
0.4 0.963 ± 0.003 0.965 ± 0.006 0.969 ± 0.005 0.968 ± 0.006 0.968 ± 0.002 0.967 ± 0.004
0.5 0.965 ± 0.002 0.968 ± 0.003 0.970 ± 0.005 0.969 ± 0.004 0.970 ± 0.003 0.969 ± 0.007
True 0.0 0.915 ± nan 0.919 ± 0.044 0.950 ± nan NaN 0.946 ± nan 0.943 ± nan
0.1 0.923 ± 0.018 0.926 ± 0.027 0.942 ± nan 0.940 ± 0.004 0.929 ± nan 0.937 ± 0.029
0.2 0.949 ± 0.008 0.954 ± 0.001 0.950 ± 0.001 0.956 ± 0.006 0.957 ± 0.007 0.928 ± 0.027
0.3 0.954 ± 0.009 0.961 ± 0.004 0.959 ± 0.001 0.962 ± 0.004 0.960 ± 0.004 0.958 ± 0.003
0.4 0.964 ± 0.005 0.966 ± 0.006 0.967 ± 0.007 0.964 ± 0.006 0.964 ± 0.008 0.966 ± 0.007
0.5 0.965 ± 0.004 0.968 ± 0.003 0.970 ± 0.004 0.970 ± 0.005 0.970 ± 0.004 0.968 ± 0.006
  • There is a clearer trend in the low sparsity case. Results from high sparsity are inconclusive, with several runs failing to "converge".
  • Weight pruning alone improves the model by up to 0.7% when going from 10% to 50% magnitude pruning.
  • Hebbian pruning alone improves the model by 1.5%.
  • Both combined raise the improvement from the 1.5% seen with Hebbian pruning only to 1.8%.
  • Comparisons above cover the 0.1 to 0.5 pruning range. An open question is why the no-pruning case, the (0, 0) point, is an anomaly relative to the trend shown in the pivot. A sketch below pulls out the single best combination from the low sparsity pivot.
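
A sketch for pulling the single best (hebbian_prune_perc, weight_prune_perc) combination out of a pivot like the ones above, here on the low sparsity subset:

pivot = pd.pivot_table(df[low_sparsity],
                       index='hebbian_prune_perc',
                       columns='weight_prune_perc',
                       values='val_acc_max',
                       aggfunc='mean')
pivot.stack().idxmax(), pivot.stack().max()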

In [39]:
pd.pivot_table(df[avg_sparsity], 
              index='hebbian_prune_perc',
              columns='weight_prune_perc',
              values='val_acc_max',
              aggfunc=mean_and_std)


Out[39]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
hebbian_prune_perc
0.0 0.921 ± 0.030 0.911 ± 0.027 0.950 ± 0.000 0.912 ± nan 0.924 ± 0.031 0.950 ± 0.009
0.1 0.923 ± 0.018 0.934 ± 0.019 0.937 ± 0.007 0.939 ± 0.003 0.919 ± 0.014 0.924 ± 0.024
0.2 0.952 ± 0.006 0.942 ± 0.024 0.953 ± 0.003 0.953 ± 0.006 0.955 ± 0.006 0.939 ± 0.020
0.3 0.954 ± 0.008 0.960 ± 0.004 0.959 ± 0.002 0.961 ± 0.003 0.961 ± 0.003 0.959 ± 0.002
0.4 0.964 ± 0.004 0.965 ± 0.005 0.968 ± 0.005 0.966 ± 0.005 0.966 ± 0.005 0.967 ± 0.004
0.5 0.965 ± 0.003 0.968 ± 0.002 0.970 ± 0.004 0.970 ± 0.004 0.970 ± 0.003 0.969 ± 0.005

In [31]:
pd.pivot_table(df[high_sparsity], 
              index='hebbian_prune_perc',
              columns='weight_prune_perc',
              values='val_acc_max',
              aggfunc=mean_and_std)


Out[31]:
weight_prune_perc 0.0 0.1 0.2 0.3 0.4 0.5
hebbian_prune_perc
0.1 NaN NaN NaN NaN 0.900 ± nan NaN
0.2 0.764 ± 0.313 0.821 ± 0.205 0.932 ± 0.017 0.776 ± 0.276 0.919 ± 0.020 0.916 ± 0.011
0.3 0.802 ± 0.185 0.901 ± 0.095 0.945 ± 0.007 0.940 ± 0.007 0.811 ± 0.237 0.927 ± 0.008
0.4 0.948 ± 0.010 0.912 ± 0.090 0.959 ± 0.008 0.965 ± 0.001 0.817 ± 0.280 0.960 ± 0.003
0.5 0.953 ± 0.009 0.960 ± 0.009 0.937 ± 0.051 0.912 ± 0.096 0.965 ± 0.002 0.740 ± 0.386

In [ ]: