Experiment

Evaluate pruning by weight magnitude weighted by coactivations (a more thorough evaluation than before), and compare it to the baseline (SET).
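For orientation, a minimal sketch of the two criteria under comparison. This is an illustration, not the nupic.research implementation: `prune_mask` is a hypothetical helper, and `coacts` stands in for an accumulated-coactivation tensor of the same shape as `weight`.

import torch

def prune_mask(weight, prune_perc, coacts=None):
    # Plain magnitude pruning scores connections by |w|; weighted-magnitude
    # pruning scales each |w| by its accumulated coactivation first.
    score = weight.abs() if coacts is None else weight.abs() * coacts
    k = int(prune_perc * weight.numel())
    if k == 0:
        return torch.ones_like(weight, dtype=torch.bool)
    # kth-smallest score is the pruning threshold
    threshold = score.flatten().kthvalue(k).values
    return score > threshold  # True = keep, False = prune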

Motivation

Check whether results are consistently above the baseline.

Conclusion

  • No significant difference between the two models
  • No support for early stopping

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from nupic.research.frameworks.dynamic_sparse.common.browser import *

import matplotlib.pyplot as plt
from matplotlib import rcParams

%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set(style="whitegrid")
sns.set_palette("colorblind")

Load and check data


In [3]:
base = 'MLPHeb-non-binary-coacts-2019-10-04'
exps = [
    os.path.join(base, exp) for exp in [
        'mlp-heb', 
        'mlp-SET',
        'mlp-WeightedMag',
        'mlp-sparse',
    ]
]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)


exp_name /Users/mcaporale/nta/results/MLPHeb-non-binary-coacts-2019-10-04/mlp-heb/experiment_state-2019-10-04_01-34-19.json
exp_name /Users/mcaporale/nta/results/MLPHeb-non-binary-coacts-2019-10-04/mlp-SET/experiment_state-2019-10-04_01-34-19.json
exp_name /Users/mcaporale/nta/results/MLPHeb-non-binary-coacts-2019-10-04/mlp-WeightedMag/experiment_state-2019-10-04_01-34-19.json
exp_name /Users/mcaporale/nta/results/MLPHeb-non-binary-coacts-2019-10-04/mlp-sparse/experiment_state-2019-10-04_01-34-19.json

In [4]:
df.head(5)['hebbian_prune_perc']


Out[4]:
0    0.3
1    0.3
2    0.3
3    0.3
4    0.3
Name: hebbian_prune_perc, dtype: object

In [5]:
# replace NaN with 0.0 in both pruning-percentage columns
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].replace(np.nan, 0.0, regex=True)
df['weight_prune_perc'] = df['weight_prune_perc'].replace(np.nan, 0.0, regex=True)
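
A quick sanity check (plain pandas) that the replacement left no NaNs in either column:

df[['hebbian_prune_perc', 'weight_prune_perc']].isna().sum()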

In [6]:
df.columns


Out[6]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'val_acc_all',
       'epochs', 'experiment_file_name', 'trial_time', 'mean_epoch_time',
       'batch_norm', 'data_dir', 'dataset_name', 'debug_sparse', 'device',
       'hebbian_grow', 'hebbian_prune_perc', 'hidden_sizes', 'input_size',
       'learning_rate', 'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model',
       'momentum', 'moving_average_alpha', 'network', 'num_classes', 'on_perc',
       'optim_alg', 'pruning_early_stop', 'test_noise',
       'use_binary_coactivations', 'weight_decay', 'weight_prune_perc'],
      dtype='object')

In [7]:
df.shape


Out[7]:
(171, 43)

In [8]:
df.iloc[1]


Out[8]:
Experiment Name                            1_model=DSNNMixedHeb,on_perc=0.005
train_acc_max                                                        0.112367
train_acc_max_epoch                                                        31
train_acc_min                                                         0.10525
train_acc_min_epoch                                                        25
train_acc_median                                                     0.112367
train_acc_last                                                       0.112367
val_acc_max                                                            0.1135
val_acc_max_epoch                                                           0
val_acc_min                                                            0.0958
val_acc_min_epoch                                                           4
val_acc_median                                                         0.1135
val_acc_last                                                           0.1135
val_acc_all                 0     0.1135
1     0.1028
2     0.1028
3     0...
epochs                                                                     90
experiment_file_name        /Users/mcaporale/nta/results/MLPHeb-non-binary...
trial_time                                                            68.0044
mean_epoch_time                                                      0.755605
batch_norm                                                               True
data_dir                                            /home/ubuntu/nta/datasets
dataset_name                                                            MNIST
debug_sparse                                                             True
device                                                                   cuda
hebbian_grow                                                             True
hebbian_prune_perc                                                        0.3
hidden_sizes                                                              100
input_size                                                                784
learning_rate                                                             0.1
lr_gamma                                                                  0.1
lr_milestones                                                              60
lr_scheduler                                                      MultiStepLR
model                                                            DSNNMixedHeb
momentum                                                                  0.9
moving_average_alpha                                                      0.6
network                                                                MLPHeb
num_classes                                                                10
on_perc                                                                 0.005
optim_alg                                                                 SGD
pruning_early_stop                                                          2
test_noise                                                              False
use_binary_coactivations                                                False
weight_decay                                                           0.0001
weight_prune_perc                                                           0
Name: 1, dtype: object

In [9]:
df.groupby('model')['model'].count()


Out[9]:
model
DSNNMixedHeb       82
DSNNWeightedMag    41
SparseModel        48
Name: model, dtype: int64

Analysis

Experiment Details

# experiment configurations
base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="MNIST",
    data_dir=os.path.expanduser("~/nta/datasets"),
    input_size=784,
    num_classes=10,
    # network related
    network="MLPHeb",
    hidden_sizes=[100, 100, 100],
    batch_norm=True,
    use_kwinners=False,
    # model related
    model=tune.grid_search(["DSNNWeightedMag", "DSNNMixedHeb", "SparseModel"]),
    on_perc=tune.grid_search(list(np.arange(0, 0.101, 0.005))),
    optim_alg="SGD",
    momentum=0.9,
    weight_decay=1e-4,
    learning_rate=0.1,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.1,
    # sparse related
    hebbian_prune_perc=None,
    hebbian_grow=False,
    weight_prune_perc=0.3,
    pruning_early_stop=2,
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
tune_config = dict(
    name=__file__.replace(".py", "") + "_eval8",
    num_samples=7,
    local_dir=os.path.expanduser("~/nta/results"),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 100},
    resources_per_trial={"cpu": 1, "gpu": 0.20},
    loggers=DEFAULT_LOGGERS,
    verbose=0,
)
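
For reference, a quick check of the sweep size implied by this config (using the numpy import from In [2]):

on_perc_values = list(np.arange(0, 0.101, 0.005))
print(len(on_perc_values), 3 * len(on_perc_values))  # 21 density levels x 3 models = 63 configurations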

In [10]:
# Did any trials fail?
df[df["epochs"]<30]["epochs"].count()


Out[10]:
0

In [11]:
# Removing failed or incomplete trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=30].copy()  # copy() so later column assignments avoid SettingWithCopyWarning
df.shape


Out[11]:
(171, 43)

In [12]:
# which trials failed, or are still ongoing?
df_origin['failed'] = df_origin["epochs"]<30
df_origin[df_origin['failed']]['epochs']


Out[12]:
Series([], Name: epochs, dtype: int64)

In [13]:
# helper functions
def mean_and_std(s):
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    # single code path: select rows (all, or those matching `filter`),
    # then aggregate validation-accuracy stats per group
    data = df if filter is None else df[filter]
    return (data.groupby(columns)
                .agg({'val_acc_max_epoch': round_mean,
                      'val_acc_max': stats,
                      'model': ['count']})
                .round(round))

Does improved weight pruning outperform regular SET?

In [14]:
agg(['model'])


Out[14]:
                val_acc_max_epoch val_acc_max                     model
                       round_mean         min    max   mean   std count
model
DSNNMixedHeb                   39       0.114  0.983  0.842 0.261    82
DSNNWeightedMag                35       0.114  0.981  0.863 0.261    41
SparseModel                    40       0.114  0.975  0.769 0.314    48

In [15]:
agg(['on_perc'])


Out[15]:
        val_acc_max_epoch val_acc_max                     model
               round_mean         min    max   mean   std count
on_perc
0.000                   0       0.114  0.114  0.114 0.000     9
0.005                   0       0.114  0.114  0.114 0.000     9
0.010                  30       0.314  0.710  0.549 0.145     9
0.015                  19       0.383  0.793  0.600 0.166     9
0.020                  46       0.822  0.952  0.888 0.056     9
0.025                  41       0.853  0.959  0.903 0.047     9
0.030                  47       0.899  0.971  0.940 0.029     8
0.035                  35       0.906  0.970  0.945 0.025     8
0.040                  44       0.928  0.976  0.957 0.020     8
0.045                  47       0.934  0.976  0.959 0.017     8
0.050                  38       0.941  0.978  0.963 0.014     8
0.055                  41       0.942  0.979  0.964 0.015     8
0.060                  47       0.945  0.980  0.967 0.014     8
0.065                  48       0.952  0.979  0.969 0.011     8
0.070                  51       0.955  0.980  0.972 0.011     8
0.075                  52       0.960  0.980  0.972 0.009     8
0.080                  51       0.958  0.981  0.973 0.010     8
0.085                  54       0.961  0.981  0.974 0.008     8
0.090                  45       0.957  0.982  0.974 0.009     8
0.095                  41       0.959  0.981  0.974 0.009     8
0.100                  44       0.967  0.983  0.975 0.006     5

In [16]:
def model_name(row):
    if row['model'] == 'DSNNWeightedMag':
        return 'DSNN-WM'
    elif row['model'] == 'DSNNMixedHeb':
        if row['hebbian_prune_perc'] == 0.3:
            return 'SET'
        elif row['weight_prune_perc'] == 0.3:
            return 'DSNN-Heb'
    elif row['model'] == 'SparseModel':
        return 'Static'

    assert False, "This should cover all cases. Got {} h - {} w - {}".format(
        row['model'], row['hebbian_prune_perc'], row['weight_prune_perc'])

def test(val):
    print(type(val))
    print(val)
    print()

In [17]:
df['model2'] = df.apply(model_name, axis=1)

In [18]:
# note: 'Sparse' is not a model2 value ('Static' is), so this filter keeps
# every row, which matches the table below (Static rows included)
fltr = df['model2'] != 'Sparse'
agg(['on_perc', 'model2'], filter=fltr)


Out[18]:
                 val_acc_max_epoch val_acc_max                     model
                        round_mean         min    max   mean   std count
on_perc model2
0.000   DSNN-Heb                 0       0.114  0.114  0.114 0.000     2
        DSNN-WM                  0       0.114  0.114  0.114 0.000     2
        SET                      0       0.114  0.114  0.114 0.000     2
        Static                   0       0.114  0.114  0.114 0.000     3
0.005   DSNN-Heb                 1       0.114  0.114  0.114 0.000     2
        DSNN-WM                  0       0.114  0.114  0.114 0.000     2
        SET                      0       0.114  0.114  0.114 0.000     2
        Static                   0       0.114  0.114  0.114 0.000     3
0.010   DSNN-Heb                38       0.610  0.635  0.623 0.017     2
        DSNN-WM                 38       0.678  0.710  0.694 0.022     2
        SET                      2       0.536  0.662  0.599 0.089     2
        Static                  38       0.314  0.413  0.369 0.051     3
0.015   DSNN-Heb                13       0.710  0.737  0.724 0.019     2
        DSNN-WM                 19       0.689  0.793  0.741 0.074     2
        SET                      4       0.526  0.739  0.632 0.151     2
        Static                  32       0.383  0.412  0.401 0.016     3
0.020   DSNN-Heb                62       0.949  0.952  0.951 0.002     2
        DSNN-WM                 32       0.934  0.948  0.941 0.010     2
        SET                      4       0.854  0.854  0.854 0.000     2
        Static                  74       0.822  0.844  0.833 0.011     3
0.025   DSNN-Heb                55       0.956  0.959  0.957 0.002     2
        DSNN-WM                 26       0.943  0.950  0.946 0.005     2
        SET                      8       0.865  0.869  0.867 0.003     2
        Static                  65       0.853  0.871  0.861 0.009     3
0.030   DSNN-Heb                80       0.968  0.971  0.969 0.002     2
        DSNN-WM                 35       0.959  0.962  0.961 0.003     2
        SET                     10       0.899  0.906  0.903 0.005     2
        Static                  64       0.925  0.932  0.928 0.004     2
0.035   DSNN-Heb                36       0.966  0.970  0.968 0.002     2
        DSNN-WM                 58       0.964  0.966  0.965 0.001     2
...     ...                    ...         ...    ...    ...   ...   ...
0.065   SET                     88       0.952  0.954  0.953 0.001     2
        Static                  37       0.966  0.967  0.966 0.000     2
0.070   DSNN-Heb                53       0.979  0.980  0.980 0.001     2
        DSNN-WM                 32       0.980  0.980  0.980 0.000     2
        SET                     86       0.955  0.956  0.956 0.001     2
        Static                  32       0.970  0.972  0.971 0.001     2
0.075   DSNN-Heb                36       0.979  0.980  0.980 0.001     2
        DSNN-WM                 43       0.978  0.980  0.979 0.001     2
        SET                     88       0.960  0.961  0.960 0.000     2
        Static                  40       0.968  0.968  0.968 0.000     2
0.080   DSNN-Heb                36       0.980  0.981  0.981 0.001     2
        DSNN-WM                 60       0.981  0.981  0.981 0.000     2
        SET                     74       0.958  0.959  0.958 0.001     2
        Static                  34       0.972  0.973  0.973 0.000     2
0.085   DSNN-Heb                46       0.980  0.981  0.980 0.000     2
        DSNN-WM                 46       0.980  0.980  0.980 0.000     2
        SET                     84       0.961  0.962  0.961 0.000     2
        Static                  39       0.973  0.974  0.973 0.001     2
0.090   DSNN-Heb                42       0.982  0.982  0.982 0.000     2
        DSNN-WM                 36       0.980  0.980  0.980 0.000     2
        SET                     57       0.957  0.963  0.960 0.004     2
        Static                  45       0.973  0.973  0.973 0.000     2
0.095   DSNN-Heb                34       0.980  0.980  0.980 0.000     2
        DSNN-WM                 33       0.980  0.981  0.980 0.001     2
        SET                     54       0.959  0.962  0.960 0.002     2
        Static                  41       0.974  0.975  0.975 0.000     2
0.100   DSNN-Heb                32       0.983  0.983  0.983   NaN     1
        DSNN-WM                 36       0.981  0.981  0.981   NaN     1
        SET                     87       0.967  0.967  0.967   NaN     1
        Static                  32       0.973  0.973  0.973 0.000     2

84 rows × 6 columns
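
As a rough follow-up to the "no significant difference" conclusion, one simple check is a paired t-test of mean val_acc_max across density levels. This is a sketch using scipy, not part of the original analysis, and pairing DSNN-WM against SET is an assumption about which two models the conclusion refers to:

from scipy.stats import ttest_rel

wm_acc  = df[df['model2'] == 'DSNN-WM'].groupby('on_perc')['val_acc_max'].mean()
set_acc = df[df['model2'] == 'SET'].groupby('on_perc')['val_acc_max'].mean()
t, p = ttest_rel(wm_acc, set_acc)  # both series share the same sorted on_perc index
print("paired t-test across densities: t={:.2f}, p={:.3f}".format(t, p))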


In [19]:
# model names are already translated via the 'model2' column created above
rcParams['figure.figsize'] = 16, 8

In [20]:
# alternative view: sns.scatterplot(data=df, x='on_perc', y='val_acc_max', hue='model2')
sns.lineplot(data=df, x='on_perc', y='val_acc_max', hue='model2')


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x1345af7b8>

In [22]:
rcParams['figure.figsize'] = 16, 8
fltr = df['model2'] != 'Static'  # 'Static' lives in model2; df['model'] holds class names like 'SparseModel'
sns.lineplot(data=df[fltr], x='on_perc', y='val_acc_max_epoch', hue='model2')


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x13423ee10>

In [24]:
sns.lineplot(data=df, x='on_perc', y='val_acc_last', hue='model2')


Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x134736b38>
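
Optional polish for any of the plots above (plain seaborn/matplotlib, nothing project-specific): capture the returned Axes and label it, e.g.

ax = sns.lineplot(data=df, x='on_perc', y='val_acc_last', hue='model2')
ax.set(xlabel='density (fraction of weights on)', ylabel='last-epoch validation accuracy')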
