Experiment:

Evaluate pruning by magnitude weighted by coactivations.

Motivation:

Test the newly proposed method.
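For reference, a minimal sketch of the idea under test (function name, tensor layout, and the exact scoring are assumptions; the actual implementation is the DSNNWeightedMag model in this repo): each active weight is scored by its magnitude scaled by the coactivation of its pre- and post-synaptic units, and the lowest-scoring fraction of active connections is pruned.

import torch

def prune_by_weighted_magnitude(weight, coacts, prune_perc):
    # Hypothetical sketch: score = |weight| * coactivation, then drop the
    # `prune_perc` fraction of currently-active connections with the lowest
    # scores. Returns the boolean mask of connections to keep.
    score = weight.abs() * coacts
    on_mask = weight != 0
    num_prune = int(prune_perc * on_mask.sum().item())
    if num_prune == 0:
        return on_mask
    threshold = score[on_mask].kthvalue(num_prune).values
    return on_mask & (score > threshold)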


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../../")

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

Load and check data


In [4]:
exps = ['improved_magpruning_test1', ]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)
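load_many comes from the project's browser utilities and returns one summary row per trial. A rough, hypothetical stand-in for what it presumably does (directory layout and column names are assumptions, not the actual implementation):

import glob
import os
import pandas as pd

def load_many_sketch(paths):
    # Hypothetical: read each ray tune trial's progress.csv under every
    # experiment folder and reduce it to one summary row per trial.
    rows = []
    for path in paths:
        for progress in glob.glob(os.path.join(path, "*", "progress.csv")):
            trial = pd.read_csv(progress)
            rows.append({
                "experiment_file_name": progress,
                "epochs": len(trial),
                "val_acc_max": trial["val_acc"].max(),     # assumed column name
                "val_acc_last": trial["val_acc"].iloc[-1],
            })
    return pd.DataFrame(rows)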

In [5]:
df.head(5)


Out[5]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... momentum network num_classes on_perc optim_alg pruning_early_stop test_noise use_kwinners weight_decay weight_prune_perc
0 0_weight_prune_perc=None 0.988317 28 0.925067 0 0.985508 0.987333 0.9761 17 0.9628 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 NaN
1 1_weight_prune_perc=0.1 0.992317 29 0.926417 0 0.988500 0.992317 0.9803 25 0.9562 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.1
2 2_weight_prune_perc=0.2 0.992067 24 0.925400 0 0.988567 0.991500 0.9793 28 0.9598 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.2
3 3_weight_prune_perc=0.3 0.991183 29 0.927700 0 0.987950 0.991183 0.9814 24 0.9612 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.3
4 4_weight_prune_perc=0.4 0.990383 24 0.925767 0 0.986525 0.990117 0.9787 21 0.9632 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.4

5 rows × 41 columns


In [6]:
# replace NaN prune percentages (None in the grid search) with 0.0
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].replace(np.nan, 0.0, regex=True)
df['weight_prune_perc'] = df['weight_prune_perc'].replace(np.nan, 0.0, regex=True)

In [7]:
df.columns


Out[7]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'data_dir', 'dataset_name', 'debug_sparse', 'debug_weights', 'device',
       'hebbian_prune_perc', 'hidden_sizes', 'input_size', 'learning_rate',
       'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model', 'momentum',
       'network', 'num_classes', 'on_perc', 'optim_alg', 'pruning_early_stop',
       'test_noise', 'use_kwinners', 'weight_decay', 'weight_prune_perc'],
      dtype='object')

In [8]:
df.shape


Out[8]:
(24, 41)

In [9]:
df.iloc[1]


Out[9]:
Experiment Name                                   1_weight_prune_perc=0.1
train_acc_max                                                    0.992317
train_acc_max_epoch                                                    29
train_acc_min                                                    0.926417
train_acc_min_epoch                                                     0
train_acc_median                                                   0.9885
train_acc_last                                                   0.992317
val_acc_max                                                        0.9803
val_acc_max_epoch                                                      25
val_acc_min                                                        0.9562
val_acc_min_epoch                                                       0
val_acc_median                                                     0.9769
val_acc_last                                                       0.9764
epochs                                                                 30
experiment_file_name    /Users/lsouza/nta/results/improved_magpruning_...
trial_time                                                        17.3449
mean_epoch_time                                                  0.578165
batch_norm                                                           True
data_dir                                        /home/ubuntu/nta/datasets
dataset_name                                                        MNIST
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_prune_perc                                                      0
hidden_sizes                                                          100
input_size                                                            784
learning_rate                                                         0.1
lr_gamma                                                              0.1
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                     DSNNWeightedMag
momentum                                                              0.9
network                                                            MLPHeb
num_classes                                                            10
on_perc                                                               0.2
optim_alg                                                             SGD
pruning_early_stop                                                      0
test_noise                                                          False
use_kwinners                                                        False
weight_decay                                                       0.0001
weight_prune_perc                                                     0.1
Name: 1, dtype: object

In [10]:
df.groupby('model')['model'].count()


Out[10]:
model
DSNNWeightedMag    24
Name: model, dtype: int64

Analysis

Experiment Details

base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="MNIST",
    data_dir=os.path.expanduser("~/nta/datasets"),
    input_size=784,
    num_classes=10,
    # network related
    network="MLPHeb",
    hidden_sizes=[100, 100, 100],
    batch_norm=True,
    use_kwinners=False,
    # model related
    model="DSNNWeightedMag",
    on_perc=0.2,
    optim_alg="SGD",
    momentum=0.9,
    weight_decay=1e-4,
    learning_rate=0.1,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.1,
    # sparse related
    hebbian_prune_perc=None,
    weight_prune_perc=tune.grid_search([None, 0.1, 0.2, 0.3, 0.4, 0.5]),
    pruning_early_stop=0,
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
tune_config = dict(
    name=__file__.replace(".py", "") + "_test1",
    num_samples=4,
    local_dir=os.path.expanduser("~/nta/results"),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 30},
    resources_per_trial={"cpu": 1, "gpu": 0.165},
    loggers=DEFAULT_LOGGERS,
    verbose=0,
)
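The config above is presumably launched with ray.tune roughly as sketched below; base_experiment is a hypothetical stand-in for the project's actual training function, not an API shown in this notebook.

import ray
from ray import tune
from ray.tune.logger import DEFAULT_LOGGERS

def base_experiment(config, reporter):
    # Hypothetical stand-in: the real trainable builds the MLPHeb network with
    # the DSNNWeightedMag model from `config`, trains it, and reports
    # validation accuracy after each epoch.
    for epoch in range(30):
        reporter(val_acc=0.0, epochs=epoch + 1)  # placeholder metrics

ray.init()
tune.run(
    base_experiment,
    config=base_exp_config,  # grid over weight_prune_perc defined above
    **tune_config,           # name, num_samples, stop criteria, resources, loggers
)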

In [11]:
# Did any trials fail?
df[df["epochs"]<30]["epochs"].count()


Out[11]:
0

In [12]:
# Removing failed or incomplete trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=30]
df.shape


Out[12]:
(24, 41)

In [13]:
# which trials failed, or are still ongoing?
df_origin['failed'] = df_origin["epochs"]<30
df_origin[df_origin['failed']]['epochs']


Out[13]:
Series([], Name: epochs, dtype: int64)

In [14]:
# helper functions
def mean_and_std(s):
    """Format a series as 'mean ± std'."""
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    """Format the mean of a series rounded to the nearest integer."""
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    """Group df by `columns` (optionally restricted to rows where `filter` is True)
    and summarize validation accuracy and trial counts."""
    if filter is None:
        return (df.groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,
                   'model': ['count']})).round(round)
    else:
        return (df[filter].groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,
                   'model': ['count']})).round(round)
What are the optimal levels of hebbian and weight pruning?

In [21]:
agg(['weight_prune_perc'])


Out[21]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
weight_prune_perc
0.0 20 0.976 0.977 0.977 0.000 4
0.1 22 0.979 0.982 0.980 0.001 4
0.2 27 0.978 0.979 0.979 0.001 4
0.3 24 0.980 0.981 0.980 0.001 4
0.4 24 0.979 0.981 0.980 0.001 4
0.5 23 0.978 0.980 0.979 0.001 4

In [22]:
multi2 = (df['weight_prune_perc'] % 0.2 == 0)
agg(['weight_prune_perc'], multi2)


Out[22]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
weight_prune_perc
0.0 20 0.976 0.977 0.977 0.000 4
0.2 27 0.978 0.979 0.979 0.001 4
0.4 24 0.979 0.981 0.980 0.001 4
  • No relevant difference
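Side note: the modulo filter above relies on exact floating-point representation of the pruning levels; an equivalent selection that is robust to float error (a sketch, not what was run here) compares against the target values with a tolerance:

import numpy as np

# Select the 0.0 / 0.2 / 0.4 pruning levels without relying on float modulo.
targets = np.array([0.0, 0.2, 0.4])
multi2 = df['weight_prune_perc'].apply(lambda p: np.isclose(p, targets).any())
agg(['weight_prune_perc'], multi2)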

In [23]:
pd.pivot_table(df[filter], 
              index='hebbian_prune_perc',
              columns='weight_prune_perc',
              values='val_acc_max',
              aggfunc=mean_and_std)


Out[23]:
weight_prune_perc 0.0 0.2 0.4 0.6 0.8
hebbian_prune_perc
0.0 0.976 ± 0.000 0.981 ± 0.001 0.980 ± 0.000 0.980 ± 0.001 0.979 ± 0.001
0.2 0.974 ± 0.001 0.979 ± 0.001 0.979 ± 0.003 0.979 ± 0.002 0.977 ± 0.001
0.4 0.971 ± 0.001 0.980 ± 0.001 0.979 ± 0.001 0.979 ± 0.001 0.978 ± 0.001
0.6 0.969 ± 0.001 0.979 ± 0.001 0.980 ± 0.001 0.979 ± 0.001 0.978 ± 0.001
0.8 0.966 ± 0.001 0.980 ± 0.001 0.980 ± 0.002 0.980 ± 0.001 0.978 ± 0.000
1.0 0.964 ± 0.001 0.981 ± 0.001 0.981 ± 0.001 0.980 ± 0.001 0.980 ± 0.001

In [19]:
df.shape


Out[19]:
(108, 42)

Conclusions:

  • No pruning, i.e. (hebbian, weight) = (0, 0), leads to an accuracy of 0.976
  • Pruning all connections at every epoch (1, 0) leads to an accuracy of 0.964
  • The best performing model is still no hebbian pruning with weight pruning set to 0.2 (0.981)
  • Pruning by hebbian learning alone decreases accuracy
  • Combining hebbian and weight-magnitude pruning is not an improvement over simple weight-magnitude pruning

In [ ]: