Experiment:

Evaluate Hebbian growth

Motivation:

Hold all other variables fixed and evaluate the effect of Hebbian growth alone.

Conclusions:

  • Hebbian growth reaches slightly lower accuracy (about 0.2 percentage points) but converges faster (best validation epoch 18 vs 24) than random growth.
  • Results are consistent with expectations: Hebbian growth accelerates learning, but committing early to a few specific units may prevent the network from finding a different set of connections that could lead to higher performance (see the sketch below).
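
For intuition, here is a minimal sketch contrasting the two growth strategies being compared, assuming Hebbian growth ranks pruned (off) connections by the coactivation of their pre- and post-synaptic units. This is an illustrative assumption, not the dynamic_sparse implementation; grow_random, grow_hebbian, and the coactivations matrix are hypothetical names.

import numpy as np

rng = np.random.default_rng(0)

def grow_random(mask, num_new):
    # random growth: enable num_new off connections chosen uniformly at random
    off = np.flatnonzero(~mask)
    new_mask = mask.copy()
    new_mask.flat[rng.choice(off, size=num_new, replace=False)] = True
    return new_mask

def grow_hebbian(mask, coactivations, num_new):
    # hebbian growth: enable the num_new off connections with the highest
    # coactivation between their pre- and post-synaptic units
    scores = np.where(mask, -np.inf, coactivations)  # exclude already-on connections
    new_mask = mask.copy()
    new_mask.flat[np.argsort(scores, axis=None)[-num_new:]] = True
    return new_mask

# example: 100x100 boolean mask at 20% density, as with on_perc=0.2
mask = rng.random((100, 100)) < 0.2
coactivations = rng.random((100, 100))
assert grow_hebbian(mask, coactivations, 50).sum() == mask.sum() + 50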

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("../../")

In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from dynamic_sparse.common.browser import *

Load and check data


In [4]:
exps = ['neurips_debug_test13', ]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)

In [5]:
df.head(5)


Out[5]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... momentum network num_classes on_perc optim_alg pruning_early_stop test_noise use_kwinners weight_decay weight_prune_perc
0 0_hebbian_grow=True 0.989650 27 0.926333 0 0.987450 0.989083 0.9783 16 0.9618 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.3
1 1_hebbian_grow=False 0.992483 27 0.923767 0 0.989317 0.991917 0.9810 19 0.9622 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.3
2 2_hebbian_grow=True 0.990167 28 0.925483 0 0.986908 0.989100 0.9795 22 0.9648 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.3
3 3_hebbian_grow=False 0.992417 25 0.926733 0 0.989283 0.992067 0.9813 18 0.9605 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.3
4 4_hebbian_grow=True 0.989933 23 0.927350 0 0.986875 0.989267 0.9780 7 0.9625 ... 0.9 MLPHeb 10 0.2 SGD 0 False False 0.0001 0.3

5 rows × 42 columns


In [6]:
# replace NaN with 0.0 in the pruning columns (NaN corresponds to hebbian_prune_perc=None in the config)
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].replace(np.nan, 0.0, regex=True)
df['weight_prune_perc'] = df['weight_prune_perc'].replace(np.nan, 0.0, regex=True)

In [7]:
df.columns


Out[7]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'data_dir', 'dataset_name', 'debug_sparse', 'debug_weights', 'device',
       'hebbian_grow', 'hebbian_prune_perc', 'hidden_sizes', 'input_size',
       'learning_rate', 'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model',
       'momentum', 'network', 'num_classes', 'on_perc', 'optim_alg',
       'pruning_early_stop', 'test_noise', 'use_kwinners', 'weight_decay',
       'weight_prune_perc'],
      dtype='object')

In [8]:
df.shape


Out[8]:
(16, 42)

In [9]:
df.iloc[1]


Out[9]:
Experiment Name                                      1_hebbian_grow=False
train_acc_max                                                    0.992483
train_acc_max_epoch                                                    27
train_acc_min                                                    0.923767
train_acc_min_epoch                                                     0
train_acc_median                                                 0.989317
train_acc_last                                                   0.991917
val_acc_max                                                         0.981
val_acc_max_epoch                                                      19
val_acc_min                                                        0.9622
val_acc_min_epoch                                                       0
val_acc_median                                                    0.97855
val_acc_last                                                       0.9797
epochs                                                                 30
experiment_file_name    /Users/lsouza/nta/results/neurips_debug_test13...
trial_time                                                        12.2084
mean_epoch_time                                                  0.406945
batch_norm                                                           True
data_dir                                        /home/ubuntu/nta/datasets
dataset_name                                                        MNIST
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_grow                                                        False
hebbian_prune_perc                                                      0
hidden_sizes                                                          100
input_size                                                            784
learning_rate                                                         0.1
lr_gamma                                                              0.1
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                        DSNNMixedHeb
momentum                                                              0.9
network                                                            MLPHeb
num_classes                                                            10
on_perc                                                               0.2
optim_alg                                                             SGD
pruning_early_stop                                                      0
test_noise                                                          False
use_kwinners                                                        False
weight_decay                                                       0.0001
weight_prune_perc                                                     0.3
Name: 1, dtype: object

In [10]:
df.groupby('model')['model'].count()


Out[10]:
model
DSNNMixedHeb    16
Name: model, dtype: int64

Analysis

Experiment Details

base_exp_config = dict(
    device="cuda",
    # dataset related
    dataset_name="MNIST",
    data_dir=os.path.expanduser("~/nta/datasets"),
    input_size=784,
    num_classes=10,
    # network related
    network="MLPHeb",
    hidden_sizes=[100, 100, 100],
    batch_norm=True,
    use_kwinners=False,
    # model related
    model="DSNNMixedHeb",
    on_perc=0.2,
    optim_alg="SGD",
    momentum=0.9,
    weight_decay=1e-4,
    learning_rate=0.1,
    lr_scheduler="MultiStepLR",
    lr_milestones=[30, 60, 90],
    lr_gamma=0.1,
    # sparse related
    hebbian_prune_perc=None,
    weight_prune_perc=0.3,
    pruning_early_stop=0,
    hebbian_grow=tune.grid_search([True, False]),
    # additional validation
    test_noise=False,
    # debugging
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
tune_config = dict(
    name=__file__.replace(".py", "") + "_test13",
    num_samples=8,
    local_dir=os.path.expanduser("~/nta/results"),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 30},
    resources_per_trial={"cpu": 1, "gpu": 0.25},
    loggers=DEFAULT_LOGGERS,
    verbose=0,
)
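
Note: hebbian_grow=tune.grid_search([True, False]) combined with num_samples=8 yields 2 × 8 = 16 trials, which matches the 16 rows loaded above (df.shape == (16, 42)).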

In [11]:
# Did any trials fail?
df[df["epochs"]<30]["epochs"].count()


Out[11]:
0

In [12]:
# Removing failed or incomplete trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=30]
df.shape


Out[12]:
(16, 42)

In [13]:
# which trials failed, or are still ongoing?
df_origin['failed'] = df_origin["epochs"]<30
df_origin[df_origin['failed']]['epochs']


Out[13]:
Series([], Name: epochs, dtype: int64)

In [17]:
# helper functions
def mean_and_std(s):
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    # aggregate validation accuracy statistics, optionally over a filtered subset
    data = df if filter is None else df[filter]
    return (data.groupby(columns)
                .agg({'val_acc_max_epoch': round_mean,
                      'val_acc_max': stats,
                      'val_acc_last': stats,
                      'model': ['count']})
                .round(round))
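
The filter argument takes a boolean mask over df; for example, agg(['hebbian_grow'], filter=(df['val_acc_max'] > 0.98)) would restrict the aggregation to trials whose best validation accuracy exceeds 98% (an illustrative threshold, not used below).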
Does Hebbian growth improve learning?

In [18]:
agg(['hebbian_grow'])


Out[18]:
             val_acc_max_epoch   val_acc_max                      val_acc_last                     model
                    round_mean     min    max   mean    std        min    max   mean    std       count
hebbian_grow
False                       24   0.979  0.981  0.981  0.001      0.977   0.98  0.979  0.001           8
True                        18   0.978  0.980  0.979  0.001      0.974   0.98  0.976  0.002           8
  • Hebbian growth reaches slightly lower accuracy (about 0.2 percentage points) but converges faster (best validation epoch 18 vs 24) than random growth.
  • Results are consistent with expectations: Hebbian growth accelerates learning, but committing early to a few specific units may prevent the network from finding a different set of connections that could lead to higher performance.
