Experiment:

Replicate the How So Dense experiments using the new dynamic sparse framework, and compare the results with those published in the paper.

Motivation

  • Ensure our code has no known bugs before proceeding with further experimentation.
  • Ensure the How So Dense experiments are replicable.

Conclusion

base_exp_config = dict(
    device="cuda",
    # ----- dataset related ----
    dataset_name="PreprocessedGSC",
    data_dir=os.path.expanduser("~/nta/datasets/gsc"),
    train_batches_per_epoch=5121,
    # batch_size_train=(4, 16),
    batch_size_train=16,
    batch_size_test=20,  # required to fit the GPU
    # ----- network related ----
    network="GSCHeb",
    percent_on_k_winner=[0.095, 0.125, 0.067],
    k_inference_factor=1.5,
    boost_strength=[1.5, 1.5, 1.5],
    boost_strength_factor=[0.9, 0.9, 0.9],
    hidden_neurons_conv=[64, 64],
    hidden_neurons_fc=1500,
    bias=True,
    dropout=False,
    batch_norm=True,
    # ----- model related ----
    model=tune.grid_search(
        ["BaseModel", "SparseModel", "DSNNWeightedMag", "DSNNMixedHeb"]
    ),
    optim_alg="SGD",
    momentum=0,
    learning_rate=0.01,
    weight_decay=0.01,
    lr_scheduler="StepLR",
    lr_gamma=0.9,
    on_perc=[1, 1, 0.1, 1],
    hebbian_prune_perc=None,
    hebbian_grow=False,
    weight_prune_perc=0.3,
    pruning_early_stop=None,  # 2
    # additional validation
    test_noise=True,
    # debugging
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
tune_config = dict(
    name=__file__.replace(".py", "") + "_test2",
    num_samples=8,
    local_dir=os.path.expanduser("~/nta/results"),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 25},
    resources_per_trial={"cpu": 1, "gpu": 0.25},
    loggers=DEFAULT_LOGGERS,
    verbose=0,
)
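For context, these two dicts are the standard inputs to Ray Tune: the tune.grid_search in base_exp_config is expanded into one trial per model class, and each combination is repeated num_samples times. A minimal sketch of how they are wired together, assuming a trainable named run_experiment (hypothetical here; the actual entry point lives in the dynamic sparse framework):

import ray
from ray import tune

ray.init()
tune.run(
    run_experiment,          # hypothetical trainable that builds and trains the model
    config=base_exp_config,  # tune.grid_search expands the four model classes
    **tune_config,           # name, num_samples, stop criteria, resources, loggers
)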

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from nupic.research.frameworks.dynamic_sparse.common.browser import *

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams

%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set(style="whitegrid")
sns.set_palette("colorblind")

Load and check data


In [3]:
# exps = ['replicate_hsd_test2']
# exps = ['replicate_hsd_debug1']
# exps = ['replicate_hsd_debug3_8x']
exps = ['replicate_hsd_debug6_8x']
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)

In [4]:
df.head(5)


Out[4]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... momentum network on_perc optim_alg percent_on_k_winner pruning_early_stop test_noise train_batches_per_epoch weight_decay weight_prune_perc
0 0_model=BaseModel 0.950005 24 0.634460 0 0.934186 0.950005 0.963512 21 0.895349 ... 0 GSCHeb 0.775 SGD 0.095667 None True 5121 0.01 0.3
1 1_model=SparseModel 0.946685 24 0.626648 0 0.932428 0.946685 0.960305 18 0.887731 ... 0 GSCHeb 0.775 SGD 0.095667 None True 5121 0.01 0.3
2 2_model=DSNNWeightedMag 0.949419 24 0.629577 0 0.933600 0.949419 0.963913 21 0.882919 ... 0 GSCHeb 0.775 SGD 0.095667 None True 5121 0.01 0.3
3 3_model=DSNNMixedHeb 0.949273 24 0.653159 0 0.935895 0.949273 0.965918 23 0.884523 ... 0 GSCHeb 0.775 SGD 0.095667 None True 5121 0.01 0.3
4 4_model=BaseModel 0.952007 24 0.643785 0 0.935016 0.952007 0.963111 17 0.890938 ... 0 GSCHeb 0.775 SGD 0.095667 None True 5121 0.01 0.3

5 rows × 55 columns


In [5]:
# replace NaN hebbian/weight prune percentages with 0.0
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].replace(np.nan, 0.0, regex=True)
df['weight_prune_perc'] = df['weight_prune_perc'].replace(np.nan, 0.0, regex=True)
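As an aside, the regex flag is unnecessary when replacing a scalar NaN; pandas' fillna expresses the same operation more directly. An equivalent sketch:

# equivalent, more idiomatic form of the replacement above
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].fillna(0.0)
df['weight_prune_perc'] = df['weight_prune_perc'].fillna(0.0)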

In [6]:
df.columns


Out[6]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'noise_acc_max',
       'noise_acc_max_epoch', 'noise_acc_min', 'noise_acc_min_epoch',
       'noise_acc_median', 'noise_acc_last', 'val_acc_all', 'epochs',
       'experiment_file_name', 'trial_time', 'mean_epoch_time', 'batch_norm',
       'batch_size_test', 'batch_size_train', 'bias', 'boost_strength',
       'boost_strength_factor', 'data_dir', 'dataset_name', 'debug_sparse',
       'debug_weights', 'device', 'dropout', 'hebbian_grow',
       'hebbian_prune_perc', 'hidden_neurons_conv', 'hidden_neurons_fc',
       'k_inference_factor', 'learning_rate', 'lr_gamma', 'lr_scheduler',
       'model', 'momentum', 'network', 'on_perc', 'optim_alg',
       'percent_on_k_winner', 'pruning_early_stop', 'test_noise',
       'train_batches_per_epoch', 'weight_decay', 'weight_prune_perc'],
      dtype='object')

In [7]:
df.shape


Out[7]:
(120, 55)

In [8]:
df.iloc[1]


Out[8]:
Experiment Name                                          1_model=SparseModel
train_acc_max                                                       0.946685
train_acc_max_epoch                                                       24
train_acc_min                                                       0.626648
train_acc_min_epoch                                                        0
train_acc_median                                                    0.932428
train_acc_last                                                      0.946685
val_acc_max                                                         0.960305
val_acc_max_epoch                                                         18
val_acc_min                                                         0.887731
val_acc_min_epoch                                                          0
val_acc_median                                                      0.950281
val_acc_last                                                        0.955894
noise_acc_max                                                       0.961599
noise_acc_max_epoch                                                       21
noise_acc_min                                                     0.00352665
noise_acc_min_epoch                                                        5
noise_acc_median                                                    0.198276
noise_acc_last                                                      0.198276
val_acc_all                0     0.887731
1     0.925020
2     0.925020
3...
epochs                                                                    25
experiment_file_name       /Users/lsouza/nta/results/replicate_hsd_debug6...
trial_time                                                           5.72317
mean_epoch_time                                                     0.228927
batch_norm                                                              True
batch_size_test                                                           20
batch_size_train                                                          16
bias                                                                    True
boost_strength                                                           1.5
boost_strength_factor                                                    0.9
data_dir                                       /home/ubuntu/nta/datasets/gsc
dataset_name                                                 PreprocessedGSC
debug_sparse                                                            True
debug_weights                                                           True
device                                                                  cuda
dropout                                                                False
hebbian_grow                                                           False
hebbian_prune_perc                                                         0
hidden_neurons_conv                                                       64
hidden_neurons_fc                                                       1500
k_inference_factor                                                       1.5
learning_rate                                                           0.01
lr_gamma                                                                 0.9
lr_scheduler                                                          StepLR
model                                                            SparseModel
momentum                                                                   0
network                                                               GSCHeb
on_perc                                                                0.775
optim_alg                                                                SGD
percent_on_k_winner                                                0.0956667
pruning_early_stop                                                      None
test_noise                                                              True
train_batches_per_epoch                                                 5121
weight_decay                                                            0.01
weight_prune_perc                                                        0.3
Name: 1, dtype: object

In [9]:
df.groupby('model')['model'].count()


Out[9]:
model
BaseModel          30
DSNNMixedHeb       30
DSNNWeightedMag    30
SparseModel        30
Name: model, dtype: int64

Analysis

Experiment Details


In [10]:
num_epochs = 25
# Did any trials fail?
df[df["epochs"]<num_epochs]["epochs"].count()


Out[10]:
0

In [11]:
# Removing failed or incomplete trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=num_epochs]
df.shape


Out[11]:
(120, 55)

In [12]:
# which trials failed, or are still ongoing?
df_origin['failed'] = df_origin["epochs"]<num_epochs
df_origin[df_origin['failed']]['epochs']


Out[12]:
Series([], Name: epochs, dtype: int64)

In [13]:
# helper functions
def mean_and_std(s):
    """Format a series as 'mean ± std', in percent."""
    return "{:.2f} ± {:.2f}".format(s.mean() * 100, s.std() * 100)

def round_mean(s):
    """Format the mean of a series, rounded to the nearest integer."""
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, mask=None, decimals=3):
    """Aggregate accuracy stats per group, optionally over a boolean mask."""
    data = df if mask is None else df[mask]
    return (data.groupby(columns)
                .agg({'val_acc_max_epoch': round_mean,
                      'val_acc_max': stats,
                      'val_acc_last': stats,
                      'model': ['count']})
                .round(decimals))

def agg_paper(columns, mask=None, decimals=3):
    """Aggregate accuracies in the 'mean ± std' format used in the paper."""
    data = df if mask is None else df[mask]
    return (data.groupby(columns)
                .agg({'val_acc_max': mean_and_std,
                      'val_acc_last': mean_and_std,
                      'train_acc_last': mean_and_std,
                      'model': ['count']})
                .round(decimals))
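Both helpers optionally take a boolean mask to restrict the aggregation to a subset of trials. An illustrative usage (this particular mask is hypothetical, not one used in this notebook):

# illustrative: aggregate over the two dynamic sparse models only
agg_paper(['model'], df['model'].isin(['DSNNWeightedMag', 'DSNNMixedHeb']))
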
Results

In [14]:
agg(['model'])


Out[14]:
val_acc_max_epoch val_acc_max val_acc_last model
round_mean min max mean std min max mean std count
model
BaseModel 21 0.962 0.968 0.965 0.002 0.955 0.968 0.963 0.003 30
DSNNMixedHeb 20 0.960 0.966 0.963 0.002 0.957 0.965 0.961 0.003 30
DSNNWeightedMag 20 0.960 0.968 0.963 0.002 0.952 0.968 0.960 0.003 30
SparseModel 19 0.959 0.967 0.963 0.002 0.953 0.965 0.960 0.003 30

In [15]:
agg_paper(['model'])


Out[15]:
val_acc_max val_acc_last train_acc_last model
mean_and_std mean_and_std mean_and_std count
model
BaseModel 96.50 ± 0.17 96.26 ± 0.29 95.16 ± 0.14 30
DSNNMixedHeb 96.31 ± 0.18 96.08 ± 0.25 94.88 ± 0.15 30
DSNNWeightedMag 96.30 ± 0.18 95.97 ± 0.33 94.86 ± 0.14 30
SparseModel 96.28 ± 0.21 95.99 ± 0.29 94.85 ± 0.13 30
