Experiment:

Evaluate pruning by magnitude weighted by coactivations (more thorough evaluation), compare it to baseline (SET), in GSC. Applied only to linear layers

Motivation.

Check if results are consistently above baseline.

Conclusion

# experiment configurations base_exp_config = dict( device="cuda", # dataset related dataset_name="PreprocessedGSC", data_dir=os.path.expanduser("~/nta/datasets/gsc"), batch_size_train=(4, 16), batch_size_test=1000, # network related network="GSCHeb", optim_alg="SGD", momentum=0, learning_rate=0.01, weight_decay=0.01, lr_scheduler="MultiStepLR", lr_milestones=[30, 60, 90], lr_gamma=0.90, use_kwinners=True, # model related model=tune.grid_search(["DSNNWeightedMag", "DSNNMixedHeb", "SparseModel"]), on_perc=tune.grid_search(list(np.arange(0, 0.101, 0.005))), # sparse related hebbian_prune_perc=None, hebbian_grow=False, weight_prune_perc=0.3, pruning_early_stop=2, # additional validation test_noise=False, # debugging debug_weights=True, debug_sparse=True, ) # ray configurations tune_config = dict( name=__file__.replace(".py", "") + "_eval3", num_samples=1, # num_samples=3, local_dir=os.path.expanduser("~/nta/results"), checkpoint_freq=0, checkpoint_at_end=False, stop={"training_iteration": 100}, resources_per_trial={"cpu": 1, "gpu": .25}, loggers=DEFAULT_LOGGERS, verbose=0, )

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from nupic.research.frameworks.dynamic_sparse.common.browser import *

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams

%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set(style="whitegrid")
sns.set_palette("colorblind")

Load and check data


In [3]:
exps = ['improved_mag_gsc_eval3', 'improved_mag_gsc_eval4']
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df = load_many(paths)


improved_mag_gsc_eval3
improved_mag_gsc_eval4

In [4]:
df.head(5)


Out[4]:
Experiment Name train_acc_max train_acc_max_epoch train_acc_min train_acc_min_epoch train_acc_median train_acc_last val_acc_max val_acc_max_epoch val_acc_min ... model momentum network on_perc optim_alg pruning_early_stop test_noise use_kwinners weight_decay weight_prune_perc
0 0_model=DSNNWeightedMag,on_perc=0.0 0.092960 19 0.087052 47 0.089689 0.089542 0.103448 22 0.000000 ... DSNNWeightedMag 0 GSCHeb 0.000 SGD 2 False True 0.01 0.3
1 1_model=DSNNMixedHeb,on_perc=0.0 0.091934 24 0.085880 33 0.089591 0.089542 0.103448 10 0.000000 ... DSNNMixedHeb 0 GSCHeb 0.000 SGD 2 False True 0.01 0.3
2 2_model=SparseModel,on_perc=0.0 0.092325 38 0.085587 99 0.089322 0.085587 0.103448 5 0.000000 ... SparseModel 0 GSCHeb 0.000 SGD 2 False True 0.01 0.3
3 3_model=DSNNWeightedMag,on_perc=0.005 0.399522 7 0.156821 0 0.199736 0.196026 0.394592 8 0.145376 ... DSNNWeightedMag 0 GSCHeb 0.005 SGD 2 False True 0.01 0.3
4 4_model=DSNNMixedHeb,on_perc=0.005 0.365931 11 0.181330 0 0.296016 0.296065 0.362461 11 0.087382 ... DSNNMixedHeb 0 GSCHeb 0.005 SGD 2 False True 0.01 0.3

5 rows × 43 columns


In [5]:
# replace hebbian prine
df['hebbian_prune_perc'] = df['hebbian_prune_perc'].replace(np.nan, 0.0, regex=True)
df['weight_prune_perc'] = df['weight_prune_perc'].replace(np.nan, 0.0, regex=True)

In [6]:
df.columns


Out[6]:
Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'val_acc_all',
       'epochs', 'experiment_file_name', 'experiment_base_path', 'trial_time',
       'mean_epoch_time', 'scatter_plot_dicts', 'batch_size_test',
       'batch_size_train', 'data_dir', 'dataset_name', 'debug_sparse',
       'debug_weights', 'device', 'hebbian_grow', 'hebbian_prune_perc',
       'learning_rate', 'lr_gamma', 'lr_milestones', 'lr_scheduler', 'model',
       'momentum', 'network', 'on_perc', 'optim_alg', 'pruning_early_stop',
       'test_noise', 'use_kwinners', 'weight_decay', 'weight_prune_perc'],
      dtype='object')

In [7]:
df.shape


Out[7]:
(269, 43)

In [8]:
df.iloc[1]


Out[8]:
Experiment Name                          1_model=DSNNMixedHeb,on_perc=0.0
train_acc_max                                                   0.0919344
train_acc_max_epoch                                                    24
train_acc_min                                                   0.0858803
train_acc_min_epoch                                                    33
train_acc_median                                                0.0895909
train_acc_last                                                   0.089542
val_acc_max                                                      0.103448
val_acc_max_epoch                                                      10
val_acc_min                                                             0
val_acc_min_epoch                                                       0
val_acc_median                                                  0.0971787
val_acc_last                                                    0.0971787
val_acc_all             0     0.000000
1     0.101489
2     0.097179
3...
epochs                                                                100
experiment_file_name    /Users/mcaporale/nta/results/improved_mag_gsc_...
experiment_base_path                               improved_mag_gsc_eval3
trial_time                                                        26.1431
mean_epoch_time                                                  0.261431
scatter_plot_dicts                                                     {}
batch_size_test                                                      1000
batch_size_train                                                       10
data_dir                                    /home/ubuntu/nta/datasets/gsc
dataset_name                                              PreprocessedGSC
debug_sparse                                                         True
debug_weights                                                        True
device                                                               cuda
hebbian_grow                                                        False
hebbian_prune_perc                                                      0
learning_rate                                                        0.01
lr_gamma                                                              0.9
lr_milestones                                                          60
lr_scheduler                                                  MultiStepLR
model                                                        DSNNMixedHeb
momentum                                                                0
network                                                            GSCHeb
on_perc                                                                 0
optim_alg                                                             SGD
pruning_early_stop                                                      2
test_noise                                                          False
use_kwinners                                                         True
weight_decay                                                         0.01
weight_prune_perc                                                     0.3
Name: 1, dtype: object

In [9]:
df.groupby('model')['model'].count()


Out[9]:
model
DSNNMixedHeb       90
DSNNWeightedMag    90
SparseModel        89
Name: model, dtype: int64

Analysis

Experiment Details


In [10]:
# Did any  trials failed?
df[df["epochs"]<30]["epochs"].count()


Out[10]:
5

In [11]:
# Removing failed or incomplete trials
df_origin = df.copy()
df = df_origin[df_origin["epochs"]>=30]
df.shape


Out[11]:
(264, 43)

In [12]:
# which ones failed?
# failed, or still ongoing?
df_origin['failed'] = df_origin["epochs"]<30
df_origin[df_origin['failed']]['epochs']


Out[12]:
264    26
265    23
266    26
267    11
268     1
Name: epochs, dtype: int64

In [13]:
# helper functions
def mean_and_std(s):
    return "{:.3f} ± {:.3f}".format(s.mean(), s.std())

def round_mean(s):
    return "{:.0f}".format(round(s.mean()))

stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    if filter is None:
        return (df.groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,                
                   'model': ['count']})).round(round)
    else:
        return (df[filter].groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,                
                   'model': ['count']})).round(round)

In [14]:
def agg2(columns, filter=None, round=3):
    if filter is None:
        return (df2.groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,                
                   'model': ['count']})).round(round)
    else:
        return (df2[filter].groupby(columns)
             .agg({'val_acc_max_epoch': round_mean,
                   'val_acc_max': stats,                
                   'model': ['count']})).round(round)

base = os.path.join('gsc-smalldense-2019-10-11-exp1')
exps = [
    os.path.join(base, exp) for exp in [
        'gsc-smalldense'
    ]
]
paths = [os.path.expanduser("~/nta/results/{}".format(e)) for e in exps]
df2 = load_many(paths)

df_agg = agg2(['equivalent_on_perc'])
df_agg

equivalent_on_percs = df_agg.index.values

val_means = df_agg['val_acc_max']['mean']
val_means = np.array(val_means)
val_stds = df_agg['val_acc_max']['std']
val_stds = np.array(val_stds)


gsc-smalldense
Does improved weight pruning outperforms regular SET

In [15]:
agg(['model'])


Out[15]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
model
DSNNMixedHeb 54 0.103 0.954 0.823 0.228 88
DSNNWeightedMag 63 0.103 0.952 0.834 0.223 88
SparseModel 74 0.103 0.949 0.808 0.238 88

In [16]:
agg(['on_perc'])


Out[16]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
on_perc
0.000 10 0.103 0.106 0.104 0.001 15
0.005 25 0.218 0.425 0.352 0.059 15
0.010 35 0.609 0.732 0.666 0.044 15
0.015 40 0.735 0.822 0.779 0.028 15
0.020 50 0.807 0.880 0.836 0.025 12
0.025 56 0.841 0.905 0.875 0.021 12
0.030 73 0.865 0.915 0.891 0.016 12
0.035 69 0.878 0.922 0.900 0.015 12
0.040 70 0.895 0.927 0.914 0.010 12
0.045 77 0.910 0.942 0.926 0.009 12
0.050 78 0.915 0.938 0.927 0.006 12
0.055 73 0.928 0.942 0.933 0.004 12
0.060 73 0.931 0.947 0.936 0.005 12
0.065 82 0.929 0.947 0.940 0.006 12
0.070 74 0.924 0.947 0.938 0.006 12
0.075 84 0.939 0.949 0.944 0.003 12
0.080 80 0.938 0.952 0.944 0.004 12
0.085 82 0.938 0.951 0.945 0.004 12
0.090 83 0.943 0.951 0.948 0.002 12
0.095 74 0.942 0.954 0.947 0.003 12
0.100 83 0.946 0.954 0.949 0.002 12

In [17]:
agg(['on_perc', 'model'], df['on_perc'] == 0.04)


Out[17]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
on_perc model
0.04 DSNNMixedHeb 46 0.911 0.916 0.914 0.002 4
DSNNWeightedMag 84 0.922 0.927 0.924 0.002 4
SparseModel 81 0.895 0.909 0.902 0.006 4

In [18]:
# translate model names
rcParams['figure.figsize'] = 16, 8
d = {
    'DSNNWeightedMag': 'Dynamic Sparse Neural Network (DSNN)',
    'DSNNMixedHeb': 'Sparse Evolutionary Training (SET)',
    'SparseModel': 'Static Sparse',        
}
df_plot = df.copy()
df_plot['model'] = df_plot['model'].apply(lambda x: d[x])

In [19]:
# sns.scatterplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x12993f828>

In [20]:
fltr = df_plot['model'] != 'Sparse'
agg(['on_perc', 'model'], filter=fltr)


Out[20]:
val_acc_max_epoch val_acc_max model
round_mean min max mean std count
on_perc model
0.000 DSNNMixedHeb 6 0.103 0.103 0.103 0.000 5
DSNNWeightedMag 13 0.103 0.103 0.103 0.000 5
SparseModel 12 0.103 0.106 0.104 0.001 5
0.005 DSNNMixedHeb 10 0.351 0.396 0.366 0.017 5
DSNNWeightedMag 8 0.394 0.425 0.406 0.014 5
SparseModel 59 0.218 0.334 0.283 0.044 5
0.010 DSNNMixedHeb 11 0.639 0.699 0.666 0.027 5
DSNNWeightedMag 15 0.656 0.732 0.710 0.031 5
SparseModel 80 0.609 0.640 0.623 0.014 5
0.015 DSNNMixedHeb 18 0.735 0.789 0.776 0.023 5
DSNNWeightedMag 25 0.793 0.822 0.809 0.011 5
SparseModel 78 0.745 0.765 0.754 0.009 5
0.020 DSNNMixedHeb 39 0.819 0.852 0.836 0.015 4
DSNNWeightedMag 42 0.830 0.880 0.859 0.025 4
SparseModel 70 0.807 0.820 0.814 0.007 4
0.025 DSNNMixedHeb 34 0.860 0.877 0.872 0.008 4
DSNNWeightedMag 55 0.892 0.905 0.900 0.006 4
SparseModel 78 0.841 0.864 0.854 0.011 4
0.030 DSNNMixedHeb 51 0.870 0.894 0.886 0.011 4
DSNNWeightedMag 77 0.901 0.915 0.908 0.007 4
SparseModel 90 0.865 0.891 0.880 0.011 4
0.035 DSNNMixedHeb 65 0.891 0.912 0.900 0.009 4
DSNNWeightedMag 74 0.900 0.922 0.913 0.009 4
SparseModel 68 0.878 0.903 0.886 0.012 4
0.040 DSNNMixedHeb 46 0.911 0.916 0.914 0.002 4
DSNNWeightedMag 84 0.922 0.927 0.924 0.002 4
SparseModel 81 0.895 0.909 0.902 0.006 4
0.045 DSNNMixedHeb 70 0.927 0.930 0.929 0.002 4
DSNNWeightedMag 81 0.925 0.942 0.934 0.007 4
SparseModel 80 0.910 0.921 0.917 0.005 4
... ... ... ... ... ... ... ...
0.055 DSNNMixedHeb 74 0.930 0.942 0.936 0.005 4
DSNNWeightedMag 70 0.929 0.936 0.933 0.003 4
SparseModel 77 0.928 0.933 0.930 0.003 4
0.060 DSNNMixedHeb 73 0.933 0.947 0.939 0.006 4
DSNNWeightedMag 71 0.933 0.942 0.937 0.004 4
SparseModel 74 0.931 0.934 0.933 0.001 4
0.065 DSNNMixedHeb 85 0.942 0.945 0.944 0.001 4
DSNNWeightedMag 80 0.941 0.947 0.943 0.003 4
SparseModel 82 0.929 0.941 0.934 0.005 4
0.070 DSNNMixedHeb 76 0.936 0.945 0.940 0.004 4
DSNNWeightedMag 82 0.940 0.947 0.943 0.003 4
SparseModel 63 0.924 0.935 0.932 0.006 4
0.075 DSNNMixedHeb 73 0.939 0.949 0.943 0.005 4
DSNNWeightedMag 95 0.945 0.948 0.947 0.001 4
SparseModel 83 0.940 0.944 0.943 0.002 4
0.080 DSNNMixedHeb 73 0.943 0.952 0.946 0.004 4
DSNNWeightedMag 82 0.944 0.948 0.947 0.002 4
SparseModel 86 0.938 0.942 0.940 0.002 4
0.085 DSNNMixedHeb 72 0.947 0.951 0.948 0.002 4
DSNNWeightedMag 88 0.940 0.949 0.945 0.004 4
SparseModel 86 0.938 0.946 0.943 0.003 4
0.090 DSNNMixedHeb 77 0.947 0.951 0.949 0.002 4
DSNNWeightedMag 87 0.947 0.949 0.949 0.001 4
SparseModel 86 0.943 0.949 0.945 0.003 4
0.095 DSNNMixedHeb 70 0.946 0.954 0.948 0.004 4
DSNNWeightedMag 71 0.945 0.950 0.948 0.002 4
SparseModel 80 0.942 0.948 0.944 0.003 4
0.100 DSNNMixedHeb 82 0.946 0.954 0.950 0.003 4
DSNNWeightedMag 93 0.949 0.952 0.950 0.002 4
SparseModel 74 0.946 0.949 0.948 0.001 4

63 rows × 6 columns


In [21]:
!source ~/.bash_profile
!echo $GOOGLE_DRIVE_PATH
!echo hi


hi

In [24]:
def plot_for_paper():
    rcParams['figure.figsize'] = 10,6
    matplotlib.rc('xtick', labelsize=14)
    matplotlib.rc('ytick', labelsize=14)
    matplotlib.rc('ytick', labelsize=14)
    plt.rcParams.update({'text.usetex': True})
    plt.rcParams.update({'axes.labelsize': 14})
    plt.rcParams.update({'legend.fontsize': 14, 'legend.loc': 'lower right'})
    plt.rcParams.update({'font.size': 14})
    plt.rcParams.update({"axes.grid": True, "grid.linewidth": 0.5})
    plt.rcParams.update({'font.family': 'serif'})
    plt.rcParams.update({'text.usetex': True})    
    
    
plot_for_paper()

fig, ax = plt.subplots()
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
plt.errorbar(
    x=equivalent_on_percs,
    y=val_means,
    yerr=val_stds,
    color='k',
    marker='.',
    lw=0,
    elinewidth=2,
    capsize=4,
    markersize=5,
    label="Small-Dense Equivalents"
)

plt.xlabel("% of active weights ($\sigma$)")
plt.ylabel("test accuracy")
plt.ylim((0.4, 1.0))
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles=handles[1:], labels=labels[1:])
fig_path = r"~/Google Drive/papers/dynamic_sparse/figures/neurips_2019/active_weights.png"
fig_path = os.path.expanduser(fig_path)

plt.savefig(fig_path, dpi=1000, bbox_inches='tight')



In [ ]:
# plt.rcParams['figure.figsize'] = (8, 3)
    font_size=12
#     plt.rcParams.update({'font.size': font_size})
#     plt.rcParams.update({'font.family': 'Times New Roman'})
#     plt.rcParams.update({'axes.labelsize': font_size})
#     plt.rcParams.update({'axes.titlesize': 1.5*font_size})
#     plt.rcParams.update({'legend.fontsize': font_size})
#     plt.rcParams.update({'xtick.labelsize': font_size})
#     plt.rcParams.update({'ytick.labelsize': font_size})
#     plt.rcParams.update({'savefig.dpi': 2*plt.rcParams['savefig.dpi']})
#     plt.rcParams.update({'xtick.major.size': 3})
#     plt.rcParams.update({'xtick.minor.size': 3})
#     plt.rcParams.update({'xtick.major.width': 1})
#     plt.rcParams.update({'xtick.minor.width': 1})
#     plt.rcParams.update({'ytick.major.size': 3})
#     plt.rcParams.update({'ytick.minor.size': 3})
#     plt.rcParams.update({'ytick.major.width': 1})
#     plt.rcParams.update({'ytick.minor.width': 1 })
    # plt.rcParams.update({})

In [ ]:
# sns.scatterplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_max', hue='model')
plt.ylim(0.7,0.98)
plt.errorbar(
    x=equivalent_on_percs,
    y=val_means,
    yerr=val_stds,
    color='k',
    marker='*',
    lw=0,
    elinewidth=2,
    capsize=2,
    markersize=10,
    label="Small-Dense Equivalents"
)

In [ ]:
rcParams['figure.figsize'] = 16, 8
filter = df_plot['model'] != 'Static'
sns.lineplot(data=df_plot[filter], x='on_perc', y='val_acc_max_epoch', hue='model')

In [ ]:
sns.lineplot(data=df_plot, x='on_perc', y='val_acc_last', hue='model')

In [ ]:


In [ ]:


In [ ]: