Experiment:

Compare Bayesian optimization experiments with random hyperparameter search

Motivation.

Evaluate Bayesian optimization as a hyperparameter search tool

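Conclusion

Over 300 trials each, both SigOpt runs reached a higher mean best validation accuracy (0.682 and 0.704) than random search (0.483), with a smaller spread (std 0.097 and 0.068 vs 0.156). The best single trial was also slightly higher for SigOpt (0.766 and 0.762 vs 0.733).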



In [1]:

    
%load_ext autoreload
%autoreload 2



In [2]:

    
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import glob
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *
from nupic.research.frameworks.dynamic_sparse.common.browser import *
import re

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rcParams

%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set(style="whitegrid")
sns.set_palette("colorblind")

Load and check data



In [5]:

    
# Result folders to compare; each one lives under ~/nta/results.
exps = ['sigopt_test1', 'sigopt_test2', 'sigopt_test3']
paths = list(map(
    lambda exp_name: os.path.expanduser("~/nta/results/{}".format(exp_name)),
    exps,
))
# load_many comes from the wildcard `browser` import; it yields one combined frame.
df = load_many(paths)



In [6]:

    
# Sanity check for the pattern used to pull the leading trial number out of an
# experiment name. A raw string keeps `\d` a regex token rather than an
# (invalid) string escape sequence.
test_string = '0_learning_'
re.match(r'^\d+', test_string)[0]









    Out[6]:





'0'



In [7]:

    
# Preview the first five rows of the combined results to sanity-check the load.
df.head(5)









    Out[7]:







  
    
      
      Experiment Name
      train_acc_max
      train_acc_max_epoch
      train_acc_min
      train_acc_min_epoch
      train_acc_median
      train_acc_last
      val_acc_max
      val_acc_max_epoch
      val_acc_min
      ...
      momentum
      name
      network
      num_classes
      on_perc
      params_space
      performance_metric
      stats_mean
      stats_std
      weight_decay
    
  
  
    
      0
      1_batch_size_test=1024,batch_size_train=256,ch...
      0.66006
      12
      0.49026
      0
      0.65501
      0.65844
      0.5520
      17
      0.4040
      ...
      0.657374
      sigopt_test1
      resnet18
      10
      0.252978
      {'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...
      val_acc
      0.473367
      0.2009
      0.033410
    
    
      1
      2_batch_size_test=1024,batch_size_train=256,ch...
      0.54744
      8
      0.39516
      0
      0.53899
      0.53872
      0.4595
      9
      0.1874
      ...
      0.548898
      sigopt_test1
      resnet18
      10
      0.958674
      {'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...
      val_acc
      0.473367
      0.2009
      0.045435
    
    
      2
      3_batch_size_test=1024,batch_size_train=256,ch...
      0.32850
      1
      0.29214
      16
      0.31230
      0.30276
      0.3089
      7
      0.1165
      ...
      0.106052
      sigopt_test1
      resnet18
      10
      0.869434
      {'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...
      val_acc
      0.473367
      0.2009
      0.093472
    
    
      3
      4_batch_size_test=1024,batch_size_train=256,ch...
      0.50326
      1
      0.44154
      19
      0.47013
      0.44154
      0.4273
      2
      0.1826
      ...
      0.810624
      sigopt_test1
      resnet18
      10
      0.106663
      {'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...
      val_acc
      0.473367
      0.2009
      0.080203
    
    
      4
      5_batch_size_test=1024,batch_size_train=256,ch...
      0.52088
      6
      0.38364
      0
      0.51390
      0.51248
      0.3881
      1
      0.2451
      ...
      0.186532
      sigopt_test1
      resnet18
      10
      0.646579
      {'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...
      val_acc
      0.473367
      0.2009
      0.056150
    
  

5 rows × 39 columns



In [8]:

    
# List every available column (metrics and hyperparameters) in the loaded frame.
df.columns









    Out[8]:





Index(['Experiment Name', 'train_acc_max', 'train_acc_max_epoch',
       'train_acc_min', 'train_acc_min_epoch', 'train_acc_median',
       'train_acc_last', 'val_acc_max', 'val_acc_max_epoch', 'val_acc_min',
       'val_acc_min_epoch', 'val_acc_median', 'val_acc_last', 'val_acc_all',
       'epochs', 'experiment_file_name', 'trial_time', 'mean_epoch_time',
       'batch_size_test', 'batch_size_train', 'checkpoint_dir', 'data_dir',
       'dataset_name', 'device', 'experiment_type', 'input_size',
       'learning_rate', 'local_dir', 'model', 'momentum', 'name', 'network',
       'num_classes', 'on_perc', 'params_space', 'performance_metric',
       'stats_mean', 'stats_std', 'weight_decay'],
      dtype='object')



In [9]:

    
# Show the distinct experiment-state files found, to confirm every run was loaded.
df['experiment_file_name'].unique()









    Out[9]:





array(['/Users/lsouza/nta/results/sigopt_test1/experiment_state-2020-03-20_02-59-23.json',
       '/Users/lsouza/nta/results/sigopt_test2/experiment_state-2020-03-20_18-07-05.json',
       '/Users/lsouza/nta/results/sigopt_test3/experiment_state-2020-03-25_01-36-27.json'],
      dtype=object)



In [10]:

    
def fix_name(s):
    """Map an experiment-state file path to a short, human-readable label.

    The label is chosen from the name of the directory that contains the
    state file (``.../sigopt_test1/experiment_state-*.json`` -> ``'SigOpt-A'``),
    so the mapping does not depend on a particular user's home directory or on
    the timestamp embedded in the file name.

    Parameters
    ----------
    s : str
        Path to an ``experiment_state`` file, or an already-assigned label.

    Returns
    -------
    The label for a recognised experiment directory; otherwise ``s`` itself,
    unchanged. Passing unknown values through keeps the relabelling cell
    idempotent: re-running it no longer turns existing labels into ``None``.
    """
    labels = {
        'sigopt_test1': 'SigOpt-A',
        'sigopt_test2': 'SigOpt-B',
        'sigopt_test3': 'Random Search',
    }
    # The experiment name is the parent directory of the state file.
    exp_dir = os.path.basename(os.path.dirname(str(s)))
    return labels.get(exp_dir, s)

# Overwrite the raw experiment_state path with the label returned by fix_name.
# NOTE(review): the column is replaced in place, so re-running this cell feeds
# the already-relabelled values back through fix_name — confirm that is safe.
df['experiment_file_name'] = df['experiment_file_name'].apply(fix_name)



In [11]:

    
# Check the values now stored in experiment_file_name after the relabelling.
df['experiment_file_name'].unique()









    Out[11]:





array(['SigOpt-A', 'SigOpt-B', 'Random Search'], dtype=object)



In [12]:

    
def get_index(s):
    """Return the integer formed by the leading digits of ``s``.

    Parameters
    ----------
    s : str
        A string that starts with a number, e.g. ``'18_batch_size_test=1024,...'``.

    Returns
    -------
    int
        The leading number (``18`` for the example above).

    Raises
    ------
    ValueError
        If ``s`` does not start with a digit.
    """
    # Raw string so `\d` is a regex token, not an invalid string escape.
    match = re.match(r'^\d+', s)
    if match is None:
        raise ValueError("no leading number in {!r}".format(s))
    return int(match.group(0))

# Store the number that prefixes each 'Experiment Name' (e.g. 18 for '18_batch_...')
# as an integer column; the plots below use it as the x axis.
df['index_pos'] = df['Experiment Name'].apply(get_index)



In [13]:

    
# Copy `on_perc` into a `density` column, the name used when listing the best trials.
df['density'] = df['on_perc']



In [14]:

    
# Spot-check one full record (row position 17) to see every field of a single trial.
df.iloc[17]









    Out[14]:





Experiment Name         18_batch_size_test=1024,batch_size_train=256,c...
train_acc_max                                                      0.5637
train_acc_max_epoch                                                    19
train_acc_min                                                     0.13784
train_acc_min_epoch                                                     0
train_acc_median                                                  0.47292
train_acc_last                                                     0.5637
val_acc_max                                                         0.476
val_acc_max_epoch                                                      19
val_acc_min                                                        0.1944
val_acc_min_epoch                                                       0
val_acc_median                                                    0.42475
val_acc_last                                                        0.476
val_acc_all             0     0.1944
1     0.2615
2     0.3098
3     0...
epochs                                                                 20
experiment_file_name                                             SigOpt-A
trial_time                                                        9.81008
mean_epoch_time                                                  0.490504
batch_size_test                                                      1024
batch_size_train                                                      256
checkpoint_dir                               /home/ubuntu/nta/checkpoints
data_dir                                                       ~/nta/data
dataset_name                                                      CIFAR10
device                                                               cuda
experiment_type                                                    SigOpt
input_size                                                        22.3333
learning_rate                                                      0.0001
local_dir                                        /home/ubuntu/nta/results
model                                                         SparseModel
momentum                                                         0.289614
name                                                         sigopt_test1
network                                                          resnet18
num_classes                                                            10
on_perc                                                            0.8144
params_space            {'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...
performance_metric                                                val_acc
stats_mean                                                       0.473367
stats_std                                                          0.2009
weight_decay                                                    0.0112179
index_pos                                                              18
density                                                            0.8144
Name: 17, dtype: object



In [15]:

    
# Count rows with a non-null `model` per experiment, to confirm the runs are the same size.
df.groupby('experiment_file_name')['model'].count()









    Out[15]:





experiment_file_name
Random Search    300
SigOpt-A         300
SigOpt-B         300
Name: model, dtype: int64

Analysis



In [16]:

    
# helper functions
def mean_and_std(s):
    """Format the mean and standard deviation of ``s`` as ``'mean ± std'``.

    ``s`` must provide ``mean()`` and ``std()``; both values are rendered
    with three decimal places.
    """
    center, spread = s.mean(), s.std()
    return "{:.3f} ± {:.3f}".format(center, spread)

def round_mean(s):
    """Return the mean of ``s``, rounded to the nearest integer, as a string.

    The function name is also the column label pandas shows when this is
    passed to ``.agg``.
    """
    nearest = round(s.mean())
    return "{:.0f}".format(nearest)

# Summary statistics reported for the best validation accuracy of each group.
stats = ['min', 'max', 'mean', 'std']

def agg(columns, filter=None, round=3):
    """Summarise the global ``df`` per group.

    Parameters
    ----------
    columns : list of str
        Column names to group by.
    filter : optional
        Row selector applied as ``df[filter]`` before grouping; ``None``
        (the default) uses every row. The name shadows the builtin but is
        kept because it is part of the call signature.
    round : int
        Number of decimals passed to ``DataFrame.round`` on the result. This
        also shadows the builtin inside the function body only.

    Returns
    -------
    DataFrame with, per group: the rounded mean epoch of best validation
    accuracy, min/max/mean/std of ``val_acc_max``, and the trial count.
    """
    source = df if filter is None else df[filter]
    spec = {
        'val_acc_max_epoch': round_mean,
        'val_acc_max': stats,
        'model': ['count'],
    }
    summary = source.groupby(columns).agg(spec)
    return summary.round(round)



In [17]:

    
# Summarise best validation accuracy per experiment (one row per experiment label).
agg(['experiment_file_name'])









    Out[17]:







  
    
      
      val_acc_max_epoch
      val_acc_max
      model
    
    
      
      round_mean
      min
      max
      mean
      std
      count
    
    
      experiment_file_name
      
      
      
      
      
      
    
  
  
    
      Random Search
      10
      0.100
      0.733
      0.483
      0.156
      300
    
    
      SigOpt-A
      15
      0.168
      0.766
      0.682
      0.097
      300
    
    
      SigOpt-B
      16
      0.212
      0.762
      0.704
      0.068
      300



In [25]:

    
def plot_acc_over_time(plot_title):
    """Plot final validation accuracy against trial index for one experiment.

    Reads the global ``df`` and draws onto a new 12x6 figure.

    Parameters
    ----------
    plot_title : str
        Value of ``experiment_file_name`` selecting the rows to plot; it is
        also used as the figure title.
    """
    plt.figure(figsize=(12,6))
    df_plot = df[df['experiment_file_name'] == plot_title]
    # Pass x and y by keyword: seaborn >= 0.12 treats the only positional
    # argument as `data`, so a positional x is no longer accepted.
    sns.lineplot(data=df_plot, x='index_pos', y='val_acc_last')
    # Ticks every 30 trials up to and including 300, matching
    # plot_best_acc_over_time.
    plt.xticks(np.arange(0,301,30))
    plt.ylim(0,0.80)
    plt.title(plot_title)



In [26]:

    
# One figure per experiment: validation accuracy of each trial, in trial order.
for experiment_label in ('Random Search', 'SigOpt-A', 'SigOpt-B'):
    plot_acc_over_time(experiment_label)



In [30]:

    
def accumulate(series):
    """Return the running maximum of ``series`` as a list.

    Element ``i`` of the result is the largest value among the first
    ``i + 1`` elements of ``series``.

    Parameters
    ----------
    series : iterable
        Values to scan, in order.

    Returns
    -------
    list
        Running maxima, the same length as the input; an empty input gives
        an empty list instead of raising ``IndexError``.
    """
    # Local import so the name `accumulate` keeps referring to this function.
    import itertools
    return list(itertools.accumulate(series, max))

def plot_best_acc_over_time(plot_title):
    """Plot the best validation accuracy reached so far against trial index.

    Reads the global ``df``, orders the selected trials by ``index_pos`` and
    plots the running maximum of ``val_acc_last`` on a new 12x6 figure.

    Parameters
    ----------
    plot_title : str
        Value of ``experiment_file_name`` selecting the rows to plot; it is
        also used as the figure title.
    """
    plt.figure(figsize=(12,6))
    selected = df[df['experiment_file_name'] == plot_title].sort_values('index_pos')
    # assign() builds a new frame, so the derived column is never written
    # onto a filtered view of the global df.
    df_plot = selected.assign(cum_acc=accumulate(selected['val_acc_last']))
    # Pass x and y by keyword: seaborn >= 0.12 treats the only positional
    # argument as `data`, so a positional x is no longer accepted.
    sns.lineplot(data=df_plot, x='index_pos', y='cum_acc')
    plt.xticks(np.arange(0,301,30))
    plt.ylim(0,0.80)
    plt.title(plot_title)



In [32]:

    
# One figure per experiment: running best validation accuracy over the trials.
for experiment_label in ('Random Search', 'SigOpt-A', 'SigOpt-B'):
    plot_best_acc_over_time(experiment_label)



In [37]:

    
# list top 5 values of each
# show best values
def show_best(experiment):
    """Return the five trials of ``experiment`` with the highest ``val_acc_last``.

    Reads the global ``df``. The result keeps only the trial index, the tuned
    hyperparameters and the final validation accuracy, best trial first.
    """
    shown_columns = ['index_pos', 'learning_rate', 'density', 'momentum', 'weight_decay', 'val_acc_last']
    belongs = df['experiment_file_name'] == experiment
    ranked = df[belongs].sort_values('val_acc_last', ascending=False)
    top_five = ranked[:5]
    return top_five[shown_columns]



In [38]:

    
# Five best random-search trials by final validation accuracy.
show_best('Random Search')









    Out[38]:







  
    
      
      index_pos
      learning_rate
      density
      momentum
      weight_decay
      val_acc_last
    
  
  
    
      813
      213
      0.058898
      0.878634
      0.574113
      0.002438
      0.7332
    
    
      666
      66
      0.083424
      0.770878
      0.898470
      0.001958
      0.7170
    
    
      653
      53
      0.008048
      0.239059
      0.726629
      0.027634
      0.7164
    
    
      810
      210
      0.164996
      0.398287
      0.549241
      0.000841
      0.7146
    
    
      685
      85
      0.011424
      0.580148
      0.635030
      0.010512
      0.7055



In [39]:

    
# Five best SigOpt-A trials by final validation accuracy.
show_best('SigOpt-A')









    Out[39]:







  
    
      
      index_pos
      learning_rate
      density
      momentum
      weight_decay
      val_acc_last
    
  
  
    
      91
      92
      0.061866
      0.622799
      0.844554
      0.00001
      0.7657
    
    
      145
      146
      0.081277
      0.408934
      0.776824
      0.00001
      0.7637
    
    
      67
      68
      0.057478
      0.712429
      0.835691
      0.00001
      0.7620
    
    
      97
      98
      0.043534
      1.000000
      0.842895
      0.00001
      0.7604
    
    
      146
      147
      0.037768
      1.000000
      0.752623
      0.00001
      0.7581



In [40]:

    
# Five best SigOpt-B trials by final validation accuracy.
show_best('SigOpt-B')









    Out[40]:







  
    
      
      index_pos
      learning_rate
      density
      momentum
      weight_decay
      val_acc_last
    
  
  
    
      476
      177
      0.054299
      0.752732
      0.587002
      0.0
      0.7605
    
    
      500
      201
      0.154476
      0.572523
      0.603093
      0.0
      0.7582
    
    
      484
      185
      0.105273
      0.725239
      0.107731
      0.0
      0.7552
    
    
      486
      187
      0.121463
      0.742044
      0.079312
      0.0
      0.7546
    
    
      505
      206
      0.148342
      0.341144
      0.446384
      0.0
      0.7540



In [ ]:

	Experiment Name	train_acc_max	train_acc_max_epoch	train_acc_min	train_acc_min_epoch	train_acc_median	train_acc_last	val_acc_max	val_acc_max_epoch	val_acc_min	...	momentum	name	network	num_classes	on_perc	params_space	performance_metric	stats_mean	stats_std	weight_decay
0	1_batch_size_test=1024,batch_size_train=256,ch...	0.66006	12	0.49026	0	0.65501	0.65844	0.5520	17	0.4040	...	0.657374	sigopt_test1	resnet18	10	0.252978	{'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...	val_acc	0.473367	0.2009	0.033410
1	2_batch_size_test=1024,batch_size_train=256,ch...	0.54744	8	0.39516	0	0.53899	0.53872	0.4595	9	0.1874	...	0.548898	sigopt_test1	resnet18	10	0.958674	{'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...	val_acc	0.473367	0.2009	0.045435
2	3_batch_size_test=1024,batch_size_train=256,ch...	0.32850	1	0.29214	16	0.31230	0.30276	0.3089	7	0.1165	...	0.106052	sigopt_test1	resnet18	10	0.869434	{'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...	val_acc	0.473367	0.2009	0.093472
3	4_batch_size_test=1024,batch_size_train=256,ch...	0.50326	1	0.44154	19	0.47013	0.44154	0.4273	2	0.1826	...	0.810624	sigopt_test1	resnet18	10	0.106663	{'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...	val_acc	0.473367	0.2009	0.080203
4	5_batch_size_test=1024,batch_size_train=256,ch...	0.52088	6	0.38364	0	0.51390	0.51248	0.3881	1	0.2451	...	0.186532	sigopt_test1	resnet18	10	0.646579	{'bounds': {'max': 0.2, 'min': 0.0001}, 'name'...	val_acc	0.473367	0.2009	0.056150

	val_acc_max_epoch	val_acc_max				model
	round_mean	min	max	mean	std	count
experiment_file_name
Random Search	10	0.100	0.733	0.483	0.156	300
SigOpt-A	15	0.168	0.766	0.682	0.097	300
SigOpt-B	16	0.212	0.762	0.704	0.068	300

	index_pos	learning_rate	density	momentum	weight_decay	val_acc_last
813	213	0.058898	0.878634	0.574113	0.002438	0.7332
666	66	0.083424	0.770878	0.898470	0.001958	0.7170
653	53	0.008048	0.239059	0.726629	0.027634	0.7164
810	210	0.164996	0.398287	0.549241	0.000841	0.7146
685	85	0.011424	0.580148	0.635030	0.010512	0.7055

	index_pos	learning_rate	density	momentum	weight_decay	val_acc_last
91	92	0.061866	0.622799	0.844554	0.00001	0.7657
145	146	0.081277	0.408934	0.776824	0.00001	0.7637
67	68	0.057478	0.712429	0.835691	0.00001	0.7620
97	98	0.043534	1.000000	0.842895	0.00001	0.7604
146	147	0.037768	1.000000	0.752623	0.00001	0.7581

	index_pos	learning_rate	density	momentum	val_acc_last
476	177	0.054299	0.752732	0.587002	0.7605
500	201	0.154476	0.572523	0.603093	0.7582
484	185	0.105273	0.725239	0.107731	0.7552
486	187	0.121463	0.742044	0.079312	0.7546
505	206	0.148342	0.341144	0.446384	0.7540