In [1]:
%load_ext autoreload
%autoreload 2

In [130]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys   # used by RayTuneExperimentBrowser below
import glob
import json  # used by RayTuneExperimentBrowser below
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *

Data loading and general exploration


In [132]:
browser = RayTuneExperimentBrowser(os.path.expanduser("~/nta/results/VGG19SparseFull"))

In [133]:
df = browser.best_experiments(min_test_accuracy=0.0, min_noise_accuracy=0.0, sort_by="test_accuracy")


/Users/lsouza/miniconda3/envs/numenta/lib/python3.7/site-packages/numpy/core/fromnumeric.py:3118: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
/Users/lsouza/miniconda3/envs/numenta/lib/python3.7/site-packages/numpy/core/_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

In [134]:
df.head(5)


Out[134]:
Experiment Name test_accuracy test_accuracy_max epoch_test_accuracy noise_accuracy noise_accuracy_max epoch_noise_accuracy epochs batch_size batches_in_epoch ... path repetitions restore_supported sync_function test_batch_size test_batches_in_epoch upload_dir use_max_pooling weight_decay weight_sparsity
9 9_batch_size=128,batches_in_epoch=403,boost_st... 0.7021 0.7021 89 0.2357 0.2584 52 90 128 403 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000990 NaN
175 175_batch_size=128,batches_in_epoch=425,boost_... 0.7017 0.7091 140 0.2208 0.2528 87 164 128 425 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000609 NaN
183 183_batch_size=128,batches_in_epoch=529,boost_... 0.7005 0.7005 89 0.2537 0.2821 29 90 128 529 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000832 NaN
277 277_batch_size=128,batches_in_epoch=518,boost_... 0.6990 0.6990 89 0.2584 0.2932 71 90 128 518 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000680 NaN
11 11_batch_size=128,batches_in_epoch=564,boost_s... 0.6965 0.6989 159 0.2341 0.2620 85 164 128 564 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000698 NaN

5 rows × 49 columns


In [136]:
df.columns


Out[136]:
Index(['Experiment Name', 'test_accuracy', 'test_accuracy_max',
       'epoch_test_accuracy', 'noise_accuracy', 'noise_accuracy_max',
       'epoch_noise_accuracy', 'epochs', 'batch_size', 'batches_in_epoch',
       'batches_in_first_epoch', 'block_sizes', 'boost_strength',
       'boost_strength_factor', 'checkpoint_at_end', 'cnn_kernel_size',
       'cnn_out_channels', 'cnn_percent_on', 'cnn_weight_sparsity',
       'cpu_percentage', 'data_dir', 'dataset', 'experiment',
       'first_epoch_batch_size', 'gpu_percentage', 'input_shape', 'iterations',
       'k_inference_factor', 'learning_rate', 'learning_rate_gamma',
       'linear_n', 'linear_percent_on', 'lr_step_schedule', 'momentum', 'name',
       'network_type', 'num_cpus', 'num_gpus', 'output_size', 'path',
       'repetitions', 'restore_supported', 'sync_function', 'test_batch_size',
       'test_batches_in_epoch', 'upload_dir', 'use_max_pooling',
       'weight_decay', 'weight_sparsity'],
      dtype='object')

In [137]:
df.iloc[0]


Out[137]:
Experiment Name           9_batch_size=128,batches_in_epoch=403,boost_st...
test_accuracy                                                        0.7021
test_accuracy_max                                                    0.7021
epoch_test_accuracy                                                      89
noise_accuracy                                                       0.2357
noise_accuracy_max                                                   0.2584
epoch_noise_accuracy                                                     52
epochs                                                                   90
batch_size                                                              128
batches_in_epoch                                                        403
batches_in_first_epoch                                                  600
block_sizes                                                             3.2
boost_strength                                                      1.73176
boost_strength_factor                                              0.626255
checkpoint_at_end                                                      True
cnn_kernel_size                                                           3
cnn_out_channels                                                      294.4
cnn_percent_on                                                      0.31395
cnn_weight_sparsity                                                0.917248
cpu_percentage                                                            1
data_dir                                                     ~/nta/datasets
dataset                                                            CIFAR100
experiment                                                             grid
first_epoch_batch_size                                                    4
gpu_percentage                                                        0.165
input_shape                                                         22.3333
iterations                                                              164
k_inference_factor                                                 0.977242
learning_rate                                                      0.102157
learning_rate_gamma                                               0.0510721
linear_n                                                                NaN
linear_percent_on                                                       NaN
lr_step_schedule                                                      101.5
momentum                                                           0.399231
name                                                        VGG19SparseFull
network_type                                                            vgg
num_cpus                                                                 31
num_gpus                                                                  4
output_size                                                             100
path                                                          ~/nta/results
repetitions                                                             150
restore_supported                                                      True
sync_function             aws s3 sync `dirname {local_dir}` {remote_dir}...
test_batch_size                                                         128
test_batches_in_epoch                                                   500
upload_dir                                          s3://lsouza/ray/results
use_max_pooling                                                        True
weight_decay                                                     0.00099037
weight_sparsity                                                         NaN
Name: 9, dtype: object

Epochs and Accuracy exploration


In [155]:
len(df[df['epochs']==164])


Out[155]:
45

In [156]:
df[df['epochs']==164][['test_accuracy_max', 'noise_accuracy_max']].corr()


Out[156]:
test_accuracy_max noise_accuracy_max
test_accuracy_max 1.000000 -0.278818
noise_accuracy_max -0.278818 1.000000

In [157]:
df[df['epochs']==164][['test_accuracy_max', 'noise_accuracy_max']].min()


Out[157]:
test_accuracy_max     0.5826
noise_accuracy_max    0.2199
dtype: float64

In [158]:
df[df['epochs']==164][['test_accuracy_max', 'noise_accuracy_max']].mean()


Out[158]:
test_accuracy_max     0.663829
noise_accuracy_max    0.288516
dtype: float64

In [159]:
df[df['epochs']==164][['test_accuracy_max', 'noise_accuracy_max']].max()


Out[159]:
test_accuracy_max     0.7091
noise_accuracy_max    0.3271
dtype: float64

In [160]:
len(df[df['epochs']==90])


Out[160]:
43

In [161]:
df[df['epochs']==90][['test_accuracy_max', 'noise_accuracy_max']].corr()


Out[161]:
test_accuracy_max noise_accuracy_max
test_accuracy_max 1.000000 0.584994
noise_accuracy_max 0.584994 1.000000

In [162]:
df[df['epochs']==90][['test_accuracy_max', 'noise_accuracy_max']].min()


Out[162]:
test_accuracy_max     0.4906
noise_accuracy_max    0.1592
dtype: float64

In [163]:
df[df['epochs']==90][['test_accuracy_max', 'noise_accuracy_max']].mean()


Out[163]:
test_accuracy_max     0.638398
noise_accuracy_max    0.252256
dtype: float64

In [164]:
df[df['epochs']==90][['test_accuracy_max', 'noise_accuracy_max']].max()


Out[164]:
test_accuracy_max     0.7021
noise_accuracy_max    0.2967
dtype: float64
  • It is interesting to see that the experiments that ran for 90 epochs show a very different correlation between noise and test accuracy than the experiments that ran for 164 epochs, even though the averages are quite similar. What could that mean? Perhaps, past some point, progress in test accuracy leads to a regression in noise accuracy, which would imply that the more the model fits the standard data, the lower its noise accuracy. A quick significance check on the gap between the two correlations is sketched below.
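
One way to probe whether the gap between the two correlations (-0.28 in the 164-epoch group vs. 0.58 in the 90-epoch group) is more than noise is a Fisher z-test on two independent correlation coefficients. This is a minimal sketch, assuming the `df` dataframe loaded above and that scipy is available; it was not part of the original run.

In [ ]:
# Minimal sketch (not part of the original run): Fisher z-test for the
# difference between two independent Pearson correlations.
import numpy as np
from scipy import stats

def compare_correlations(r1, n1, r2, n2):
  """Two-sided Fisher z-test comparing two independent correlations."""
  z1, z2 = np.arctanh(r1), np.arctanh(r2)
  se = np.sqrt(1.0 / (n1 - 3) + 1.0 / (n2 - 3))
  z = (z1 - z2) / se
  p_value = 2 * (1 - stats.norm.cdf(abs(z)))
  return z, p_value

def group_corr(data, n_epochs):
  """Correlation between the two max accuracies within one epoch group."""
  group = data[data['epochs'] == n_epochs][['test_accuracy_max', 'noise_accuracy_max']]
  return group.corr().iloc[0, 1], len(group)

r_164, n_164 = group_corr(df, 164)
r_90, n_90 = group_corr(df, 90)
print(compare_correlations(r_164, n_164, r_90, n_90))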

In [185]:
df[df['epochs']>=30][['epochs', 'test_accuracy']].astype(np.float32).corr()


Out[185]:
epochs test_accuracy
epochs 1.000000 0.830726
test_accuracy 0.830726 1.000000

In [186]:
df[df['epochs']>=30][['epochs', 'noise_accuracy']].astype(np.float32).corr()


Out[186]:
epochs noise_accuracy
epochs 1.00000 0.80023
noise_accuracy 0.80023 1.00000
  • Test accuracy appears slightly more correlated with the number of epochs than noise accuracy is, but the difference is small and might be due to randomness; a bootstrap check is sketched below.
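
To gauge whether the 0.83 vs. 0.80 gap is meaningful, one option is to bootstrap the difference between the two correlations over the same set of experiments. A minimal sketch, assuming `df` from above; the resampling scheme and the 1,000-iteration count are additions, not part of the original notebook.

In [ ]:
# Minimal sketch (not part of the original analysis): bootstrap the gap
# corr(epochs, test_accuracy) - corr(epochs, noise_accuracy) to see whether
# it is distinguishable from zero.
import numpy as np

def bootstrap_corr_gap(data, n_iter=1000, seed=42):
  rng = np.random.RandomState(seed)
  sub = data[data['epochs'] >= 30][['epochs', 'test_accuracy', 'noise_accuracy']].astype(np.float32)
  gaps = []
  for _ in range(n_iter):
    sample = sub.sample(n=len(sub), replace=True, random_state=rng)
    corr = sample.corr()
    gaps.append(corr.loc['epochs', 'test_accuracy'] - corr.loc['epochs', 'noise_accuracy'])
  # 95% bootstrap interval and median of the gap
  return np.percentile(gaps, [2.5, 50, 97.5])

print(bootstrap_corr_gap(df))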

A look at other possible correlations


In [207]:
tunable_params_general = ['learning_rate', 'learning_rate_gamma', 'weight_decay', 'momentum', 'batch_size', 'batches_in_epoch']
tunable_params_sparsity = ['boost_strength', 'boost_strength_factor', 'k_inference_factor', 'cnn_percent_on', 'cnn_weight_sparsity']
tunable_params = tunable_params_general + tunable_params_sparsity
performance_metrics = ['noise_accuracy_max', 'test_accuracy_max']
corr_params = tunable_params + performance_metrics

df[corr_params].astype(np.float32).corr()


Out[207]:
learning_rate learning_rate_gamma weight_decay momentum batch_size batches_in_epoch boost_strength boost_strength_factor k_inference_factor cnn_percent_on cnn_weight_sparsity noise_accuracy_max test_accuracy_max
learning_rate 1.000000 0.012162 -0.017390 0.057644 -0.037703 -0.015762 -0.117501 -0.002551 -0.034224 -0.017177 0.001650 -0.145329 -0.097126
learning_rate_gamma 0.012162 1.000000 0.013388 0.037070 -0.023368 -0.035762 0.029487 0.068997 -0.034483 -0.121417 0.063874 -0.039836 -0.033162
weight_decay -0.017390 0.013388 1.000000 0.038532 -0.015457 -0.064426 -0.061364 0.088593 0.048938 -0.037099 -0.074064 -0.085577 -0.047994
momentum 0.057644 0.037070 0.038532 1.000000 0.058849 0.058075 0.009580 -0.032437 -0.055015 0.007072 -0.106112 -0.383913 -0.276776
batch_size -0.037703 -0.023368 -0.015457 0.058849 1.000000 0.040585 0.018939 -0.066548 -0.045015 0.007049 0.120368 0.136283 0.146644
batches_in_epoch -0.015762 -0.035762 -0.064426 0.058075 0.040585 1.000000 0.012808 -0.055749 -0.001159 -0.042075 -0.062545 0.115584 0.107763
boost_strength -0.117501 0.029487 -0.061364 0.009580 0.018939 0.012808 1.000000 -0.090654 0.077328 -0.026559 0.087035 0.040313 0.022680
boost_strength_factor -0.002551 0.068997 0.088593 -0.032437 -0.066548 -0.055749 -0.090654 1.000000 -0.011040 -0.028418 -0.104310 0.009095 0.003375
k_inference_factor -0.034224 -0.034483 0.048938 -0.055015 -0.045015 -0.001159 0.077328 -0.011040 1.000000 0.003868 0.059874 -0.043341 -0.071874
cnn_percent_on -0.017177 -0.121417 -0.037099 0.007072 0.007049 -0.042075 -0.026559 -0.028418 0.003868 1.000000 -0.004209 0.336341 0.368590
cnn_weight_sparsity 0.001650 0.063874 -0.074064 -0.106112 0.120368 -0.062545 0.087035 -0.104310 0.059874 -0.004209 1.000000 0.061532 0.042322
noise_accuracy_max -0.145329 -0.039836 -0.085577 -0.383913 0.136283 0.115584 0.040313 0.009095 -0.043341 0.336341 0.061532 1.000000 0.952541
test_accuracy_max -0.097126 -0.033162 -0.047994 -0.276776 0.146644 0.107763 0.022680 0.003375 -0.071874 0.368590 0.042322 0.952541 1.000000

In [208]:
df[corr_params].astype(np.float32).corr() > 0.3


Out[208]:
learning_rate learning_rate_gamma weight_decay momentum batch_size batches_in_epoch boost_strength boost_strength_factor k_inference_factor cnn_percent_on cnn_weight_sparsity noise_accuracy_max test_accuracy_max
learning_rate True False False False False False False False False False False False False
learning_rate_gamma False True False False False False False False False False False False False
weight_decay False False True False False False False False False False False False False
momentum False False False True False False False False False False False False False
batch_size False False False False True False False False False False False False False
batches_in_epoch False False False False False True False False False False False False False
boost_strength False False False False False False True False False False False False False
boost_strength_factor False False False False False False False True False False False False False
k_inference_factor False False False False False False False False True False False False False
cnn_percent_on False False False False False False False False False True False True True
cnn_weight_sparsity False False False False False False False False False False True False False
noise_accuracy_max False False False False False False False False False True False True True
test_accuracy_max False False False False False False False False False True False True True

In [209]:
df[corr_params].astype(np.float32).corr() < -0.3


Out[209]:
learning_rate learning_rate_gamma weight_decay momentum batch_size batches_in_epoch boost_strength boost_strength_factor k_inference_factor cnn_percent_on cnn_weight_sparsity noise_accuracy_max test_accuracy_max
learning_rate False False False False False False False False False False False False False
learning_rate_gamma False False False False False False False False False False False False False
weight_decay False False False False False False False False False False False False False
momentum False False False False False False False False False False False True False
batch_size False False False False False False False False False False False False False
batches_in_epoch False False False False False False False False False False False False False
boost_strength False False False False False False False False False False False False False
boost_strength_factor False False False False False False False False False False False False False
k_inference_factor False False False False False False False False False False False False False
cnn_percent_on False False False False False False False False False False False False False
cnn_weight_sparsity False False False False False False False False False False False False False
noise_accuracy_max False False False True False False False False False False False False False
test_accuracy_max False False False False False False False False False False False False False
  • Positive correlation: cnn_percent_on with both noise_accuracy_max and test_accuracy_max
  • Negative correlation: momentum with noise_accuracy_max (both relationships are plotted in the sketch below)
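
A quick scatter plot makes these two relationships easier to inspect than the raw correlation table. A minimal plotting sketch, assuming `df` is still in scope and matplotlib is installed; it was not part of the original run.

In [ ]:
# Sketch, not part of the original run; assumes matplotlib is installed.
import matplotlib.pyplot as plt

# Scatter the three parameter/metric pairs flagged by the correlation tables
pairs = [('cnn_percent_on', 'test_accuracy_max'),
         ('cnn_percent_on', 'noise_accuracy_max'),
         ('momentum', 'noise_accuracy_max')]
fig, axes = plt.subplots(1, 3, figsize=(12, 3.5))
for ax, (x, y) in zip(axes, pairs):
  ax.scatter(df[x].astype(np.float32), df[y].astype(np.float32), s=10, alpha=0.6)
  ax.set_xlabel(x)
  ax.set_ylabel(y)
plt.tight_layout()
plt.show()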

Further analysis of the tunable hyperparameters


In [234]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from pprint import pprint

In [237]:
# Including all experiments with 30 or more epochs
df_inf = df[df['epochs']>=30]
y1 = df_inf['noise_accuracy_max']
y2 = df_inf['test_accuracy_max']
X = df_inf[tunable_params].astype(np.float32)

# adjust all X to same scale
scaler = StandardScaler()
X = scaler.fit_transform(X)

model_noise = LinearRegression()
model_noise.fit(X, y1)

print("\n Noise accuracy")
pprint(list(zip(tunable_params, model_noise.coef_)))

model_test = LinearRegression()
model_test.fit(X, y2)

print("\n Test accuracy")
pprint(list(zip(tunable_params, model_test.coef_)))


 Noise accuracy
[('learning_rate', -0.008320055931102738),
 ('learning_rate_gamma', 0.00010922925444718565),
 ('weight_decay', -0.007074033772320695),
 ('momentum', -0.023921331255530413),
 ('batch_size', 0.017012582555930997),
 ('batches_in_epoch', 0.0065225404456871194),
 ('boost_strength', 0.005106812347459978),
 ('boost_strength_factor', -0.00043002669351718305),
 ('k_inference_factor', -0.004576034582844502),
 ('cnn_percent_on', 0.008260245666037353),
 ('cnn_weight_sparsity', 0.0016398916756805678)]

 Test accuracy
[('learning_rate', -0.0037434259712621066),
 ('learning_rate_gamma', 0.00355353048512084),
 ('weight_decay', -0.00425357050234218),
 ('momentum', -0.03087870199997384),
 ('batch_size', 0.04314875995376824),
 ('batches_in_epoch', 0.016936418027689627),
 ('boost_strength', 0.013619723602916817),
 ('boost_strength_factor', -0.0018614337515772711),
 ('k_inference_factor', -0.0158375244155911),
 ('cnn_percent_on', 0.045455169207160875),
 ('cnn_weight_sparsity', 0.0009522081649769887)]

In [238]:
# Including all experiments with 90 or more epochs ("completed")
df_inf = df[df['epochs']>=90]
y1 = df_inf['noise_accuracy_max']
y2 = df_inf['test_accuracy_max']
X = df_inf[tunable_params].astype(np.float32)

# adjust all X to same scale
scaler = StandardScaler()
X = scaler.fit_transform(X)

model_noise = LinearRegression()
model_noise.fit(X, y1)

print("\n Noise accuracy")
pprint(list(zip(tunable_params, model_noise.coef_)))

model_test = LinearRegression()
model_test.fit(X, y2)

print("\n Test accuracy")
pprint(list(zip(tunable_params, model_test.coef_)))


 Noise accuracy
[('learning_rate', -0.013711040159944263),
 ('learning_rate_gamma', -0.0001959410878818779),
 ('weight_decay', -0.013152972568734067),
 ('momentum', -0.021176038112329668),
 ('batch_size', 0.010269630267931313),
 ('batches_in_epoch', -0.00014625127560307852),
 ('boost_strength', 0.0010431710438098365),
 ('boost_strength_factor', -0.00037476933729079377),
 ('k_inference_factor', -0.0008177660350967232),
 ('cnn_percent_on', -0.0006308944388026454),
 ('cnn_weight_sparsity', -0.001361569311950573)]

 Test accuracy
[('learning_rate', 0.003507782817232606),
 ('learning_rate_gamma', -0.0014039065080307283),
 ('weight_decay', 0.000412071505479126),
 ('momentum', -0.007036921311942076),
 ('batch_size', 0.013137881191602896),
 ('batches_in_epoch', 0.0045387770677112515),
 ('boost_strength', 0.00018761581131504984),
 ('boost_strength_factor', -0.0024625983394766736),
 ('k_inference_factor', -0.002469787063475652),
 ('cnn_percent_on', 0.0201868856868994),
 ('cnn_weight_sparsity', 0.009009976498501736)]
  • As the correlations already showed, cnn_percent_on and momentum seem to have the greatest impact. The first is expected, but momentum is actually an interesting finding, especially since it is negatively correlated in the sparse model: a smaller momentum would lead to a higher noise accuracy. Why is that?
  • cnn_percent_on impacts test accuracy in particular, indicating that sparsity may actually have a negative impact on test performance. How much variance these linear fits explain at all is checked in the sketch below.
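
Before reading too much into individual coefficients, it is worth checking how much variance the linear fits explain in the first place. A minimal sketch, assuming `X`, `y1`, `y2`, `model_noise`, and `model_test` from the cell above are still in scope; the cross-validated R² is an addition, not part of the original notebook.

In [ ]:
# Sketch, not part of the original notebook.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# In-sample R^2 of the fitted models
print("in-sample R^2 (noise):", model_noise.score(X, y1))
print("in-sample R^2 (test): ", model_test.score(X, y2))

# 5-fold cross-validated R^2 gives a less optimistic estimate
print("cv R^2 (noise):", cross_val_score(LinearRegression(), X, y1, cv=5, scoring='r2').mean())
print("cv R^2 (test): ", cross_val_score(LinearRegression(), X, y2, cv=5, scoring='r2').mean())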

What is the ideal value for each variable that maximizes both accuracies?


In [254]:
# Only include completed experiments (90 or more epochs)
df_inf = df[df['epochs']>=90][corr_params]

In [273]:
def stats(arr):
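  """Return [min, mean, max] of arr, each rounded to 4 decimal places."""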
  return [round(v, 4) for v in [np.min(arr), np.mean(arr), np.max(arr)]]

In [277]:
df_inf.sort_values('test_accuracy_max', ascending=False)[tunable_params].head(5).apply(stats)


Out[277]:
learning_rate            [0.0628, 0.0985, 0.1241]
learning_rate_gamma       [0.0511, 0.0907, 0.129]
weight_decay              [0.0006, 0.0008, 0.001]
momentum                 [0.3494, 0.5863, 0.8658]
batch_size                       [64, 115.2, 128]
batches_in_epoch                [399, 454.8, 529]
boost_strength           [1.3055, 1.6327, 1.7856]
boost_strength_factor    [0.5093, 0.6222, 0.8509]
k_inference_factor       [0.8662, 1.0072, 1.0956]
cnn_percent_on           [0.2615, 0.3124, 0.3481]
cnn_weight_sparsity      [0.8783, 0.9242, 0.9664]
dtype: object

In [278]:
df_inf.sort_values('test_accuracy_max', ascending=True)[tunable_params].head(5).apply(stats)


Out[278]:
learning_rate              [0.0532, 0.09, 0.1284]
learning_rate_gamma      [0.0705, 0.1158, 0.1582]
weight_decay             [0.0006, 0.0008, 0.0011]
momentum                 [0.4962, 0.8295, 0.9323]
batch_size                       [64, 102.4, 128]
batches_in_epoch                [325, 412.4, 538]
boost_strength            [0.948, 1.2459, 1.7351]
boost_strength_factor    [0.5366, 0.8112, 0.9749]
k_inference_factor       [0.9294, 1.0544, 1.1601]
cnn_percent_on            [0.153, 0.2288, 0.3045]
cnn_weight_sparsity      [0.2458, 0.4239, 0.6889]
dtype: object
  • Why is boost strength affecting the test accuracy? It does not have the same impact on noise accuracy. Hypothesis: it acts as a regularizer on the amount of sparsity in the model.
  • Higher cnn_percent_on and cnn_weight_sparsity are indicative of better test accuracy. Incidentally, they are also indicative of better noise accuracy, which is unexpected.
  • Lower weight decay improves noise accuracy but has no impact on test accuracy. Weight decay would just make the network even more sparse in the cases where it is already too sparse, which is further evidence that too much sparsity hurts performance.
  • As expected, a higher batch size and more batches per epoch improve both metrics.
  • For noise accuracy, a lower learning rate is preferred.
  • The ideal momentum value seems to be between 0.5 and 0.6, and it has a high impact on the model. This is unexpected, since typical values for SGD momentum in the literature are around 0.9. A combined ranking over both accuracies is sketched after the noise-accuracy tables below.

In [279]:
df_inf.sort_values('noise_accuracy_max', ascending=False)[tunable_params].head(5).apply(stats)


Out[279]:
learning_rate            [0.0506, 0.0712, 0.1104]
learning_rate_gamma       [0.053, 0.1039, 0.1594]
weight_decay             [0.0003, 0.0006, 0.0011]
momentum                 [0.3355, 0.4981, 0.6752]
batch_size                       [64, 102.4, 128]
batches_in_epoch                [325, 500.0, 585]
boost_strength            [0.9152, 1.381, 1.7915]
boost_strength_factor    [0.5029, 0.7229, 0.8583]
k_inference_factor       [0.8895, 0.9761, 1.0884]
cnn_percent_on           [0.2651, 0.2939, 0.3426]
cnn_weight_sparsity      [0.4513, 0.8046, 0.9981]
dtype: object

In [280]:
df_inf.sort_values('noise_accuracy_max', ascending=True)[tunable_params].head(5).apply(stats)


Out[280]:
learning_rate            [0.0703, 0.1158, 0.1392]
learning_rate_gamma      [0.0705, 0.1131, 0.1546]
weight_decay              [0.0007, 0.001, 0.0011]
momentum                 [0.4756, 0.7868, 0.9323]
batch_size                        [64, 89.6, 128]
batches_in_epoch                [338, 454.8, 545]
boost_strength            [0.948, 1.3371, 1.7351]
boost_strength_factor     [0.5046, 0.768, 0.9824]
k_inference_factor       [0.8614, 1.0332, 1.1497]
cnn_percent_on            [0.153, 0.2465, 0.3488]
cnn_weight_sparsity      [0.2458, 0.4096, 0.6889]
dtype: object
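
To answer the question that opened this section more directly, one can rank experiments by a single score that combines both metrics and look at the parameter ranges of the top runs. A minimal sketch, assuming `df_inf`, `tunable_params`, and the `stats` helper defined above; the unweighted average of the two max accuracies is an arbitrary combination, not something used in the original analysis.

In [ ]:
# Sketch: rank completed experiments by the mean of the two max accuracies
combined = df_inf.copy()
combined['combined_score'] = (combined['test_accuracy_max'] +
                              combined['noise_accuracy_max']) / 2

# [min, mean, max] of each tunable parameter among the 5 best combined runs
combined.sort_values('combined_score', ascending=False)[tunable_params].head(5).apply(stats)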

Supporting classes


In [131]:
class RayTuneExperimentBrowser(object):

  """
  Class for browsing and manipulating experiment results directories created
  by Ray Tune.
  """

  def __init__(self, experiment_path):
    self.experiment_path = os.path.abspath(experiment_path)
    self.experiment_states = self._get_experiment_states(
      self.experiment_path, exit_on_fail=True)

    self.progress = {}
    self.exp_directories = {}
    self.checkpoint_directories = {}
    self.params = {}
    for experiment_state in self.experiment_states:
      self._read_experiment(experiment_state)


  def _read_experiment(self, experiment_state):
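    """
    Parse a single experiment_state dict: for every trial checkpoint, load
    its progress.csv, record the trial directory, locate the most recent
    model checkpoint (.pt/.pth) if present, and read its params.json config.
    """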
    checkpoint_dicts = experiment_state["checkpoints"]
    checkpoint_dicts = [flatten_dict(g) for g in checkpoint_dicts]

    for exp in checkpoint_dicts:
      if exp.get("logdir", None) is None:
        continue
      exp_dir = os.path.basename(exp["logdir"])
      csv = os.path.join(self.experiment_path, exp_dir, "progress.csv")
      self.progress[exp["experiment_tag"]] = pd.read_csv(csv)
      self.exp_directories[exp["experiment_tag"]] = os.path.abspath(
        os.path.join(self.experiment_path, exp_dir))

      # Figure out checkpoint file (.pt or .pth) if it exists. For some reason
      # we need to switch to the directory in order for glob to work.
      ed = os.path.abspath(os.path.join(self.experiment_path, exp_dir))
      os.chdir(ed)
      cds = glob.glob("checkpoint*")
      if len(cds) > 0:
        cd = max(cds)
        cf = glob.glob(os.path.join(cd, "*.pt"))
        cf += glob.glob(os.path.join(cd, "*.pth"))
        if len(cf) > 0:
          self.checkpoint_directories[exp["experiment_tag"]] = os.path.join(
            ed, cf[0])
        else:
          self.checkpoint_directories[exp["experiment_tag"]] = ""
      else:
        self.checkpoint_directories[exp["experiment_tag"]] = ""

      # Read in the configs for this experiment
      paramsFile = os.path.join(self.experiment_path, exp_dir, "params.json")
      with open(paramsFile) as f:
        self.params[exp["experiment_tag"]] = json.load(f)


  def get_value(self, exp_substring="",
                tags=["test_accuracy", "noise_accuracy"],
                which='max'):
    """
    For every experiment whose name matches exp_substring, scan the history
    and return the appropriate value associated with tag.
    'which' can be one of the following:
        last: returns the last value
         min: returns the minimum value
         max: returns the maximum value
      median: returns the median value
    
    Returns a pandas dataframe with one row per matching experiment,
    containing the experiment name, the last and selected values for each
    tag (plus the epoch of the best value for min/max), the total number
    of epochs, and all configuration parameters.
    """
    # Collect experiment names that match exp at all
    exps = [e for e in self.progress if exp_substring in e]

    # Build the output column names, starting with the experiment name
    columns = ['Experiment Name']
    
    # add the columns names for main tags
    for tag in tags:
      columns.append(tag)
      columns.append(tag+'_'+which)
      if which in ["max", "min"]:
        columns.append("epoch_"+str(tag))
    
    # add training iterations
    columns.append('epochs')
    
    # add the remaining variables
    columns.extend(self.params[exps[0]].keys())
  
    all_values = []
    for e in exps:
      # values for the experiment name
      values = [e]
      # values for the main tags
      for tag in tags:
        values.append(self.progress[e][tag].iloc[-1])
        if which == "max":
          values.append(self.progress[e][tag].max())
          v = self.progress[e][tag].idxmax()
          values.append(v)
        elif which == "min":
          values.append(self.progress[e][tag].min())
          values.append(self.progress[e][tag].idxmin())
        elif which == "median":
          values.append(self.progress[e][tag].median())
        elif which == "last":
          values.append(self.progress[e][tag].iloc[-1])
        else:
          raise RuntimeError("Invalid value for which='{}'".format(which))

      # add number of epochs
      values.append(self.progress[e]['training_iteration'].iloc[-1])
          
      # remaining values
      for v in self.params[e].values():
        if isinstance(v,list):
          values.append(np.mean(v))
        else:
          values.append(v)         
      
      all_values.append(values)

    p = pd.DataFrame(all_values, columns=columns)
      
    return p


  def get_checkpoint_file(self, exp_substring=""):
    """
    For every experiment whose name matches exp_substring, return the
    full path to the checkpoint file. Returns a list of paths.
    """
    # Collect experiment names that match exp at all
    exps = [e for e in self.progress if exp_substring in e]

    paths = [self.checkpoint_directories[e] for e in exps]

    return paths


  def _get_experiment_states(self, experiment_path, exit_on_fail=False):
    """
    Return every experiment state JSON file in the path as a list of dicts.
    The list is sorted such that newer experiments appear later.
    """
    experiment_path = os.path.expanduser(experiment_path)
    experiment_state_paths = glob.glob(
      os.path.join(experiment_path, "experiment_state*.json"))
    if not experiment_state_paths:
      if exit_on_fail:
        print("No experiment state found!")
        sys.exit(0)
      else:
        return

    experiment_state_paths = list(experiment_state_paths)
    experiment_state_paths.sort()
    experiment_states = []
    for experiment_filename in list(experiment_state_paths):

      with open(experiment_filename) as f:
        experiment_states.append(json.load(f))

    return experiment_states


  def get_parameters(self, sorted_experiments):
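    """
    For each row of the given (sorted) experiments dataframe, print the
    first cnn_percent_on value from its stored config, then print the
    test_accuracy and noise_accuracy columns.
    """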
    for i,e in sorted_experiments.iterrows():
      if e['Experiment Name'] in self.params:
        params = self.params[e['Experiment Name']]
        print(params['cnn_percent_on'][0])

    print('test_accuracy')
    for i,e in sorted_experiments.iterrows():
      print(e['test_accuracy'])

    print('noise_accuracy')
    for i,e in sorted_experiments.iterrows():
      print(e['noise_accuracy'])


  def best_experiments(self, min_test_accuracy=0.86, min_noise_accuracy=0.785, sort_by="noise_accuracy"):
    """
    Return a dataframe containing all experiments whose best test_accuracy and
    noise_accuracy are above the specified thresholds.
    """
    best_accuracies = self.get_value()
    best_accuracies.sort_values(sort_by, axis=0, ascending=False,
                 inplace=True, na_position='last')
    columns = best_accuracies.columns
    best_experiments = pd.DataFrame(columns=columns)
    for i, row in best_accuracies.iterrows():
      if ((row["test_accuracy"] > min_test_accuracy)
           and (row["noise_accuracy"] > min_noise_accuracy)):
        best_experiments = best_experiments.append(row)

    return best_experiments


  def prune_checkpoints(self, max_test_accuracy=0.86, max_noise_accuracy=0.785):
    """
    TODO: delete the checkpoints for all models whose best test_accuracy and
    noise_accuracy are below the specified thresholds.
    """
    pass

In [ ]: