In [1]:
%load_ext autoreload
%autoreload 2

In [130]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys   # used by RayTuneExperimentBrowser below
import glob
import json  # used by RayTuneExperimentBrowser below
import tabulate
import pprint
import click
import numpy as np
import pandas as pd
from ray.tune.commands import *

Data loading and general exploration


In [132]:
browser = RayTuneExperimentBrowser(os.path.expanduser("~/nta/results/VGG19SparseFull"))

In [133]:
df = browser.best_experiments(min_test_accuracy=0.0, min_noise_accuracy=0.0, sort_by="test_accuracy")


/Users/lsouza/miniconda3/envs/numenta/lib/python3.7/site-packages/numpy/core/fromnumeric.py:3118: RuntimeWarning: Mean of empty slice.
  out=out, **kwargs)
/Users/lsouza/miniconda3/envs/numenta/lib/python3.7/site-packages/numpy/core/_methods.py:85: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)

In [134]:
df.head(5)


Out[134]:
Experiment Name test_accuracy test_accuracy_max epoch_test_accuracy noise_accuracy noise_accuracy_max epoch_noise_accuracy epochs batch_size batches_in_epoch ... path repetitions restore_supported sync_function test_batch_size test_batches_in_epoch upload_dir use_max_pooling weight_decay weight_sparsity
9 9_batch_size=128,batches_in_epoch=403,boost_st... 0.7021 0.7021 89 0.2357 0.2584 52 90 128 403 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000990 NaN
175 175_batch_size=128,batches_in_epoch=425,boost_... 0.7017 0.7091 140 0.2208 0.2528 87 164 128 425 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000609 NaN
183 183_batch_size=128,batches_in_epoch=529,boost_... 0.7005 0.7005 89 0.2537 0.2821 29 90 128 529 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000832 NaN
277 277_batch_size=128,batches_in_epoch=518,boost_... 0.6990 0.6990 89 0.2584 0.2932 71 90 128 518 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000680 NaN
11 11_batch_size=128,batches_in_epoch=564,boost_s... 0.6965 0.6989 159 0.2341 0.2620 85 164 128 564 ... ~/nta/results 150 True aws s3 sync `dirname {local_dir}` {remote_dir}... 128 500 s3://lsouza/ray/results True 0.000698 NaN

5 rows × 49 columns


In [136]:
df.columns


Out[136]:
Index(['Experiment Name', 'test_accuracy', 'test_accuracy_max',
       'epoch_test_accuracy', 'noise_accuracy', 'noise_accuracy_max',
       'epoch_noise_accuracy', 'epochs', 'batch_size', 'batches_in_epoch',
       'batches_in_first_epoch', 'block_sizes', 'boost_strength',
       'boost_strength_factor', 'checkpoint_at_end', 'cnn_kernel_size',
       'cnn_out_channels', 'cnn_percent_on', 'cnn_weight_sparsity',
       'cpu_percentage', 'data_dir', 'dataset', 'experiment',
       'first_epoch_batch_size', 'gpu_percentage', 'input_shape', 'iterations',
       'k_inference_factor', 'learning_rate', 'learning_rate_gamma',
       'linear_n', 'linear_percent_on', 'lr_step_schedule', 'momentum', 'name',
       'network_type', 'num_cpus', 'num_gpus', 'output_size', 'path',
       'repetitions', 'restore_supported', 'sync_function', 'test_batch_size',
       'test_batches_in_epoch', 'upload_dir', 'use_max_pooling',
       'weight_decay', 'weight_sparsity'],
      dtype='object')

In [137]:
df.iloc[0]


Out[137]:
Experiment Name           9_batch_size=128,batches_in_epoch=403,boost_st...
test_accuracy                                                        0.7021
test_accuracy_max                                                    0.7021
epoch_test_accuracy                                                      89
noise_accuracy                                                       0.2357
noise_accuracy_max                                                   0.2584
epoch_noise_accuracy                                                     52
epochs                                                                   90
batch_size                                                              128
batches_in_epoch                                                        403
batches_in_first_epoch                                                  600
block_sizes                                                             3.2
boost_strength                                                      1.73176
boost_strength_factor                                              0.626255
checkpoint_at_end                                                      True
cnn_kernel_size                                                           3
cnn_out_channels                                                      294.4
cnn_percent_on                                                      0.31395
cnn_weight_sparsity                                                0.917248
cpu_percentage                                                            1
data_dir                                                     ~/nta/datasets
dataset                                                            CIFAR100
experiment                                                             grid
first_epoch_batch_size                                                    4
gpu_percentage                                                        0.165
input_shape                                                         22.3333
iterations                                                              164
k_inference_factor                                                 0.977242
learning_rate                                                      0.102157
learning_rate_gamma                                               0.0510721
linear_n                                                                NaN
linear_percent_on                                                       NaN
lr_step_schedule                                                      101.5
momentum                                                           0.399231
name                                                        VGG19SparseFull
network_type                                                            vgg
num_cpus                                                                 31
num_gpus                                                                  4
output_size                                                             100
path                                                          ~/nta/results
repetitions                                                             150
restore_supported                                                      True
sync_function             aws s3 sync `dirname {local_dir}` {remote_dir}...
test_batch_size                                                         128
test_batches_in_epoch                                                   500
upload_dir                                          s3://lsouza/ray/results
use_max_pooling                                                        True
weight_decay                                                     0.00099037
weight_sparsity                                                         NaN
Name: 9, dtype: object

Epochs and Accuracy exploration


In [155]:
len(df[df['epochs']==164])


Out[155]:
45

In [156]:
df[df['epochs']==164][['test_accuracy_max', 'noise_accuracy_max']].corr()


Out[156]:
test_accuracy_max noise_accuracy_max
test_accuracy_max 1.000000 -0.278818
noise_accuracy_max -0.278818 1.000000

In [157]:
df[df['epochs']==164][['test_accuracy_max', 'noise_accuracy_max']].min()


Out[157]:
test_accuracy_max     0.5826
noise_accuracy_max    0.2199
dtype: float64

In [158]:
df[df['epochs']==164][['test_accuracy_max', 'noise_accuracy_max']].mean()


Out[158]:
test_accuracy_max     0.663829
noise_accuracy_max    0.288516
dtype: float64

In [159]:
df[df['epochs']==164][['test_accuracy_max', 'noise_accuracy_max']].max()


Out[159]:
test_accuracy_max     0.7091
noise_accuracy_max    0.3271
dtype: float64

In [160]:
len(df[df['epochs']==90])


Out[160]:
43

In [161]:
df[df['epochs']==90][['test_accuracy_max', 'noise_accuracy_max']].corr()


Out[161]:
test_accuracy_max noise_accuracy_max
test_accuracy_max 1.000000 0.584994
noise_accuracy_max 0.584994 1.000000

In [162]:
df[df['epochs']==90][['test_accuracy_max', 'noise_accuracy_max']].min()


Out[162]:
test_accuracy_max     0.4906
noise_accuracy_max    0.1592
dtype: float64

In [163]:
df[df['epochs']==90][['test_accuracy_max', 'noise_accuracy_max']].mean()


Out[163]:
test_accuracy_max     0.638398
noise_accuracy_max    0.252256
dtype: float64

In [164]:
df[df['epochs']==90][['test_accuracy_max', 'noise_accuracy_max']].max()


Out[164]:
test_accuracy_max     0.7021
noise_accuracy_max    0.2967
dtype: float64
  • It is interesting to see that the experiments that ran for 90 epochs show a very different correlation between noise and test accuracy than the experiments that ran for 164 epochs, even though the averages are quite similar. What could that mean? Perhaps, past some point, progress in test accuracy leads to a regression in noise accuracy, which would imply that the more the model fits the standard data, the lower its noise accuracy. A quick significance check on the gap between the two correlations is sketched below.
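
One way to probe whether the gap between the two correlations (-0.28 in the 164-epoch group vs. 0.58 in the 90-epoch group) is more than noise is a Fisher z-test on two independent correlation coefficients. This is a minimal sketch, assuming the `df` dataframe loaded above and that scipy is available; it was not part of the original run.

In [ ]:
# Minimal sketch (not part of the original run): Fisher z-test for the
# difference between two independent Pearson correlations.
import numpy as np
from scipy import stats

def compare_correlations(r1, n1, r2, n2):
  """Two-sided Fisher z-test comparing two independent correlations."""
  z1, z2 = np.arctanh(r1), np.arctanh(r2)
  se = np.sqrt(1.0 / (n1 - 3) + 1.0 / (n2 - 3))
  z = (z1 - z2) / se
  p_value = 2 * (1 - stats.norm.cdf(abs(z)))
  return z, p_value

def group_corr(data, n_epochs):
  """Correlation between the two max accuracies within one epoch group."""
  group = data[data['epochs'] == n_epochs][['test_accuracy_max', 'noise_accuracy_max']]
  return group.corr().iloc[0, 1], len(group)

r_164, n_164 = group_corr(df, 164)
r_90, n_90 = group_corr(df, 90)
print(compare_correlations(r_164, n_164, r_90, n_90))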

In [185]:
df[df['epochs']>=30][['epochs', 'test_accuracy']].astype(np.float32).corr()


Out[185]:
epochs test_accuracy
epochs 1.000000 0.830726
test_accuracy 0.830726 1.000000

In [186]:
df[df['epochs']>=30][['epochs', 'noise_accuracy']].astype(np.float32).corr()


Out[186]:
epochs noise_accuracy
epochs 1.00000 0.80023
noise_accuracy 0.80023 1.00000
  • Test accuracy appears slightly more correlated with the number of epochs than noise accuracy is, but the difference is small and might be due to randomness; a bootstrap check is sketched below.
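
To gauge whether the 0.83 vs. 0.80 gap is meaningful, one option is to bootstrap the difference between the two correlations over the same set of experiments. A minimal sketch, assuming `df` from above; the resampling scheme and the 1,000-iteration count are additions, not part of the original notebook.

In [ ]:
# Minimal sketch (not part of the original analysis): bootstrap the gap
# corr(epochs, test_accuracy) - corr(epochs, noise_accuracy) to see whether
# it is distinguishable from zero.
import numpy as np

def bootstrap_corr_gap(data, n_iter=1000, seed=42):
  rng = np.random.RandomState(seed)
  sub = data[data['epochs'] >= 30][['epochs', 'test_accuracy', 'noise_accuracy']].astype(np.float32)
  gaps = []
  for _ in range(n_iter):
    sample = sub.sample(n=len(sub), replace=True, random_state=rng)
    corr = sample.corr()
    gaps.append(corr.loc['epochs', 'test_accuracy'] - corr.loc['epochs', 'noise_accuracy'])
  # 95% bootstrap interval and median of the gap
  return np.percentile(gaps, [2.5, 50, 97.5])

print(bootstrap_corr_gap(df))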

A look at other possible correlations


In [207]:
tunable_params_general = ['learning_rate', 'learning_rate_gamma', 'weight_decay', 'momentum', 'batch_size', 'batches_in_epoch']
tunable_params_sparsity = ['boost_strength', 'boost_strength_factor', 'k_inference_factor', 'cnn_percent_on', 'cnn_weight_sparsity']
tunable_params = tunable_params_general + tunable_params_sparsity
performance_metrics = ['noise_accuracy_max', 'test_accuracy_max']
corr_params = tunable_params + performance_metrics

df[corr_params].astype(np.float32).corr()


Out[207]:
learning_rate learning_rate_gamma weight_decay momentum batch_size batches_in_epoch boost_strength boost_strength_factor k_inference_factor cnn_percent_on cnn_weight_sparsity noise_accuracy_max test_accuracy_max
learning_rate 1.000000 0.012162 -0.017390 0.057644 -0.037703 -0.015762 -0.117501 -0.002551 -0.034224 -0.017177 0.001650 -0.145329 -0.097126
learning_rate_gamma 0.012162 1.000000 0.013388 0.037070 -0.023368 -0.035762 0.029487 0.068997 -0.034483 -0.121417 0.063874 -0.039836 -0.033162
weight_decay -0.017390 0.013388 1.000000 0.038532 -0.015457 -0.064426 -0.061364 0.088593 0.048938 -0.037099 -0.074064 -0.085577 -0.047994
momentum 0.057644 0.037070 0.038532 1.000000 0.058849 0.058075 0.009580 -0.032437 -0.055015 0.007072 -0.106112 -0.383913 -0.276776
batch_size -0.037703 -0.023368 -0.015457 0.058849 1.000000 0.040585 0.018939 -0.066548 -0.045015 0.007049 0.120368 0.136283 0.146644
batches_in_epoch -0.015762 -0.035762 -0.064426 0.058075 0.040585 1.000000 0.012808 -0.055749 -0.001159 -0.042075 -0.062545 0.115584 0.107763
boost_strength -0.117501 0.029487 -0.061364 0.009580 0.018939 0.012808 1.000000 -0.090654 0.077328 -0.026559 0.087035 0.040313 0.022680
boost_strength_factor -0.002551 0.068997 0.088593 -0.032437 -0.066548 -0.055749 -0.090654 1.000000 -0.011040 -0.028418 -0.104310 0.009095 0.003375
k_inference_factor -0.034224 -0.034483 0.048938 -0.055015 -0.045015 -0.001159 0.077328 -0.011040 1.000000 0.003868 0.059874 -0.043341 -0.071874
cnn_percent_on -0.017177 -0.121417 -0.037099 0.007072 0.007049 -0.042075 -0.026559 -0.028418 0.003868 1.000000 -0.004209 0.336341 0.368590
cnn_weight_sparsity 0.001650 0.063874 -0.074064 -0.106112 0.120368 -0.062545 0.087035 -0.104310 0.059874 -0.004209 1.000000 0.061532 0.042322
noise_accuracy_max -0.145329 -0.039836 -0.085577 -0.383913 0.136283 0.115584 0.040313 0.009095 -0.043341 0.336341 0.061532 1.000000 0.952541
test_accuracy_max -0.097126 -0.033162 -0.047994 -0.276776 0.146644 0.107763 0.022680 0.003375 -0.071874 0.368590 0.042322 0.952541 1.000000

In [208]:
df[corr_params].astype(np.float32).corr() > 0.3


Out[208]:
learning_rate learning_rate_gamma weight_decay momentum batch_size batches_in_epoch boost_strength boost_strength_factor k_inference_factor cnn_percent_on cnn_weight_sparsity noise_accuracy_max test_accuracy_max
learning_rate True False False False False False False False False False False False False
learning_rate_gamma False True False False False False False False False False False False False
weight_decay False False True False False False False False False False False False False
momentum False False False True False False False False False False False False False
batch_size False False False False True False False False False False False False False
batches_in_epoch False False False False False True False False False False False False False
boost_strength False False False False False False True False False False False False False
boost_strength_factor False False False False False False False True False False False False False
k_inference_factor False False False False False False False False True False False False False
cnn_percent_on False False False False False False False False False True False True True
cnn_weight_sparsity False False False False False False False False False False True False False
noise_accuracy_max False False False False False False False False False True False True True
test_accuracy_max False False False False False False False False False True False True True

In [209]:
df[corr_params].astype(np.float32).corr() < -0.3


Out[209]:
learning_rate learning_rate_gamma weight_decay momentum batch_size batches_in_epoch boost_strength boost_strength_factor k_inference_factor cnn_percent_on cnn_weight_sparsity noise_accuracy_max test_accuracy_max
learning_rate False False False False False False False False False False False False False
learning_rate_gamma False False False False False False False False False False False False False
weight_decay False False False False False False False False False False False False False
momentum False False False False False False False False False False False True False
batch_size False False False False False False False False False False False False False
batches_in_epoch False False False False False False False False False False False False False
boost_strength False False False False False False False False False False False False False
boost_strength_factor False False False False False False False False False False False False False
k_inference_factor False False False False False False False False False False False False False
cnn_percent_on False False False False False False False False False False False False False
cnn_weight_sparsity False False False False False False False False False False False False False
noise_accuracy_max False False False True False False False False False False False False False
test_accuracy_max False False False False False False False False False False False False False
  • Positive correlation: cnn_percent_on with both noise_accuracy_max and test_accuracy_max
  • Negative correlation: momentum with noise_accuracy_max (both relationships are plotted in the sketch below)
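
A quick scatter plot makes these two relationships easier to inspect than the raw correlation table. A minimal plotting sketch, assuming `df` is still in scope and matplotlib is installed; it was not part of the original run.

In [ ]:
# Sketch, not part of the original run; assumes matplotlib is installed.
import matplotlib.pyplot as plt

# Scatter the three parameter/metric pairs flagged by the correlation tables
pairs = [('cnn_percent_on', 'test_accuracy_max'),
         ('cnn_percent_on', 'noise_accuracy_max'),
         ('momentum', 'noise_accuracy_max')]
fig, axes = plt.subplots(1, 3, figsize=(12, 3.5))
for ax, (x, y) in zip(axes, pairs):
  ax.scatter(df[x].astype(np.float32), df[y].astype(np.float32), s=10, alpha=0.6)
  ax.set_xlabel(x)
  ax.set_ylabel(y)
plt.tight_layout()
plt.show()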

Further analysis of the tunable hyperparameters


In [234]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from pprint import pprint

In [237]:
# Including all experiments with 30 or more epochs
df_inf = df[df['epochs']>=30]
y1 = df_inf['noise_accuracy_max']
y2 = df_inf['test_accuracy_max']
X = df_inf[tunable_params].astype(np.float32)

# adjust all X to same scale
scaler = StandardScaler()
X = scaler.fit_transform(X)

model_noise = LinearRegression()
model_noise.fit(X, y1)

print("\n Noise accuracy")
pprint(list(zip(tunable_params, model_noise.coef_)))

model_test = LinearRegression()
model_test.fit(X, y2)

print("\n Test accuracy")
pprint(list(zip(tunable_params, model_test.coef_)))


 Noise accuracy
[('learning_rate', -0.008320055931102738),
 ('learning_rate_gamma', 0.00010922925444718565),
 ('weight_decay', -0.007074033772320695),
 ('momentum', -0.023921331255530413),
 ('batch_size', 0.017012582555930997),
 ('batches_in_epoch', 0.0065225404456871194),
 ('boost_strength', 0.005106812347459978),
 ('boost_strength_factor', -0.00043002669351718305),
 ('k_inference_factor', -0.004576034582844502),
 ('cnn_percent_on', 0.008260245666037353),
 ('cnn_weight_sparsity', 0.0016398916756805678)]

 Test accuracy
[('learning_rate', -0.0037434259712621066),
 ('learning_rate_gamma', 0.00355353048512084),
 ('weight_decay', -0.00425357050234218),
 ('momentum', -0.03087870199997384),
 ('batch_size', 0.04314875995376824),
 ('batches_in_epoch', 0.016936418027689627),
 ('boost_strength', 0.013619723602916817),
 ('boost_strength_factor', -0.0018614337515772711),
 ('k_inference_factor', -0.0158375244155911),
 ('cnn_percent_on', 0.045455169207160875),
 ('cnn_weight_sparsity', 0.0009522081649769887)]

In [238]:
# Including all experiments with 90 or more epochs ("completed")
df_inf = df[df['epochs']>=90]
y1 = df_inf['noise_accuracy_max']
y2 = df_inf['test_accuracy_max']
X = df_inf[tunable_params].astype(np.float32)

# adjust all X to same scale
scaler = StandardScaler()
X = scaler.fit_transform(X)

model_noise = LinearRegression()
model_noise.fit(X, y1)

print("\n Noise accuracy")
pprint(list(zip(tunable_params, model_noise.coef_)))

model_test = LinearRegression()
model_test.fit(X, y2)

print("\n Test accuracy")
pprint(list(zip(tunable_params, model_test.coef_)))


 Noise accuracy
[('learning_rate', -0.013711040159944263),
 ('learning_rate_gamma', -0.0001959410878818779),
 ('weight_decay', -0.013152972568734067),
 ('momentum', -0.021176038112329668),
 ('batch_size', 0.010269630267931313),
 ('batches_in_epoch', -0.00014625127560307852),
 ('boost_strength', 0.0010431710438098365),
 ('boost_strength_factor', -0.00037476933729079377),
 ('k_inference_factor', -0.0008177660350967232),
 ('cnn_percent_on', -0.0006308944388026454),
 ('cnn_weight_sparsity', -0.001361569311950573)]

 Test accuracy
[('learning_rate', 0.003507782817232606),
 ('learning_rate_gamma', -0.0014039065080307283),
 ('weight_decay', 0.000412071505479126),
 ('momentum', -0.007036921311942076),
 ('batch_size', 0.013137881191602896),
 ('batches_in_epoch', 0.0045387770677112515),
 ('boost_strength', 0.00018761581131504984),
 ('boost_strength_factor', -0.0024625983394766736),
 ('k_inference_factor', -0.002469787063475652),
 ('cnn_percent_on', 0.0201868856868994),
 ('cnn_weight_sparsity', 0.009009976498501736)]
  • As the correlations already showed, cnn_percent_on and momentum seem to have the greatest impact. The first is expected, but momentum is actually an interesting finding, especially since it is negatively correlated in the sparse model: a smaller momentum would lead to a higher noise accuracy. Why is that?
  • cnn_percent_on impacts test accuracy in particular, indicating that sparsity may actually have a negative impact on test performance. How much variance these linear fits explain at all is checked in the sketch below.
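
Before reading too much into individual coefficients, it is worth checking how much variance the linear fits explain in the first place. A minimal sketch, assuming `X`, `y1`, `y2`, `model_noise`, and `model_test` from the cell above are still in scope; the cross-validated R² is an addition, not part of the original notebook.

In [ ]:
# Sketch, not part of the original notebook.
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# In-sample R^2 of the fitted models
print("in-sample R^2 (noise):", model_noise.score(X, y1))
print("in-sample R^2 (test): ", model_test.score(X, y2))

# 5-fold cross-validated R^2 gives a less optimistic estimate
print("cv R^2 (noise):", cross_val_score(LinearRegression(), X, y1, cv=5, scoring='r2').mean())
print("cv R^2 (test): ", cross_val_score(LinearRegression(), X, y2, cv=5, scoring='r2').mean())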

What is the ideal value for each variable that maximizes both accuracies?


In [254]:
# Only include completed experiments (90 or more epochs)
df_inf = df[df['epochs']>=90][corr_params]

In [273]:
def stats(arr):
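  """Return [min, mean, max] of arr, each rounded to 4 decimal places."""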
  return [round(v, 4) for v in [np.min(arr), np.mean(arr), np.max(arr)]]

In [277]:
df_inf.sort_values('test_accuracy_max', ascending=False)[tunable_params].head(5).apply(stats)


Out[277]:
learning_rate            [0.0628, 0.0985, 0.1241]
learning_rate_gamma       [0.0511, 0.0907, 0.129]
weight_decay              [0.0006, 0.0008, 0.001]
momentum                 [0.3494, 0.5863, 0.8658]
batch_size                       [64, 115.2, 128]
batches_in_epoch                [399, 454.8, 529]
boost_strength           [1.3055, 1.6327, 1.7856]
boost_strength_factor    [0.5093, 0.6222, 0.8509]
k_inference_factor       [0.8662, 1.0072, 1.0956]
cnn_percent_on           [0.2615, 0.3124, 0.3481]
cnn_weight_sparsity      [0.8783, 0.9242, 0.9664]
dtype: object

In [278]:
df_inf.sort_values('test_accuracy_max', ascending=True)[tunable_params].head(5).apply(stats)


Out[278]:
learning_rate              [0.0532, 0.09, 0.1284]
learning_rate_gamma      [0.0705, 0.1158, 0.1582]
weight_decay             [0.0006, 0.0008, 0.0011]
momentum                 [0.4962, 0.8295, 0.9323]
batch_size                       [64, 102.4, 128]
batches_in_epoch                [325, 412.4, 538]
boost_strength            [0.948, 1.2459, 1.7351]
boost_strength_factor    [0.5366, 0.8112, 0.9749]
k_inference_factor       [0.9294, 1.0544, 1.1601]
cnn_percent_on            [0.153, 0.2288, 0.3045]
cnn_weight_sparsity      [0.2458, 0.4239, 0.6889]
dtype: object
  • Why is boost strength affecting the test accuracy? It does not have the same impact on noise accuracy. Hypothesis: it acts as a regularizer on the amount of sparsity in the model.
  • Higher cnn_percent_on and cnn_weight_sparsity are indicative of better test accuracy. Incidentally, they are also indicative of better noise accuracy, which is unexpected.
  • Lower weight decay improves noise accuracy but has no impact on test accuracy. Weight decay would just make the network even more sparse in the cases where it is already too sparse, which is further evidence that too much sparsity hurts performance.
  • As expected, a higher batch size and more batches per epoch improve both metrics.
  • For noise accuracy, a lower learning rate is preferred.
  • The ideal momentum value seems to be between 0.5 and 0.6, and it has a high impact on the model. This is unexpected, since typical values for SGD momentum in the literature are around 0.9. A combined ranking over both accuracies is sketched after the noise-accuracy tables below.

In [279]:
df_inf.sort_values('noise_accuracy_max', ascending=False)[tunable_params].head(5).apply(stats)


Out[279]:
learning_rate            [0.0506, 0.0712, 0.1104]
learning_rate_gamma       [0.053, 0.1039, 0.1594]
weight_decay             [0.0003, 0.0006, 0.0011]
momentum                 [0.3355, 0.4981, 0.6752]
batch_size                       [64, 102.4, 128]
batches_in_epoch                [325, 500.0, 585]
boost_strength            [0.9152, 1.381, 1.7915]
boost_strength_factor    [0.5029, 0.7229, 0.8583]
k_inference_factor       [0.8895, 0.9761, 1.0884]
cnn_percent_on           [0.2651, 0.2939, 0.3426]
cnn_weight_sparsity      [0.4513, 0.8046, 0.9981]
dtype: object

In [280]:
df_inf.sort_values('noise_accuracy_max', ascending=True)[tunable_params].head(5).apply(stats)


Out[280]:
learning_rate            [0.0703, 0.1158, 0.1392]
learning_rate_gamma      [0.0705, 0.1131, 0.1546]
weight_decay              [0.0007, 0.001, 0.0011]
momentum                 [0.4756, 0.7868, 0.9323]
batch_size                        [64, 89.6, 128]
batches_in_epoch                [338, 454.8, 545]
boost_strength            [0.948, 1.3371, 1.7351]
boost_strength_factor     [0.5046, 0.768, 0.9824]
k_inference_factor       [0.8614, 1.0332, 1.1497]
cnn_percent_on            [0.153, 0.2465, 0.3488]
cnn_weight_sparsity      [0.2458, 0.4096, 0.6889]
dtype: object
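
To answer the question that opened this section more directly, one can rank experiments by a single score that combines both metrics and look at the parameter ranges of the top runs. A minimal sketch, assuming `df_inf`, `tunable_params`, and the `stats` helper defined above; the unweighted average of the two max accuracies is an arbitrary combination, not something used in the original analysis.

In [ ]:
# Sketch: rank completed experiments by the mean of the two max accuracies
combined = df_inf.copy()
combined['combined_score'] = (combined['test_accuracy_max'] +
                              combined['noise_accuracy_max']) / 2

# [min, mean, max] of each tunable parameter among the 5 best combined runs
combined.sort_values('combined_score', ascending=False)[tunable_params].head(5).apply(stats)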

Supporting classes


In [131]:
class RayTuneExperimentBrowser(object):

  """
  Class for browsing and manipulating experiment results directories created
  by Ray Tune.
  """

  def __init__(self, experiment_path):
    self.experiment_path = os.path.abspath(experiment_path)
    self.experiment_states = self._get_experiment_states(
      self.experiment_path, exit_on_fail=True)

    self.progress = {}
    self.exp_directories = {}
    self.checkpoint_directories = {}
    self.params = {}
    for experiment_state in self.experiment_states:
      self._read_experiment(experiment_state)


  def _read_experiment(self, experiment_state):
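    """
    Parse a single experiment_state dict: for every trial checkpoint, load
    its progress.csv, record the trial directory, locate the most recent
    model checkpoint (.pt/.pth) if present, and read its params.json config.
    """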
    checkpoint_dicts = experiment_state["checkpoints"]
    checkpoint_dicts = [flatten_dict(g) for g in checkpoint_dicts]

    for exp in checkpoint_dicts:
      if exp.get("logdir", None) is None:
        continue
      exp_dir = os.path.basename(exp["logdir"])
      csv = os.path.join(self.experiment_path, exp_dir, "progress.csv")
      self.progress[exp["experiment_tag"]] = pd.read_csv(csv)
      self.exp_directories[exp["experiment_tag"]] = os.path.abspath(
        os.path.join(self.experiment_path, exp_dir))

      # Figure out checkpoint file (.pt or .pth) if it exists. For some reason
      # we need to switch to the directory in order for glob to work.
      ed = os.path.abspath(os.path.join(self.experiment_path, exp_dir))
      os.chdir(ed)
      cds = glob.glob("checkpoint*")
      if len(cds) > 0:
        cd = max(cds)
        cf = glob.glob(os.path.join(cd, "*.pt"))
        cf += glob.glob(os.path.join(cd, "*.pth"))
        if len(cf) > 0:
          self.checkpoint_directories[exp["experiment_tag"]] = os.path.join(
            ed, cf[0])
        else:
          self.checkpoint_directories[exp["experiment_tag"]] = ""
      else:
        self.checkpoint_directories[exp["experiment_tag"]] = ""

      # Read in the configs for this experiment
      paramsFile = os.path.join(self.experiment_path, exp_dir, "params.json")
      with open(paramsFile) as f:
        self.params[exp["experiment_tag"]] = json.load(f)


  def get_value(self, exp_substring="",
                tags=["test_accuracy", "noise_accuracy"],
                which='max'):
    """
    For every experiment whose name matches exp_substring, scan the history
    and return the appropriate value associated with tag.
    'which' can be one of the following:
        last: returns the last value
         min: returns the minimum value
         max: returns the maximum value
      median: returns the median value
    
    Returns a pandas dataframe with one row per matching experiment,
    containing the experiment name, the last and selected values for each
    tag (plus the epoch of the best value for min/max), the total number
    of epochs, and all configuration parameters.
    """
    # Collect experiment names that match exp at all
    exps = [e for e in self.progress if exp_substring in e]

    # Build the output column names, starting with the experiment name
    columns = ['Experiment Name']
    
    # add the columns names for main tags
    for tag in tags:
      columns.append(tag)
      columns.append(tag+'_'+which)
      if which in ["max", "min"]:
        columns.append("epoch_"+str(tag))
    
    # add training iterations
    columns.append('epochs')
    
    # add the remaining variables
    columns.extend(self.params[exps[0]].keys())
  
    all_values = []
    for e in exps:
      # values for the experiment name
      values = [e]
      # values for the main tags
      for tag in tags:
        values.append(self.progress[e][tag].iloc[-1])
        if which == "max":
          values.append(self.progress[e][tag].max())
          v = self.progress[e][tag].idxmax()
          values.append(v)
        elif which == "min":
          values.append(self.progress[e][tag].min())
          values.append(self.progress[e][tag].idxmin())
        elif which == "median":
          values.append(self.progress[e][tag].median())
        elif which == "last":
          values.append(self.progress[e][tag].iloc[-1])
        else:
          raise RuntimeError("Invalid value for which='{}'".format(which))

      # add number of epochs
      values.append(self.progress[e]['training_iteration'].iloc[-1])
          
      # remaining values
      for v in self.params[e].values():
        if isinstance(v,list):
          values.append(np.mean(v))
        else:
          values.append(v)         
      
      all_values.append(values)

    p = pd.DataFrame(all_values, columns=columns)
      
    return p


  def get_checkpoint_file(self, exp_substring=""):
    """
    For every experiment whose name matches exp_substring, return the
    full path to the checkpoint file. Returns a list of paths.
    """
    # Collect experiment names that match exp at all
    exps = [e for e in self.progress if exp_substring in e]

    paths = [self.checkpoint_directories[e] for e in exps]

    return paths


  def _get_experiment_states(self, experiment_path, exit_on_fail=False):
    """
    Return every experiment state JSON file in the path as a list of dicts.
    The list is sorted such that newer experiments appear later.
    """
    experiment_path = os.path.expanduser(experiment_path)
    experiment_state_paths = glob.glob(
      os.path.join(experiment_path, "experiment_state*.json"))
    if not experiment_state_paths:
      if exit_on_fail:
        print("No experiment state found!")
        sys.exit(0)
      else:
        return

    experiment_state_paths = list(experiment_state_paths)
    experiment_state_paths.sort()
    experiment_states = []
    for experiment_filename in list(experiment_state_paths):

      with open(experiment_filename) as f:
        experiment_states.append(json.load(f))

    return experiment_states


  def get_parameters(self, sorted_experiments):
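    """
    For each row of the given (sorted) experiments dataframe, print the
    first cnn_percent_on value from its stored config, then print the
    test_accuracy and noise_accuracy columns.
    """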
    for i,e in sorted_experiments.iterrows():
      if e['Experiment Name'] in self.params:
        params = self.params[e['Experiment Name']]
        print(params['cnn_percent_on'][0])

    print('test_accuracy')
    for i,e in sorted_experiments.iterrows():
      print(e['test_accuracy'])

    print('noise_accuracy')
    for i,e in sorted_experiments.iterrows():
      print(e['noise_accuracy'])


  def best_experiments(self, min_test_accuracy=0.86, min_noise_accuracy=0.785, sort_by="noise_accuracy"):
    """
    Return a dataframe containing all experiments whose best test_accuracy and
    noise_accuracy are above the specified thresholds.
    """
    best_accuracies = self.get_value()
    best_accuracies.sort_values(sort_by, axis=0, ascending=False,
                 inplace=True, na_position='last')
    columns = best_accuracies.columns
    best_experiments = pd.DataFrame(columns=columns)
    for i, row in best_accuracies.iterrows():
      if ((row["test_accuracy"] > min_test_accuracy)
           and (row["noise_accuracy"] > min_noise_accuracy)):
        best_experiments = best_experiments.append(row)

    return best_experiments


  def prune_checkpoints(self, max_test_accuracy=0.86, max_noise_accuracy=0.785):
    """
    TODO: delete the checkpoints for all models whose best test_accuracy and
    noise_accuracy are below the specified thresholds.
    """
    pass

In [ ]: