In [5]:
import os,sys,inspect
# Resolve the directory that contains the file backing the current frame, then
# put that directory's parent at the front of sys.path so modules located one
# level up are found first on import.
# NOTE(review): presumably this is what lets the `browser` module be imported
# from the parent folder -- confirm against the repository layout.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from browser import *
In [12]:
# Hyperparameter columns inspected throughout this notebook.
TUNABLE = [
    'm_groups',
    'k_winners',
    'eps',
    'fpartition',
    'forget_mu',
    'boost_strength',
    'boost_strength_factor',
]
# Same columns, in the same order, with 'fpartition' left out.
# NOTE(review): presumably 'fpartition' is excluded because it is not a plain
# number -- confirm.
NUMERIC_TUNABLE = [name for name in TUNABLE if name != 'fpartition']
In [7]:
# Names of the experiment result folders to load.
exps = ['Flat_PTB_Explore', 'Flat_PTB_Snipe', 'Flat_PTB_Snipe2']

# Expand each experiment name into an absolute path under the user's home
# directory: first fill the name into the path template, then expand "~".
paths = list(map(os.path.expanduser,
                 map("~/s3_jgordon/ray/results/{}".format, exps)))

# Load every experiment directory into a single frame.
df = load_many(paths)
In [13]:
# Rows that carry a k_winners_pct value get k_winners recomputed as
# k_winners_pct * m_groups; other rows keep whatever k_winners they had.
df.loc[df.k_winners_pct.notnull(), 'k_winners'] = df.k_winners_pct * df.m_groups
# astype(int) truncates toward zero and raises if any k_winners is still missing.
df['k_winners'] = df['k_winners'].astype(int)

# Make sure every tunable exists as a column so later column selections work.
for tp in TUNABLE:
    if tp in df:
        continue
    df[tp] = None

# Fill in defaults for options that some runs did not record.
df.fillna(
    value=dict(forget_mu=0.0, dropout_p=0.0, balance_part_winners=False),
    inplace=True,
)
In [14]:
def scatter_all_tunable(df, params=NUMERIC_TUNABLE, ppl_cutoff=220):
    """Scatter each parameter against the minimum validation perplexity.

    One subplot is drawn per entry of ``params``, stacked vertically. Only
    rows whose ``val_pred_ppl_min`` is at most ``ppl_cutoff`` are plotted, and
    points are colored by ``train_pred_ppl_min``.

    Args:
        df: DataFrame with the columns named in ``params`` plus
            ``val_pred_ppl_min`` and ``train_pred_ppl_min``.
        params: iterable of column names to use as x-axes, one subplot each.
        ppl_cutoff: rows with ``val_pred_ppl_min`` above this value are
            excluded from every subplot.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    params = list(params)
    # squeeze=False keeps `axs` a 2-D array even when there is a single
    # parameter; without it plt.subplots returns a bare Axes object that
    # cannot be iterated alongside `params`.
    fig, axs = plt.subplots(len(params), 1, figsize=(8, 14),
                            dpi=144,
                            gridspec_kw={'hspace': 0.7},
                            squeeze=False)
    # The row filter does not depend on the parameter, so compute it once.
    below_cutoff = df[df.val_pred_ppl_min <= ppl_cutoff]
    for ax, p in zip(axs[:, 0], params):
        below_cutoff.plot(kind='scatter', x=p, y='val_pred_ppl_min',
                          c='train_pred_ppl_min', colormap='viridis', ax=ax)
        ax.set_title("%s vs min val PPL" % p)
    plt.show()
In [15]:
# Draw the per-parameter scatter plots for the loaded runs, using the
# function's default parameter list and perplexity cutoff.
scatter_all_tunable(df)
In [20]:
# Runs with no embedding_kind recorded, best (lowest) minimum validation
# perplexity first, restricted to the tunables, a few extra settings and the
# headline metrics.
df[df.embedding_kind.isna()].sort_values(by='val_pred_ppl_min')[
    TUNABLE + [
        'weight_sparsity', 'x_b_norm', 'balance_part_winners', 'mult_integration', 'embedding_kind',
        'val_pred_ppl_min', 'val_pred_acc_max', 'train_pred_ppl_min', 'epoch_val_pred_ppl',
    ]
]
Out[20]:
In [21]:
# Runs that do have an embedding_kind recorded, best (lowest) minimum
# validation perplexity first, restricted to the tunables, a few extra
# settings and the headline metrics.
df[df.embedding_kind.notna()].sort_values(by='val_pred_ppl_min')[
    TUNABLE + [
        'weight_sparsity', 'x_b_norm', 'balance_part_winners', 'mult_integration', 'embedding_kind',
        'val_pred_ppl_min', 'val_pred_acc_max', 'train_pred_ppl_min', 'epoch_val_pred_ppl',
    ]
]
Out[21]:
In [41]:
# Two side-by-side panels with k_winners on the x-axis and a different column
# on the y-axis of each; only runs with val_pred_ppl_min below 200 are drawn,
# colored by that same metric.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), dpi=144,
                        gridspec_kw={'wspace': 0.5})
for panel, y_col in zip(axs, ('fpartition', 'm_groups')):
    df[df.val_pred_ppl_min < 200].plot(
        kind='scatter',
        x='k_winners',
        y=y_col,
        c='val_pred_ppl_min',
        colormap='viridis',
        ax=panel,
    )
plt.show()
In [42]:
# Encode the x_b_norm flag as 0/1 so it can serve as a scatter-plot axis.
df['norm_int'] = 0
df.loc[df.x_b_norm, 'norm_int'] = 1

# Plot the flag against epoch_val_pred_ppl and title the resulting axes; the
# Text object returned by set_title is the cell's displayed value.
df.plot(kind='scatter', x='norm_int', y='epoch_val_pred_ppl').set_title(
    "Normalization slows down time-to-peak")
Out[42]: