In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
%matplotlib inline
from nn_src.imports import *
In [3]:
DATA_DIR = '/scratch/srasp/ppnn_data/'
RESULTS_DIR = '/export/home/srasp/repositories/ppnn/results/csv_files/'
In [4]:
def reset_weights(model):
    """Re-initialise all layer weights of ``model`` in the current backend session.

    The initializer op of every variable listed in ``layer.weights`` is re-run,
    so each call gives the model a fresh set of starting weights.

    Previously only ``layer.kernel`` was re-initialised, and only for layers
    exposing ``kernel_initializer``. Any other variable a layer owns (biases,
    or the weights of layers without a kernel such as embeddings) kept its
    trained value from one ensemble member to the next.

    Args:
        model: Model object exposing ``layers``; each layer exposes ``weights``.

    Note:
        Only layer weights are touched here; optimizer state is not reset.
    """
    session = K.get_session()
    for layer in model.layers:
        # Assumes session-style variables exposing an `.initializer` op, which
        # is the same mechanism the kernel-only reset relied on.
        for weight in layer.weights:
            weight.initializer.run(session=session)
In [5]:
def ensemble_scores(m, n, x_trn, y_trn, x_test, y_test, **kwargs):
    """Train ``m`` from scratch ``n`` times and collect scores and predictions.

    Every repetition resets the model via ``reset_weights``, fits it on the
    training data (``kwargs`` are forwarded to ``m.fit``), then evaluates on
    the training and test data and predicts on the test data.

    Returns:
        Tuple ``(trn_scores, test_scores, preds)`` of three lists, each with
        one entry per repetition.
    """
    eval_batch = 4096  # batch size used for evaluate/predict only
    trn_scores = []
    test_scores = []
    preds = []
    for _ in tqdm(range(n)):
        reset_weights(m)
        m.fit(x_trn, y_trn, **kwargs)
        trn_scores += [m.evaluate(x_trn, y_trn, eval_batch, verbose=0)]
        test_scores += [m.evaluate(x_test, y_test, eval_batch, verbose=0)]
        preds += [m.predict(x_test, eval_batch, verbose=0)]
    return trn_scores, test_scores, preds
In [6]:
def save_ensemble(preds, test_set, exp_name, save=True):
    """Average ensemble predictions, print their CRPS and optionally save a CSV.

    Args:
        preds: Sequence of per-member prediction arrays. It is stacked into one
            array indexed as ``[member, sample, column]``; column 0 is passed
            on as the mean and column 1 as the standard deviation.
        test_set: Object providing ``targets``, ``date_strs`` and
            ``station_ids`` for the samples that were predicted.
        exp_name: Experiment name; the CSV is written to
            ``{RESULTS_DIR}{exp_name}.csv``.
        save: If true, write the averaged predictions to disk.

    Returns:
        None. The ensemble score is printed, not returned.
    """
    preds = np.array(preds)  # stacked copy, the caller's arrays stay untouched
    preds[:, :, 1] = np.abs(preds[:, :, 1])  # Make sure std is positive
    mean_preds = np.mean(preds, 0)  # average over ensemble members
    mu, sigma = mean_preds[:, 0], mean_preds[:, 1]
    ens_score = crps_normal(mu, sigma, test_set.targets).mean()
    print(f'Ensemble test score = {ens_score}')
    if save:
        csv_fn = f'{RESULTS_DIR}{exp_name}.csv'
        results_df = create_results_df(test_set.date_strs, test_set.station_ids, mu, sigma)
        results_df.to_csv(csv_fn)
        # Report success only after the file has actually been written.
        print(f'Saved results in {csv_fn}')
In [7]:
def get_datasets(pickled_name, train_dates, test_dates=None, aux=False, reload=False):
    """Return ``(train_set, test_set)``, cached as a pickle under ``DATA_DIR``.

    Args:
        pickled_name: File name of the cache inside ``{DATA_DIR}pickled/``.
        train_dates: Date range passed to ``get_train_test_sets`` for training.
        test_dates: Date range passed to ``get_train_test_sets`` for testing.
            Defaults to ``['2016-01-01', '2017-01-01']``.
        aux: If true, ``aux_dict`` is passed to the loader; otherwise ``None``.
        reload: If true, rebuild the datasets even when the cache file exists.

    Returns:
        Tuple ``(train_set, test_set)``.

    Note:
        An existing cache file is returned as-is. It is NOT checked against
        ``train_dates``/``test_dates``/``aux``, so reusing a ``pickled_name``
        with different arguments silently returns the old datasets.
    """
    if test_dates is None:
        # A fresh list per call, so the loader cannot alter a shared default.
        test_dates = ['2016-01-01', '2017-01-01']
    pickle_fn = f'{DATA_DIR}pickled/{pickled_name}'
    if not os.path.exists(pickle_fn) or reload:
        var_dict = aux_dict if aux else None
        train_set, test_set = get_train_test_sets(
            DATA_DIR,
            train_dates,
            test_dates,
            aux_dict=var_dict,
        )
        # Make sure the cache directory exists so the freshly built datasets
        # are not lost to a FileNotFoundError when writing the pickle.
        os.makedirs(os.path.dirname(pickle_fn), exist_ok=True)
        # Save pickled dataset
        with open(pickle_fn, 'wb') as f:
            pickle.dump((train_set, test_set), f)
    else:
        # NOTE: pickle.load executes arbitrary code; only use trusted caches.
        with open(pickle_fn, 'rb') as f:
            train_set, test_set = pickle.load(f)
    return train_set, test_set
In [8]:
train_set, test_set = get_datasets('15_16.pkl', ['2015-01-01', '2016-01-01'], aux=False)
In [9]:
train_set.features.shape, train_set.targets.shape
Out[9]:
In [10]:
aux_train_set, aux_test_set = get_datasets('aux_15_16.pkl', ['2015-01-01', '2016-01-01'], aux=True)
In [11]:
n_features = aux_train_set.features.shape[1]; n_features
Out[11]:
In [12]:
fc = build_fc_model(2, 2, compile=True, lr=0.1)
In [13]:
fc.summary()
In [14]:
trn_scores, test_scores, preds = ensemble_scores(
fc, 10,
train_set.features, train_set.targets,
test_set.features, test_set.targets,
epochs=30, batch_size=4096, verbose=0,
)
In [15]:
test_scores
Out[15]:
In [16]:
save_ensemble(preds, test_set, 'fc_15')
In [17]:
fc_aux = build_fc_model(n_features, 2, compile=True, lr=0.02)
In [18]:
fc_aux.summary()
In [19]:
trn_scores, test_scores, preds = ensemble_scores(
fc_aux, 10,
aux_train_set.features, aux_train_set.targets,
aux_test_set.features, aux_test_set.targets,
epochs=30, batch_size=1024, verbose=0,
)
In [20]:
test_scores
Out[20]:
In [21]:
# `preds` were produced from aux_test_set.features, so score and save them
# against that same dataset's targets, dates and station ids.
save_ensemble(preds, aux_test_set, 'fc_aux_15')
In [32]:
nn_aux = build_hidden_model(n_features, 2, [32], compile=True, lr=0.02)
In [33]:
nn_aux.summary()
In [34]:
trn_scores, test_scores, preds = ensemble_scores(
nn_aux, 10,
aux_train_set.features, aux_train_set.targets,
aux_test_set.features, aux_test_set.targets,
epochs=15, batch_size=1024, verbose=0,
)
In [35]:
test_scores
Out[35]:
In [37]:
save_ensemble(preds, aux_test_set, 'nn_aux_15')
In [38]:
emb_size = 2
max_id = int(np.max([aux_train_set.cont_ids.max(), aux_test_set.cont_ids.max()]))
max_id
Out[38]:
In [23]:
fc_emb = build_emb_model(2, 2, [], emb_size, max_id, compile=True, lr=0.02)
In [24]:
fc_emb.summary()
In [25]:
trn_scores, test_scores, preds = ensemble_scores(
fc_emb, 10,
[train_set.features, train_set.cont_ids], train_set.targets,
[test_set.features, test_set.cont_ids], test_set.targets,
epochs=30, batch_size=1024, verbose=0,
)
In [26]:
test_scores
Out[26]:
In [27]:
save_ensemble(preds, test_set, 'fc_emb_15')
In [28]:
emb_size = 2
max_id = int(np.max([aux_train_set.cont_ids.max(), aux_test_set.cont_ids.max()]))
max_id
Out[28]:
In [29]:
fc_aux_emb = build_emb_model(n_features, 2, [], emb_size, max_id, compile=True, lr=0.02)
In [30]:
fc_aux_emb.summary()
In [31]:
trn_scores, test_scores, preds = ensemble_scores(
fc_aux_emb, 10,
[aux_train_set.features, aux_train_set.cont_ids], aux_train_set.targets,
[aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets,
epochs=30, batch_size=1024, verbose=0,
)
In [32]:
test_scores
Out[32]:
In [33]:
# `preds` were produced from aux_test_set (features and cont_ids), so score
# and save them against that same dataset's targets, dates and station ids.
save_ensemble(preds, aux_test_set, 'fc_aux_emb_15')
In [34]:
nn_aux_emb = build_emb_model(n_features, 2, [50], emb_size, max_id, compile=True, lr=0.01)
In [35]:
nn_aux_emb.summary()
In [36]:
trn_scores, test_scores, preds = ensemble_scores(
nn_aux_emb, 10,
[aux_train_set.features, aux_train_set.cont_ids], aux_train_set.targets,
[aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets,
epochs=30, batch_size=1024, verbose=0,
)
In [37]:
test_scores, np.mean(test_scores), np.std(test_scores)
Out[37]:
In [38]:
save_ensemble(preds, aux_test_set, 'nn_aux_emb_15')
In [10]:
train_set_long, test_set_long = get_datasets('07_16.pkl', ['2007-01-03', '2016-01-01'], aux=False)
In [11]:
train_set_long.features.shape
Out[11]:
In [12]:
aux_train_set_long, aux_test_set_long = get_datasets('aux_07_16.pkl', ['2007-01-03', '2016-01-01'], aux=True)
In [13]:
n_features = aux_train_set_long.features.shape[1]; n_features
Out[13]:
In [43]:
fc = build_fc_model(2, 2, compile=True, lr=0.1)
In [44]:
fc.summary()
In [45]:
trn_scores, test_scores, preds = ensemble_scores(
fc, 10,
train_set_long.features, train_set_long.targets,
test_set_long.features, test_set_long.targets,
epochs=15, batch_size=4096, verbose=0,
)
In [46]:
test_scores
Out[46]:
In [47]:
save_ensemble(preds, test_set_long, 'fc_07-15')
In [48]:
fc_aux = build_fc_model(n_features, 2, compile=True, lr=0.02)
In [49]:
fc_aux.summary()
In [50]:
trn_scores, test_scores, preds = ensemble_scores(
fc_aux, 10,
aux_train_set_long.features, aux_train_set_long.targets,
aux_test_set_long.features, aux_test_set_long.targets,
epochs=10, batch_size=1024, verbose=0,
)
In [51]:
test_scores
Out[51]:
In [52]:
save_ensemble(preds, aux_test_set_long, 'fc_aux_07-15')
In [53]:
nn_aux = build_hidden_model(n_features, 2, [64], compile=True, lr=0.02)
In [54]:
nn_aux.summary()
In [55]:
trn_scores, test_scores, preds = ensemble_scores(
nn_aux, 10,
aux_train_set_long.features, aux_train_set_long.targets,
aux_test_set_long.features, aux_test_set_long.targets,
epochs=10, batch_size=1024, verbose=0,
)
In [56]:
test_scores
Out[56]:
In [57]:
save_ensemble(preds, aux_test_set_long, 'nn_aux_07-15')
In [14]:
emb_size = 2
max_id = int(np.max([aux_train_set_long.cont_ids.max(), aux_test_set_long.cont_ids.max()]))
max_id
Out[14]:
In [59]:
fc_emb = build_emb_model(2, 2, [], emb_size, max_id, compile=True, lr=0.02)
In [60]:
fc_emb.summary()
In [61]:
trn_scores, test_scores, preds = ensemble_scores(
fc_emb, 10,
[train_set_long.features, train_set_long.cont_ids], train_set_long.targets,
[test_set_long.features, test_set_long.cont_ids], test_set_long.targets,
epochs=10, batch_size=1024, verbose=0,
)
In [62]:
test_scores
Out[62]:
In [63]:
save_ensemble(preds, test_set_long, 'fc_emb_07-15')
In [64]:
fc_aux_emb = build_emb_model(n_features, 2, [], emb_size, max_id, compile=True, lr=0.02)
In [65]:
fc_aux_emb.summary()
In [66]:
trn_scores, test_scores, preds = ensemble_scores(
fc_aux_emb, 10,
[aux_train_set_long.features, aux_train_set_long.cont_ids], aux_train_set_long.targets,
[aux_test_set_long.features, aux_test_set_long.cont_ids], aux_test_set_long.targets,
epochs=10, batch_size=1024, verbose=0,
)
In [67]:
test_scores
Out[67]:
In [68]:
save_ensemble(preds, aux_test_set_long, 'fc_aux_emb_07-15')
In [69]:
nn_aux_emb = build_emb_model(n_features, 2, [512], emb_size, max_id, compile=True, lr=0.002)
In [70]:
nn_aux_emb.summary()
In [71]:
trn_scores, test_scores, preds = ensemble_scores(
nn_aux_emb, 10,
[aux_train_set_long.features, aux_train_set_long.cont_ids], aux_train_set_long.targets,
[aux_test_set_long.features, aux_test_set_long.cont_ids], aux_test_set_long.targets,
epochs=15, batch_size=4096, verbose=0
)
In [72]:
test_scores
Out[72]:
In [73]:
# `preds` were produced from aux_test_set_long, so score and save them against
# that dataset rather than the test set of the 2015-only experiments.
save_ensemble(preds, aux_test_set_long, 'nn_aux_emb_07-15')
In [8]:
# One (train_set, test_set) pair per training start year, keyed by the
# two-digit year. All use aux variables and the default test dates.
datasets = {
    '07': get_datasets('aux_07_16.pkl', ['2007-01-03', '2016-01-01'], aux=True),
}
for y in tqdm(range(8, 16)):
    yy = f'{y:02d}'
    # NOTE(review): for yy == '15' the cache name 'aux_15_16.pkl' is also used
    # earlier in this notebook with a '2015-01-01' start date; if that file
    # already exists it is loaded unchanged. Confirm this is intended.
    datasets[yy] = get_datasets(
        f'aux_{yy}_16.pkl',
        [f'20{yy}-01-03', '2016-01-01'],
        aux=True,
    )
In [18]:
# Test score of the aux + embedding linear model as a function of the first
# training year. `n_features`, `emb_size` and `max_id` come from earlier cells.
fc_scores = []
for y in tqdm(range(7, 16)):
    yy = str(y).zfill(2)
    # Loop-specific names: rebinding the notebook-level `train_set`/`test_set`
    # here would silently change what the earlier save/score cells refer to.
    yr_train_set, yr_test_set = datasets[yy]
    fc_aux_emb = build_emb_model(n_features, 2, [], emb_size, max_id, compile=True, lr=0.02)
    fc_aux_emb.fit(
        [yr_train_set.features, yr_train_set.cont_ids], yr_train_set.targets,
        batch_size=1024, epochs=30, verbose=0,
    )
    fc_scores.append(fc_aux_emb.evaluate(
        [yr_test_set.features, yr_test_set.cont_ids], yr_test_set.targets,
        batch_size=4096, verbose=0,
    ))
In [19]:
with open('./fc_scores.pkl', 'wb') as f:
pickle.dump(fc_scores, f)
In [21]:
plt.plot(fc_scores)
Out[21]:
In [ ]: