In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
%matplotlib inline
from nn_src.imports import *
from copy import deepcopy
In [3]:
# Root directory of the input data; the pickled datasets are read from
# DATA_DIR + 'pickled/...'. The commented-out line is an alternative location.
#DATA_DIR = '/Users/stephanrasp/data/'
DATA_DIR = '/scratch/srasp/'
In [4]:
# Read the pickled (train, test) dataset pair: training = 2015, test = 2016.
# NOTE(review): pickle.load can execute arbitrary code, so only point this at
# files you produced yourself.
with open(DATA_DIR + 'pickled/aux_15_16.pkl', mode='rb') as f:
    aux_train_set, aux_test_set = pickle.load(f)
In [9]:
def reset_weights(model):
    """Re-initialize all weights of `model` in place.

    Runs the initializer op of every variable listed in ``layer.weights`` for
    each layer of the model, inside the current Keras backend session. This
    covers kernels, biases and embedding tables alike, so a subsequent fit
    starts from a fully fresh set of parameters.

    Only the layers' own weights are touched; the state of a compiled
    optimizer is not reset here.

    Args:
        model: Keras model whose layers expose backend variables via
            ``layer.weights``.
    """
    session = K.get_session()
    for layer in model.layers:
        # Layers without parameters simply have an empty weight list.
        for weight in layer.weights:
            weight.initializer.run(session=session)
In [57]:
def avg_score(m, n, x_trn, y_trn, x_test, y_test, **kwargs):
    """Fit `m` from reset weights `n` times and collect scores and predictions.

    Each repetition resets the model weights, fits on the training data, then
    evaluates on the training and test data and predicts on the test data
    (evaluation and prediction use a batch size of 4096, silently).

    Args:
        m: compiled Keras model.
        n: number of repetitions.
        x_trn, y_trn: training inputs and targets.
        x_test, y_test: test inputs and targets.
        **kwargs: forwarded unchanged to ``m.fit``.

    Returns:
        Tuple ``(trn_scores, test_scores, preds)`` of three lists with one
        entry per repetition.
    """
    eval_batch_size = 4096
    runs = []
    for _ in tqdm(range(n)):
        reset_weights(m)
        m.fit(x_trn, y_trn, **kwargs)
        trn_score = m.evaluate(x_trn, y_trn, eval_batch_size, verbose=0)
        test_score = m.evaluate(x_test, y_test, eval_batch_size, verbose=0)
        test_pred = m.predict(x_test, eval_batch_size, verbose=0)
        runs.append((trn_score, test_score, test_pred))
    trn_scores = [run[0] for run in runs]
    test_scores = [run[1] for run in runs]
    preds = [run[2] for run in runs]
    return trn_scores, test_scores, preds
In [ ]:
In [7]:
# Number of input features: size of the second axis of the training feature array.
n_features = aux_train_set.features.shape[1]; n_features
Out[7]:
In [6]:
# Length of the embedding vector learned per ID.
emb_size = 2
# Largest ID found in either the training or the test set; the embedding
# table built later is sized as max_id + 1.
max_id = int(np.max([aux_train_set.cont_ids.max(), aux_test_set.cont_ids.max()]))
max_id
Out[6]:
In [58]:
def build_emb_model(n_features, n_outputs, hidden_nodes, emb_size, max_id,
                    compile=False, optimizer='adam', lr=0.01,
                    loss=crps_cost_function, activation='relu', regularizer=None):
    """Build a dense network whose input is features plus an ID embedding.

    The model has two inputs: a feature vector of length `n_features` and a
    single integer ID. The ID is mapped through an embedding of width
    `emb_size`, flattened, concatenated with the features and passed through
    the hidden dense layers to a linear output layer.

    Args:
        n_features: length of the feature input vector.
        n_outputs: number of units of the linear output layer.
        hidden_nodes: width of the hidden layer, or a list/tuple of widths
            for several hidden layers (an empty sequence gives no hidden layer).
        emb_size: output dimension of the embedding.
        max_id: largest ID value; the embedding table gets max_id + 1 rows.
        compile: if true, compile the model before returning it. (The name
            shadows the builtin inside this function; kept for compatibility.)
        optimizer: attribute name of the optimizer in ``keras.optimizers``.
        lr: learning rate handed to the optimizer constructor.
        loss: loss passed to ``model.compile``.
        activation: activation of the hidden layers.
        regularizer: kernel regularizer of the hidden layers.

    Returns:
        The Keras model, compiled only if `compile` is true.
    """
    # A bare width becomes a one-element list; lists AND tuples are taken as
    # given (a tuple used to be wrapped whole and passed to Dense as `units`).
    if not isinstance(hidden_nodes, (list, tuple)):
        hidden_nodes = [hidden_nodes]

    features_in = Input(shape=(n_features,))
    id_in = Input(shape=(1,))
    # max_id + 1 rows so that every ID in 0..max_id is a valid index.
    emb = Embedding(max_id + 1, emb_size)(id_in)
    emb = Flatten()(emb)
    x = Concatenate()([features_in, emb])
    for h in hidden_nodes:
        x = Dense(h, activation=activation, kernel_regularizer=regularizer)(x)
    x = Dense(n_outputs, activation='linear')(x)
    model = Model(inputs=[features_in, id_in], outputs=x)

    if compile:
        opt = getattr(keras.optimizers, optimizer)(lr=lr)
        model.compile(optimizer=opt, loss=loss)
    return model
In [75]:
# Embedding network with a single hidden layer of 50 nodes and 2 outputs,
# compiled with a learning rate of 0.01.
emb_nnet = build_emb_model(n_features, 2, [50], emb_size, max_id, compile=True, lr=0.01)
emb_nnet.summary()
In [76]:
# Train the same network 5 times (weights are reset by avg_score before each
# fit) on the training set and score/predict each run on the test set.
trn_scores, test_scores, preds = avg_score(
emb_nnet, 5,
[aux_train_set.features, aux_train_set.cont_ids], aux_train_set.targets,
[aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets,
epochs=30, batch_size=1024, verbose=0,
)
In [78]:
# Test score of each run and the mean over runs.
test_scores, np.mean(test_scores)
Out[78]:
In [79]:
# Stack the per-run predictions into one array; runs are on the first axis.
preds = np.array(preds); preds.shape
Out[79]:
In [80]:
# Make sure std is positive
preds[:, :, 1] = np.abs(preds[:, :, 1])
In [81]:
# Average the predictions over the runs (axis 0) and compare shapes with the targets.
mean_preds = np.mean(preds, 0); mean_preds.shape, aux_test_set.targets.shape
Out[81]:
In [82]:
# Mean CRPS of the run-averaged prediction (column 0 and column 1 are passed
# as the two distribution parameters).
crps_normal(mean_preds[:, 0], mean_preds[:, 1], aux_test_set.targets).mean()
Out[82]:
In [96]:
# Test loss of the network in its current state, i.e. after the last run.
emb_nnet.evaluate(
[aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets, 4096, 0)
Out[96]:
In [99]:
# Save the current model; it is loaded again further down as `test_m`.
emb_nnet.save('./tmp.h5')
In [106]:
# Sorted list of the distinct date strings occurring in the test set.
unique_dates = sorted(set(aux_test_set.date_strs.flat))
unique_dates[:5]
Out[106]:
In [107]:
def get_date_idxs(date_str):
    """Compare the test set's date strings element-wise with `date_str`.

    Reads the module-level `aux_test_set`; the result of the ``==``
    comparison is returned as is and is used as an index mask by the callers.
    """
    date_mask = aux_test_set.date_strs == date_str
    return date_mask
In [242]:
def online_loop(m):
    """Walk through the test dates, updating on one date and predicting the next.

    For every consecutive pair of entries in the module-level `unique_dates`,
    the model takes one ``train_on_batch`` step on all test-set samples of the
    earlier date and then predicts all samples of the later date. No
    prediction is made for the first date.

    Args:
        m: compiled Keras model taking ``[features, cont_ids]`` as input.

    Returns:
        The per-date predictions concatenated in the order of `unique_dates`.
        NOTE(review): comparing this with ``targets[~first_date_mask]``
        assumes the test set rows are already ordered by date - confirm.
    """
    daily_preds = []
    n_steps = len(unique_dates) - 1
    for step in tqdm(range(n_steps)):
        trn_mask = get_date_idxs(unique_dates[step])
        test_mask = get_date_idxs(unique_dates[step + 1])

        # One gradient step on everything observed on the current date ...
        trn_inputs = [aux_test_set.features[trn_mask], aux_test_set.cont_ids[trn_mask]]
        m.train_on_batch(trn_inputs, aux_test_set.targets[trn_mask])

        # ... then predict the following date with the updated model.
        test_inputs = [aux_test_set.features[test_mask], aux_test_set.cont_ids[test_mask]]
        daily_preds.append(m.predict_on_batch(test_inputs))
    return np.concatenate(daily_preds)
In [283]:
# Load the model saved to './tmp.h5' as the starting point for the online updates.
# NOTE(review): the saved model was compiled with a custom loss; load_model may
# need custom_objects={'crps_cost_function': crps_cost_function} - confirm.
test_m = keras.models.load_model('./tmp.h5')
In [284]:
#test_m.trainable=False
In [285]:
# Recompile, which attaches a new 'adam' optimizer to the model.
test_m.compile('adam', crps_cost_function)
In [286]:
# Set the small online learning rate AFTER compiling so that it applies to
# the optimizer that is actually used.
test_m.optimizer.lr=0.00001
In [287]:
# Day-by-day update/predict pass over the test set.
online_preds = online_loop(test_m)
In [288]:
online_preds.shape
Out[288]:
In [289]:
# Mask of all test samples except those of the first date, for which
# online_loop produces no prediction.
test_idxs = ~get_date_idxs(unique_dates[0])
In [290]:
# Sanity check: predictions and masked targets should have matching lengths.
online_preds.shape, aux_test_set.targets[test_idxs].shape
Out[290]:
In [291]:
# The second output column is an unconstrained linear output and may be
# negative; take its absolute value before scoring, as is done for the other
# prediction arrays in this notebook.
crps_normal(online_preds[:, 0], np.abs(online_preds[:, 1]), aux_test_set.targets[test_idxs]).mean()
Out[291]:
In [236]:
# Loss of `test_m` on the full test set (batch size 4096, silent).
# NOTE(review): this cell's execution count (236) predates the cells above
# (283-291), so any stored output is from an earlier state of `test_m`.
test_m.evaluate(
[aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets, 4096, 0)
Out[236]:
In [305]:
def avg_score_online(m, n, x_trn, y_trn, x_test, y_test, **kwargs):
    """Repeat offline training followed by an online pass `n` times.

    Each repetition recompiles the model, resets its weights, fits it on the
    training data with a learning rate of 0.01, records train/test scores and
    test predictions, then recompiles with a learning rate of 0.00001 and runs
    ``online_loop`` on the model.

    Args:
        m: Keras model.
        n: number of repetitions.
        x_trn, y_trn: training inputs and targets.
        x_test, y_test: test inputs and targets for the offline evaluation.
            The online pass does not use them; ``online_loop`` reads the
            module-level test set.
        **kwargs: forwarded unchanged to ``m.fit``.

    Returns:
        Tuple ``(trn_scores, test_scores, preds, online_preds)`` of four lists
        with one entry per repetition.
    """
    trn_scores, test_scores, preds, online_preds = [], [], [], []
    for _ in tqdm(range(n)):
        # Offline phase: fresh optimizer, learning rate set after compiling.
        m.compile('adam', crps_cost_function)
        m.optimizer.lr = 0.01
        reset_weights(m)
        m.fit(x_trn, y_trn, **kwargs)
        trn_scores.append(m.evaluate(x_trn, y_trn, 4096, verbose=0))
        test_scores.append(m.evaluate(x_test, y_test, 4096, verbose=0))
        preds.append(m.predict(x_test, 4096, verbose=0))

        # Online phase: compile FIRST, then lower the learning rate. compile()
        # installs a new optimizer, so a rate set before it would be thrown
        # away and the online updates would run at the optimizer's default.
        m.compile('adam', crps_cost_function)
        m.optimizer.lr = 0.00001
        online_preds.append(online_loop(m))
    return trn_scores, test_scores, preds, online_preds
In [317]:
# Fresh embedding network (one hidden layer of 50 nodes, 2 outputs).
emb_nnet = build_emb_model(n_features, 2, [50], emb_size, max_id, compile=True, lr=0.01)
In [318]:
# 5 repetitions of offline training followed by an online pass over the test set.
trn_scores, test_scores, preds, online_preds = avg_score_online(
emb_nnet, 5,
[aux_train_set.features, aux_train_set.cont_ids], aux_train_set.targets,
[aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets,
epochs=30, batch_size=1024, verbose=0,
)
In [319]:
# Offline test score of each repetition.
test_scores
Out[319]:
In [320]:
online_preds[0].shape
Out[320]:
In [321]:
# Stack the per-repetition online predictions; repetitions are on the first axis.
online_preds = np.array(online_preds); online_preds.shape
Out[321]:
In [322]:
# Make sure std is positive
online_preds[:, :, 1] = np.abs(online_preds[:, :, 1])
In [323]:
# Last five online predictions of the first repetition ...
online_preds[0,-5:]
Out[323]:
In [324]:
# ... next to the last five offline predictions of the same repetition.
preds[0][-5:]
Out[324]:
In [325]:
# Average the online predictions over the repetitions (axis 0).
mean_preds = np.mean(online_preds, 0); mean_preds.shape, aux_test_set.targets[test_idxs].shape
Out[325]:
In [326]:
# Mean CRPS of the repetition-averaged online prediction; `test_idxs` drops
# the first date, which has no online prediction.
crps_normal(mean_preds[:, 0], mean_preds[:, 1], aux_test_set.targets[test_idxs]).mean()
Out[326]:
In [327]:
# Mean CRPS of the first repetition alone, for comparison.
crps_normal(online_preds[0,:, 0], online_preds[0,:, 1], aux_test_set.targets[test_idxs]).mean()
Out[327]:
In [ ]:
In [329]:
# Stack the per-repetition offline predictions and show the shape of the
# array that was just built.
preds = np.array(preds); preds.shape
Out[329]:
In [330]:
# Make sure std is positive
preds[:, :, 1] = np.abs(preds[:, :, 1])
In [331]:
# Average the offline predictions over the repetitions (axis 0).
mean_preds = np.mean(preds, 0); mean_preds.shape, aux_test_set.targets.shape
Out[331]:
In [332]:
# Mean CRPS of the repetition-averaged offline prediction on the full test set.
crps_normal(mean_preds[:, 0], mean_preds[:, 1], aux_test_set.targets).mean()
Out[332]:
In [ ]: