Ensembling and online learning


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline
from nn_src.imports import *
from copy import deepcopy


/export/home/srasp/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.

In [3]:
#DATA_DIR = '/Users/stephanrasp/data/'
DATA_DIR = '/scratch/srasp/'

In [4]:
# Open the dataset with training = 2015 and test = 2016
with open(DATA_DIR + 'pickled/aux_15_16.pkl', 'rb') as f:
    aux_train_set, aux_test_set = pickle.load(f)

In [9]:
def reset_weights(model):
    session = K.get_session()
    for layer in model.layers: 
        if hasattr(layer, 'kernel_initializer'):
            layer.kernel.initializer.run(session=session)

In [57]:
def avg_score(m, n, x_trn, y_trn, x_test, y_test, **kwargs):
    trn_scores, test_scores, preds = [], [], []
    for i in tqdm(range(n)):
        reset_weights(m)
        m.fit(x_trn, y_trn, **kwargs)
        trn_scores.append(m.evaluate(x_trn, y_trn, 4096, verbose=0))
        test_scores.append(m.evaluate(x_test, y_test, 4096, verbose=0))
        preds.append(m.predict(x_test, 4096, verbose=0))
    return trn_scores, test_scores, preds

In [ ]:

Ensembles


In [7]:
n_features = aux_train_set.features.shape[1]; n_features


Out[7]:
40

In [6]:
emb_size = 2
max_id = int(np.max([aux_train_set.cont_ids.max(), aux_test_set.cont_ids.max()]))
max_id


Out[6]:
536

In [58]:
def build_emb_model(n_features, n_outputs, hidden_nodes, emb_size, max_id,
                    compile=False, optimizer='adam', lr=0.01,
                    loss=crps_cost_function, activation='relu', regularizer=None):
    if type(hidden_nodes) is not list:
        hidden_nodes = [hidden_nodes]

    features_in = Input(shape=(n_features,))
    id_in = Input(shape=(1,))
    emb = Embedding(max_id + 1, emb_size)(id_in)
    emb = Flatten()(emb)
    x = Concatenate()([features_in, emb])
    for h in hidden_nodes:
        x = Dense(h, activation=activation, kernel_regularizer=regularizer)(x)
    x = Dense(n_outputs, activation='linear')(x)
    model = Model(inputs=[features_in, id_in], outputs=x)

    if compile:
        opt = keras.optimizers.__dict__[optimizer](lr=lr)
        model.compile(optimizer=opt, loss=loss)
    return model

In [75]:
emb_nnet = build_emb_model(n_features, 2, [50], emb_size, max_id, compile=True, lr=0.01)
emb_nnet.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_26 (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_13 (Embedding)        (None, 1, 2)         1074        input_26[0][0]                   
__________________________________________________________________________________________________
input_25 (InputLayer)           (None, 40)           0                                            
__________________________________________________________________________________________________
flatten_13 (Flatten)            (None, 2)            0           embedding_13[0][0]               
__________________________________________________________________________________________________
concatenate_13 (Concatenate)    (None, 42)           0           input_25[0][0]                   
                                                                 flatten_13[0][0]                 
__________________________________________________________________________________________________
dense_25 (Dense)                (None, 50)           2150        concatenate_13[0][0]             
__________________________________________________________________________________________________
dense_26 (Dense)                (None, 2)            102         dense_25[0][0]                   
==================================================================================================
Total params: 3,326
Trainable params: 3,326
Non-trainable params: 0
__________________________________________________________________________________________________

In [76]:
trn_scores, test_scores, preds = avg_score(
    emb_nnet, 5,
    [aux_train_set.features, aux_train_set.cont_ids], aux_train_set.targets,
    [aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets,
    epochs=30, batch_size=1024, verbose=0,
)




In [78]:
test_scores, np.mean(test_scores)


Out[78]:
([0.8383716718552253,
  0.8389709734562365,
  0.8934082225167738,
  0.8474290403488233,
  0.8307095793864994],
 0.8497778975127117)

In [79]:
preds = np.array(preds); preds.shape


Out[79]:
(5, 182218, 2)

In [80]:
# Make sure std is positive
preds[:, :, 1] = np.abs(preds[:, :, 1])

In [81]:
mean_preds = np.mean(preds, 0); mean_preds.shape, aux_test_set.targets.shape


Out[81]:
((182218, 2), (182218,))

In [82]:
crps_normal(mean_preds[:, 0], mean_preds[:, 1], aux_test_set.targets).mean()


Out[82]:
0.8203130683371079

Online learning


In [96]:
emb_nnet.evaluate(
    [aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets, 4096, 0)


Out[96]:
0.8307095793864994

In [99]:
emb_nnet.save('./tmp.h5')

In [106]:
unique_dates = sorted(list(set(aux_test_set.date_strs.flat))); unique_dates[:5]


Out[106]:
['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04', '2016-01-05']

In [107]:
def get_date_idxs(date_str): return aux_test_set.date_strs == date_str

In [242]:
def online_loop(m):
    preds = []
    for i in tqdm(range(len(unique_dates)-1)):
        idxs_trn = get_date_idxs(unique_dates[i])
        idxs_test = get_date_idxs(unique_dates[i+1])
        m.train_on_batch(
            [aux_test_set.features[idxs_trn], aux_test_set.cont_ids[idxs_trn]], aux_test_set.targets[idxs_trn],
        )
        preds.append(m.predict_on_batch([aux_test_set.features[idxs_test], aux_test_set.cont_ids[idxs_test]]))
    return np.concatenate(preds)

In [283]:
test_m = keras.models.load_model('./tmp.h5')

In [284]:
#test_m.trainable=False

In [285]:
test_m.compile('adam', crps_cost_function)

In [286]:
test_m.optimizer.lr=0.00001

In [287]:
online_preds = online_loop(test_m)




In [288]:
online_preds.shape


Out[288]:
(181719, 2)

In [289]:
test_idxs = ~get_date_idxs(unique_dates[0])

In [290]:
online_preds.shape, aux_test_set.targets[test_idxs].shape


Out[290]:
((181719, 2), (181719,))

In [291]:
crps_normal(online_preds[:, 0], online_preds[:, 1], aux_test_set.targets[test_idxs]).mean()


Out[291]:
-0.830081559553131

In [236]:
test_m.evaluate(
    [aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets, 4096, 0)


Out[236]:
0.8286054799061859

Combine the two


In [305]:
def avg_score_online(m, n, x_trn, y_trn, x_test, y_test, **kwargs):
    trn_scores, test_scores, preds, online_preds = [], [], [], []
    for i in tqdm(range(n)):
        m.compile('adam', crps_cost_function)
        m.optimizer.lr=0.01
        reset_weights(m)
        m.fit(x_trn, y_trn, **kwargs)
        trn_scores.append(m.evaluate(x_trn, y_trn, 4096, verbose=0))
        test_scores.append(m.evaluate(x_test, y_test, 4096, verbose=0))
        preds.append(m.predict(x_test, 4096, verbose=0))
        m.optimizer.lr=0.00001
        m.compile('adam', crps_cost_function)
        online_preds.append(online_loop(m))
    return trn_scores, test_scores, preds, online_preds

In [317]:
emb_nnet = build_emb_model(n_features, 2, [50], emb_size, max_id, compile=True, lr=0.01)

In [318]:
trn_scores, test_scores, preds, online_preds = avg_score_online(
    emb_nnet, 5,
    [aux_train_set.features, aux_train_set.cont_ids], aux_train_set.targets,
    [aux_test_set.features, aux_test_set.cont_ids], aux_test_set.targets,
    epochs=30, batch_size=1024, verbose=0,
)




In [319]:
test_scores


Out[319]:
[0.8424227915995457,
 0.8488390051513499,
 0.834612192998116,
 0.8654272062668905,
 0.8546882446186251]

In [320]:
online_preds[0].shape


Out[320]:
(181719, 2)

In [321]:
online_preds = np.array(online_preds); online_preds.shape


Out[321]:
(5, 181719, 2)

In [322]:
# Make sure std is positive
online_preds[:, :, 1] = np.abs(online_preds[:, :, 1])

In [323]:
online_preds[0,-5:]


Out[323]:
array([[-1.2359246,  1.8562961],
       [-1.6576102,  1.753814 ],
       [-2.6927898,  1.8054234],
       [-4.257032 ,  2.176898 ],
       [-5.8717957,  2.3662326]], dtype=float32)

In [324]:
preds[0][-5:]


Out[324]:
array([[-1.5881306,  1.4701391],
       [-2.257106 ,  1.3159055],
       [-2.9522974,  1.3489943],
       [-4.5483594,  1.7433686],
       [-6.2532177,  1.9737092]], dtype=float32)

In [325]:
mean_preds = np.mean(online_preds, 0); mean_preds.shape, aux_test_set.targets[test_idxs].shape


Out[325]:
((181719, 2), (181719,))

In [326]:
crps_normal(mean_preds[:, 0], mean_preds[:, 1], aux_test_set.targets[test_idxs]).mean()


Out[326]:
0.8492245477375177

In [327]:
crps_normal(online_preds[0,:, 0], online_preds[0,:, 1], aux_test_set.targets[test_idxs]).mean()


Out[327]:
0.8573734581514151

In [ ]:


In [329]:
preds = np.array(preds); online_preds.shape


Out[329]:
(5, 181719, 2)

In [330]:
# Make sure std is positive
preds[:, :, 1] = np.abs(preds[:, :, 1])

In [331]:
mean_preds = np.mean(preds, 0); mean_preds.shape, aux_test_set.targets.shape


Out[331]:
((182218, 2), (182218,))

In [332]:
crps_normal(mean_preds[:, 0], mean_preds[:, 1], aux_test_set.targets).mean()


Out[332]:
0.8222618037912671

In [ ]: