Top results from all model categories

Here, the top models from each model category are compared against each other.


In [13]:
# Date on which the model run took place
date = '20170616'

In [50]:
import os
import sys
import datetime as dt
import time as t
import pandas as pd
import numpy as np

import pytz

# Setup for LaTeX export: https://matplotlib.org/users/pgf.html. Must be configured before pyplot is imported.
def figsize(scale):
    fig_width_pt = 469.755                          # Get this from LaTeX using \the\textwidth
    inches_per_pt = 1.0/72.27                       # Convert pt to inch
    golden_mean = (np.sqrt(5.0)-1.0)/2.0            # Aesthetic ratio (you could change this)
    fig_width = fig_width_pt*inches_per_pt*scale    # width in inches
    fig_height = fig_width*golden_mean              # height in inches
    fig_size = [fig_width,fig_height]
    return fig_size

import matplotlib as mpl
mpl.use('pgf')
pgf_with_rc_fonts = {
    "text.usetex": True,
    "font.family": "serif",
    "axes.labelsize": 10,               # LaTeX default is 10pt font.
    "font.size": 10,
    "legend.fontsize": 8,               # Make the legend/label fonts a little smaller
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "figure.figsize": figsize(0.9),     # default fig size of 0.9 textwidth
    #"font.serif": [],                   # use latex default serif font
    #"font.sans-serif": ["DejaVu Sans"], # use a specific sans-serif font
}
mpl.rcParams.update(pgf_with_rc_fonts)

import matplotlib.pyplot as plt

from tabulate import tabulate

from keras.models import load_model
from keras import backend as K

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Import custom module functions
module_path = os.path.abspath(os.path.join('../'))
if module_path not in sys.path:
    sys.path.append(module_path)

from lstm_load_forecasting import data, lstm

from IPython.display import HTML
from IPython.display import display
%matplotlib 
#notebook
#mpl.rcParams['figure.figsize'] = (9,5)



Preparation


In [15]:
# Paths where the dataset, the results and the trained models are stored
path = os.path.join(os.path.abspath(''), '../data/fulldataset.csv')

res_path = os.path.abspath('../results/')
model_dir = os.path.abspath('../models/')

# Load validation results
valid_results = {}
valid_results['m1'] = pd.read_csv(os.path.join(res_path, 'notebook_01/', str('01_results_' + date + '.csv')), delimiter=';')
valid_results['m2'] = pd.read_csv(os.path.join(res_path, 'notebook_02/', str('02_results_' + date + '.csv')), delimiter=';')
valid_results['m3'] = pd.read_csv(os.path.join(res_path, 'notebook_03/', str('03_results_' + date + '.csv')), delimiter=';')
valid_results['m4'] = pd.read_csv(os.path.join(res_path, 'notebook_04/', str('04_results_' + date + '.csv')), delimiter=';')
valid_results['m5'] = pd.read_csv(os.path.join(res_path, 'notebook_05/', str('05_results_' + date + '.csv')), delimiter=';')
valid_results['m6'] = pd.read_csv(os.path.join(res_path, 'notebook_06/', str('06_results_' + date + '.csv')), delimiter=';')

# Load test results
test_results_01 = pd.read_csv(os.path.join(res_path, 'notebook_01/', str('01_test_results' + date + '.csv')), delimiter=';')
test_results_02 = pd.read_csv(os.path.join(res_path, 'notebook_02/', str('02_test_results' + date + '.csv')), delimiter=';')
test_results_03 = pd.read_csv(os.path.join(res_path, 'notebook_03/', str('03_test_results' + date + '.csv')), delimiter=';')
test_results_04 = pd.read_csv(os.path.join(res_path, 'notebook_04/', str('04_test_results' + date + '.csv')), delimiter=';')
test_results_05 = pd.read_csv(os.path.join(res_path, 'notebook_05/', str('05_test_results' + date + '.csv')), delimiter=';')
test_results_06 = pd.read_csv(os.path.join(res_path, 'notebook_06/', str('06_test_results' + date + '.csv')), delimiter=';')

columns=['Model name', 'Mean absolute error', 'Mean squared error']
best_models = {}

# Split date for the train and test data. As the TBATS and ARIMA benchmarks need 2 full cycles of all seasonalities, it has to be after Jan 01.
loc_tz = pytz.timezone('Europe/Zurich')
split_date = loc_tz.localize(dt.datetime(2017,2,1,0,0,0,0))

Model 1: ENTSOE Only


In [16]:
test_results_01 = test_results_01.sort_values('Mean absolute error', ascending=True)
best_models['m1'] = [test_results_01.iloc[0]['Model name'], ['entsoe']]
test_results_01


Out[16]:
Model name Mean absolute error Mean squared error
0 01_386_l-150_d-0.1 0.292969 0.137098
1 01_339_l-125_d-0.2 0.295669 0.141007
2 01_385_l-150 0.296273 0.139769
3 01_338_l-125_d-0.1 0.299512 0.140843
4 01_291_l-100_d-0.2 0.300057 0.141976

Model 2: Calendar only


In [17]:
test_results_02 = test_results_02.sort_values('Mean absolute error', ascending=True)
best_models['m2'] = [test_results_02.iloc[0]['Model name'], ['calendar']]
test_results_02


Out[17]:
Model name Mean absolute error Mean squared error
0 1_02_2_l-5_d-0.1 0.304772 0.150897
1 1_02_50_l-10_d-0.1 0.319560 0.164797
2 1_02_135_l-20_l-50_d-0.2 0.341185 0.180759
3 3_02_51_l-100_d-0.2 0.351201 0.189321
4 3_02_49_l-100 0.366878 0.209458

Model 3: Weather only


In [18]:
test_results_03 = test_results_03.sort_values('Mean absolute error', ascending=True)
best_models['m3'] = [test_results_03.iloc[0]['Model name'], ['weather']]
test_results_03


Out[18]:
Model name Mean absolute error Mean squared error
0 3_03_54_l-100_l-10_d-0.2 0.432015 0.282780
1 2_03_5_l-30_l-10_d-0.1 0.442317 0.302648
2 3_03_5_l-75_l-10_d-0.1 0.457086 0.314461
3 3_03_79_l-100_l-20_l-15 0.467399 0.333536
4 2_03_4_l-30_l-10 0.473051 0.346634

Model 4: ENTSOE + Calendar


In [19]:
test_results_04 = test_results_04.sort_values('Mean absolute error', ascending=True)
best_models['m4'] = [test_results_04.iloc[0]['Model name'], ['entsoe', 'calendar']]
test_results_04


Out[19]:
Model name Mean absolute error Mean squared error
0 04_49_l-10 0.289041 0.131035
1 04_62_l-10_l-10_d-0.1 0.291371 0.135468
2 04_155_l-30_l-20_d-0.1 0.292002 0.133764
3 04_99_l-20_d-0.2 0.295192 0.139207
4 04_145_l-30 0.302183 0.145987

Model 5: Calendar + Weather


In [20]:
test_results_05 = test_results_05.sort_values('Mean absolute error', ascending=True)
best_models['m5'] = [test_results_05.iloc[0]['Model name'], ['calendar', 'weather']]
test_results_05


Out[20]:
Model name Mean absolute error Mean squared error
0 05_374_l-125_l-50_d-0.1 0.268845 0.120883
1 05_255_l-75_l-10_d-0.2 0.280159 0.124944
2 05_151_l-30_l-15 0.295939 0.138655
3 05_217_l-50_l-20 0.296139 0.139711
4 05_277_l-75_l-50 0.313155 0.153873

Model 6: All available data


In [21]:
test_results_06 = test_results_06.sort_values('Mean absolute error', ascending=True)
best_models['m6'] = [test_results_06.iloc[0]['Model name'], ['all']]
test_results_06


Out[21]:
Model name Mean absolute error Mean squared error
0 06_147_l-30_d-0.2 0.290608 0.134364
1 06_243_l-75_d-0.2 0.306467 0.144846
2 06_86_l-10_l-50_d-0.1 0.307036 0.147213
3 06_373_l-125_l-50 0.309126 0.145982
4 06_199_l-50_l-15 0.335317 0.166415

In [103]:
config['model_name'].values[0]


Out[103]:
'06_147_l-30_d-0.2'

All models compared: for the best model of each category, predictions are generated on the test set and evaluated against the ENTSOE forecast benchmark. A negative Diff. MAE / Diff. MSE means the LSTM model beats the benchmark.


In [93]:
def mean_absolute_percentage_error(y_true, y_pred): 
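    # sklearn (in the version used here) has no built-in MAPE metric, hence this manual implementation:
    # MAPE = 100 * mean(|y_true - y_pred| / y_true); note it is undefined if y_true contains zeros.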
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

results = pd.DataFrame(columns=['Model name', 'Mean absolute error', 'Mean squared error', 'MAPE', 'Diff. MAE', 'Diff. MSE'])

for idx, m in enumerate(best_models.keys()):
    
    # Load original config:
    config = valid_results[m].loc[valid_results[m]['model_name'] == best_models[m][0]]
    features = best_models[m][1]
    features.append('actual')
    batch_size = int(config['batch_train'].values[0])
    
    notebook = 'notebook_0' + str(m[1:2])
    mod_name = config['model_name'].values[0]
    filename = os.path.join(model_dir, notebook, (mod_name +'.h5'))

    # Load data and prepare for standardization
    df = data.load_dataset(path=path, modules=features)
    df_scaled = df.copy()
    df_scaled = df_scaled.dropna()

    # Get all float type columns and standardize them
    floats = [key for key in dict(df_scaled.dtypes) if dict(df_scaled.dtypes)[key] in ['float64']]
    scaler = StandardScaler()
    scaled_columns = scaler.fit_transform(df_scaled[floats])
    df_scaled[floats] = scaled_columns

    # Split in train and test dataset
    df_train = df_scaled.loc[(df_scaled.index < split_date )].copy()
    df_test = df_scaled.loc[df_scaled.index >= split_date].copy()

    # Split in features and label data
    y_train = df_train['actual'].copy()
    X_train = df_train.drop('actual', axis=1).copy()
    y_test = df_test['actual'].copy()
    X_test = df_test.drop('actual', axis=1).copy()
    
    # Load model and generate predictions
    model = load_model(filename)
    model.reset_states()
    predictions = lstm.get_predictions(model=model, X=X_test, batch_size=batch_size, timesteps=1, verbose=0)
        
    # Otherwise, we get a memory leak!
    K.clear_session()
    import tensorflow as tf
    tf.reset_default_graph()
    
    # Get the scaling parameters from the standardization and rescale back to MW
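    # Inverse of the standardization: x_raw = x_scaled * sigma + mu. mu and sigma are taken from
    # the first scaled column, which is assumed here to be the 'actual' load column.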
    mu = scaler.mean_[0]
    sigma = scaler.scale_[0]
    predictions_raw = np.reshape(np.round(predictions * sigma + mu), (predictions.shape[0], ))
    actual_raw = np.round(y_test * sigma + mu)
    
    # Load the entsoe benchmark
    size = int(y_test.shape[0] / batch_size)
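    # Predictions are generated batch-wise, so only size * batch_size test samples are covered;
    # all series are trimmed to that length before computing the error metrics below.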
    entsoe_benchmark = data.load_dataset(path=path, modules=['entsoe'])
    entsoe_benchmark = entsoe_benchmark.loc[entsoe_benchmark.index >= split_date].copy()
    entsoe_benchmark = np.reshape(entsoe_benchmark.values, (entsoe_benchmark.shape[0],))
    
    # Calculate benchmark and model MAE and MSE
    mse_entsoe = mean_squared_error(actual_raw[0:size*batch_size], entsoe_benchmark[0:size*batch_size])
    mae_entsoe = mean_absolute_error(actual_raw[0:size*batch_size], entsoe_benchmark[0:size*batch_size])
    mape_entsoe = mean_absolute_percentage_error(actual_raw[0:size*batch_size], entsoe_benchmark[0:size*batch_size])
    mse = mean_squared_error(actual_raw[0:size*batch_size], predictions_raw)
    mae = mean_absolute_error(actual_raw[0:size*batch_size], predictions_raw)
    mape = mean_absolute_percentage_error(actual_raw[0:size*batch_size], predictions_raw)
    
    # Store results
    result = [{'Model name': mod_name, 
               'Mean squared error': mse, 'Mean absolute error': mae,
               'MAPE': mape,
               'Diff. MAE': mae - mae_entsoe, 'Diff. MSE': mse - mse_entsoe,
              }]
    results = results.append(result, ignore_index=True)
    
    graph = False
    if graph:
        
        # Set time vector for graph
        time_test = y_test.index
        time_vector = time_test.values
        time_vector = time_vector[0:size*batch_size]
        time_vector = np.reshape(time_vector, (size*batch_size,1))

        #%matplotlib qt
        #plt.clf()
        #plt.ion()
    
        plt.figure()
        plt.plot(time_vector, entsoe_benchmark[:size*batch_size], label='ENTSOE Forecast')
        plt.plot(time_vector, actual_raw[:size*batch_size], label='Actual Load')
        plt.plot(time_vector, predictions_raw, label='Model predictions')
        plt.title('LSTM Model: ${}$'.format(mod_name))
        plt.ylabel('Electricity load (in MW)')
        plt.xlabel('Date')
        plt.legend(loc='upper left')
        plt.show()

        #filename = plot_dir + model_name + 'top_model_predictions'
        #plt.savefig(filename + '.pgf')
        #plt.savefig(filename + '.pdf')

In [96]:
results = results.sort_values('Diff. MAE', ascending=True)
print('Test dataset performance of the best models:')
print('ENTSOE Forecast (Benchmark) metrics: \tMAE = {:.3f}  \tMSE = {:.3f}  \tMAPE = {:.3f}'.format(np.asscalar(mae_entsoe), np.asscalar(mse_entsoe), mape_entsoe))
print(tabulate(results, headers='keys', numalign="right", floatfmt=".2f"))


Test dataset performance of the best models:
ENTSOE Forecast (Benchmark) metrics: 	MAE = 518.276  	MSE = 427415.152  	MAPE = 7.686
      Model name                  Mean absolute error    Mean squared error    MAPE    Diff. MAE    Diff. MSE
----  ------------------------  ---------------------  --------------------  ------  -----------  -----------
4.00  05_374_l-125_l-50_d-0.1                  311.59             162371.26    4.67      -206.69   -265043.90
3.00  04_49_l-10                               335.00             176006.62    5.05      -183.28   -251408.53
5.00  06_147_l-30_d-0.2                        336.80             180471.58    5.12      -181.48   -246943.57
0.00  01_386_l-150_d-0.1                       339.54             184154.85    5.11      -178.73   -243260.31
1.00  1_02_2_l-5_d-0.1                         353.21             202680.59    5.31      -165.06   -224734.56
2.00  3_03_54_l-100_l-10_d-0.2                 500.68             379821.29    7.34       -17.59    -47593.86

In [ ]: