In [1]:
import os
import sys
import datetime
import pandas as pd
import numpy as np
import pytz
from keras.models import load_model
from keras import backend as K
# Setup for LaTeX export: https://matplotlib.org/users/pgf.html. The backend must be set before pyplot is imported.
def figsize(scale):
    fig_width_pt = 469.755                 # Get this from LaTeX using \the\textwidth
    inches_per_pt = 1.0/72.27              # Convert pt to inch
    golden_mean = (np.sqrt(5.0)-1.0)/2.0   # Aesthetic ratio (you could change this)
    fig_width = fig_width_pt*inches_per_pt*scale  # width in inches
    fig_height = fig_width*golden_mean            # height in inches
    fig_size = [fig_width, fig_height]
    return fig_size
import matplotlib as mpl
mpl.use('pgf')
pgf_with_rc_fonts = {
    "text.usetex": True,
    "font.family": "serif",
    "axes.labelsize": 10,           # LaTeX default is 10pt font.
    "font.size": 10,
    "legend.fontsize": 8,           # Make the legend/label fonts a little smaller
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    "figure.figsize": figsize(0.9), # default fig size of 0.9 textwidth
    #"font.serif": [],                    # use latex default serif font
    #"font.sans-serif": ["DejaVu Sans"],  # use a specific sans-serif font
}
mpl.rcParams.update(pgf_with_rc_fonts)
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate
# Import custom module functions
module_path = os.path.abspath(os.path.join('../'))
if module_path not in sys.path:
    sys.path.append(module_path)
from lstm_load_forecasting import data, lstm
from ast import literal_eval
from dateutil.tz import tzutc
%matplotlib
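With the pgf backend active, finished figures can be written straight to files that LaTeX can embed. A minimal sketch, with an illustrative filename that is not taken from the notebook:

# After plotting (see the last cell), export the figure for the LaTeX document
plt.savefig('rolling_forecast.pgf')  # native pgf, typeset by LaTeX itself
plt.savefig('rolling_forecast.pdf')  # pre-rendered alternative from the same backend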
In [2]:
timezone = pytz.timezone('Europe/Zurich')
starting = timezone.localize(datetime.datetime(2017,2,1,0,0,0,0))
path = os.path.join('../Data', 'fulldataset.csv')
res_path = os.path.abspath('../results/')
model_dir = os.path.abspath('../models/')
date = '20170616'
# The epoch count used for the daily retraining below is quite important for the rolling
# forecast: 1 epoch is not much better than the dynamic forecast, while 20 is already too much.
# 10 gave 4.0 and 4.0.
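The next three cells (models 4, 5 and 6) run the same rolling-origin experiment: walk through the test window one day at a time, refit the stateful network on the previous day's actual values, produce a two-day multistep forecast, and merge the overlapping windows into a single series with combine_first. Since combine_first only fills gaps, the first forecast covering a given hour is the one that survives. A self-contained toy of that merge semantics (the column name and values are made up):

import numpy as np
import pandas as pd

# Two overlapping 48h forecast windows, one day apart, merged as in the cells below
idx = pd.date_range('2017-02-01', periods=72, freq='60min', tz='UTC')
predictions = pd.DataFrame(index=idx)
for i, day in enumerate(pd.date_range(idx[0], periods=2, freq='24h'), start=1):
    window = pd.date_range(day, periods=48, freq='60min')
    result = pd.DataFrame({'toy': np.full(48, float(i))}, index=window)
    # combine_first only fills gaps: values already stored are kept
    predictions = predictions.combine_first(result)
# Hours 0-47 hold 1.0 and hours 48-71 hold 2.0: on the overlap the forecast
# made first wins; later windows only extend the series.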
In [3]:
model_id = 4
# Let's use the same value as in the model run
epochs = 10 # At 10: 283 and 4.2
df4 = data.load_dataset(path=path, modules=['actual', 'entsoe', 'calendar'])
df4_scaled = df4.copy()
df4_scaled = df4_scaled.dropna()
# Get all float type columns, preserving column order so that 'actual' stays first
# (the inverse transform below relies on the target's statistics being at index 0)
floats = df4_scaled.select_dtypes(include=['float64']).columns
scaler = StandardScaler()
scaled_columns = scaler.fit_transform(df4_scaled[floats])
df4_scaled[floats] = scaled_columns
df4_train = df4_scaled.loc[(df4_scaled.index < starting )].copy()
df4_test = df4_scaled.loc[df4_scaled.index >= starting].copy()
y_train = df4_train['actual'].copy()
X_train = df4_train.drop('actual', axis=1).copy()
y_test = df4_test['actual'].copy()
X_test = df4_test.drop('actual', axis=1).copy()
valid_results_4 = pd.read_csv(os.path.join(res_path, 'notebook_04', '04_results_' + date + '.csv'), delimiter=';')
test_results_4 = pd.read_csv(os.path.join(res_path, 'notebook_04', '04_test_results' + date + '.csv'), delimiter=';')
test_results_4 = test_results_4.sort_values('Mean absolute error', ascending=True)
best_model_4 = test_results_4.iloc[0]['Model name']  # first row after sorting (loc[0] would pick the original label 0)
config = valid_results_4.loc[valid_results_4['model_name'] == best_model_4]
batch_size = int(config['batch_train'].values[0])
size = int(y_test.shape[0] / batch_size)
layers = literal_eval(config['config'].values[0])
layers = layers['layers']
# Load model
notebook = 'notebook_0' + str(model_id)
mod_name = config['model_name'].values[0]
filename = os.path.join(model_dir, notebook, (mod_name +'.h5'))
model4 = load_model(filename)
model4.reset_states()
# Loop through window
starting_loc = starting
starting_utc = starting.astimezone(pytz.utc)
ending_utc = df4_test.tail(1).index[0]
ending_loc = ending_utc.tz_convert('Europe/Zurich')
day_changes = pd.date_range(start=starting_loc, end=ending_loc, freq='24h', tz="Europe/Zurich")
predictions = pd.DataFrame(index=pd.date_range(start=starting_loc, end=ending_loc, normalize=True, freq='60min', tz='Europe/Zurich'))
predictions.index = predictions.index.tz_convert('utc')
for idx, hour in enumerate(day_changes):
    # Skip retraining on the first day: there is no previous test day to learn from yet
    if idx > 0:
        # Select the actual values from the previous day and retrain the model on that new data
        dh = df4_test.loc[(df4_test.index >= hour - pd.DateOffset(days=1)) & (df4_test.index < hour)]
        y_train = dh['actual']
        X_train = dh.drop('actual', axis=1)
        history = lstm.train_model(model=model4, mode='fit', y=y_train, X=X_train,
                                   batch_size=batch_size, timesteps=1, epochs=epochs,
                                   rearrange=False, validation_split=0,
                                   verbose=0, early_stopping=False)
    # Select the input data for the next 2 days and create a multistep forecast
    df = df4_test.loc[(df4_test.index >= hour) & (df4_test.index < hour + pd.DateOffset(days=2))]
    X_predict = df.drop('actual', axis=1)
    scaled_predictions = lstm.get_predictions(model=model4, X=X_predict, batch_size=batch_size, timesteps=1, verbose=1)
    # Combine and store the results
    hour_utc = hour.tz_convert('utc')
    window = pd.date_range(start=hour_utc, periods=len(scaled_predictions), freq='60min', tz='UTC')
    result = pd.DataFrame(data={"model4": scaled_predictions.flatten()}, index=window)
    predictions = predictions.combine_first(result)
# Invert the standardization of the target column ('actual' is the first float column)
mu = scaler.mean_[0]
sigma = scaler.scale_[0]
model4_predictions = mu + sigma*predictions
K.clear_session()
import tensorflow as tf
tf.reset_default_graph()
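The raw network outputs are in standardized units because the whole frame went through the StandardScaler. The last three lines of the cell above invert that transform for the target: with mu and sigma the fitted mean and standard deviation of the 'actual' column (its statistics sit at index 0 because it is the first float column), the back-transform is y_hat = mu + sigma * z_hat, the inverse of z = (y - mu) / sigma. The same pattern is repeated for models 5 and 6 below.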
In [4]:
model_id = 5
epochs = 10 # At 10: 262.5 and 3.9
df5 = data.load_dataset(path=path, modules=['actual', 'calendar', 'weather'])
df5_scaled = df5.copy()
df5_scaled = df5_scaled.dropna()
# Get all float type columns, preserving column order so that 'actual' stays first
floats = df5_scaled.select_dtypes(include=['float64']).columns
scaler = StandardScaler()
scaled_columns = scaler.fit_transform(df5_scaled[floats])
df5_scaled[floats] = scaled_columns
df5_train = df5_scaled.loc[(df5_scaled.index < starting )].copy()
df5_test = df5_scaled.loc[df5_scaled.index >= starting].copy()
y_train = df5_train['actual'].copy()
X_train = df5_train.drop('actual', axis=1).copy()
y_test = df5_test['actual'].copy()
X_test = df5_test.drop('actual', axis=1).copy()
valid_results_5 = pd.read_csv(os.path.join(res_path, 'notebook_05', '05_results_' + date + '.csv'), delimiter=';')
test_results_5 = pd.read_csv(os.path.join(res_path, 'notebook_05', '05_test_results' + date + '.csv'), delimiter=';')
test_results_5 = test_results_5.sort_values('Mean absolute error', ascending=True)
best_model_5 = test_results_5.iloc[0]['Model name']  # first row after sorting (loc[0] would pick the original label 0)
config = valid_results_5.loc[valid_results_5['model_name'] == best_model_5]
batch_size = int(config['batch_train'].values[0])
size = int(y_test.shape[0] / batch_size)
layers = literal_eval(config['config'].values[0])
layers = layers['layers']
# Load model
notebook = 'notebook_0' + str(model_id)
mod_name = config['model_name'].values[0]
filename = os.path.join(model_dir, notebook, (mod_name +'.h5'))
model5 = load_model(filename)
model5.reset_states()
# Loop through window
starting_loc = starting
starting_utc = starting.astimezone(pytz.utc)
ending_utc = df5_test.tail(1).index[0]
ending_loc = ending_utc.tz_convert('Europe/Zurich')
day_changes = pd.date_range(start=starting_loc, end=ending_loc, freq='24h', tz="Europe/Zurich")
predictions = pd.DataFrame(index=pd.date_range(start=starting_loc, end=ending_loc, normalize=True, freq='60min', tz='Europe/Zurich'))
predictions.index = predictions.index.tz_convert('utc')
for idx, hour in enumerate(day_changes):
    # Skip retraining on the first day: there is no previous test day to learn from yet
    if idx > 0:
        # Select the actual values from the previous day and retrain the model on that new data
        dh = df5_test.loc[(df5_test.index >= hour - pd.DateOffset(days=1)) & (df5_test.index < hour)]
        y_train = dh['actual']
        X_train = dh.drop('actual', axis=1)
        history = lstm.train_model(model=model5, mode='fit', y=y_train, X=X_train,
                                   batch_size=batch_size, timesteps=1, epochs=epochs,
                                   rearrange=False, validation_split=0,
                                   verbose=0, early_stopping=False)
    # Select the input data for the next 2 days and create a multistep forecast
    df = df5_test.loc[(df5_test.index >= hour) & (df5_test.index < hour + pd.DateOffset(days=2))]
    X_predict = df.drop('actual', axis=1)
    scaled_predictions = lstm.get_predictions(model=model5, X=X_predict, batch_size=batch_size, timesteps=1, verbose=1)
    # Combine and store the results
    hour_utc = hour.tz_convert('utc')
    window = pd.date_range(start=hour_utc, periods=len(scaled_predictions), freq='60min', tz='UTC')
    result = pd.DataFrame(data={"model5": scaled_predictions.flatten()}, index=window)
    predictions = predictions.combine_first(result)
# Invert the standardization of the target column ('actual' is the first float column)
mu = scaler.mean_[0]
sigma = scaler.scale_[0]
model5_predictions = mu + sigma*predictions
K.clear_session()
import tensorflow as tf
tf.reset_default_graph()
In [5]:
model_id = 6
epochs = 10 # At 10: 274 and 4.1
df6 = data.load_dataset(path=path, modules=['all'])
df6_scaled = df6.copy()
df6_scaled = df6_scaled.dropna()
# Get all float type columns, preserving column order so that 'actual' stays first
floats = df6_scaled.select_dtypes(include=['float64']).columns
scaler = StandardScaler()
scaled_columns = scaler.fit_transform(df6_scaled[floats])
df6_scaled[floats] = scaled_columns
df6_train = df6_scaled.loc[(df6_scaled.index < starting )].copy()
df6_test = df6_scaled.loc[df6_scaled.index >= starting].copy()
y_train = df6_train['actual'].copy()
X_train = df6_train.drop('actual', axis=1).copy()
y_test = df6_test['actual'].copy()
X_test = df6_test.drop('actual', axis=1).copy()
valid_results_6 = pd.read_csv(os.path.join(res_path, 'notebook_06', '06_results_' + date + '.csv'), delimiter=';')
test_results_6 = pd.read_csv(os.path.join(res_path, 'notebook_06', '06_test_results' + date + '.csv'), delimiter=';')
test_results_6 = test_results_6.sort_values('Mean absolute error', ascending=True)
best_model_6 = test_results_6.iloc[0]['Model name']  # first row after sorting (loc[0] would pick the original label 0)
config = valid_results_6.loc[valid_results_6['model_name'] == best_model_6]
batch_size = int(config['batch_train'].values[0])
size = int(y_test.shape[0] / batch_size)
layers = literal_eval(config['config'].values[0])
layers = layers['layers']
# Load model
notebook = 'notebook_0' + str(model_id)
mod_name = config['model_name'].values[0]
filename = os.path.join(model_dir, notebook, (mod_name +'.h5'))
model6 = load_model(filename)
model6.reset_states()
# Loop through window
starting_loc = starting
starting_utc = starting.astimezone(pytz.utc)
ending_utc = df6_test.tail(1).index[0]
ending_loc = ending_utc.tz_convert('Europe/Zurich')
day_changes = pd.date_range(start=starting_loc, end=ending_loc, freq='24h', tz="Europe/Zurich")
predictions = pd.DataFrame(index=pd.date_range(start=starting_loc, end=ending_loc, normalize=True, freq='60min', tz='Europe/Zurich'))
predictions.index = predictions.index.tz_convert('utc')
for idx, hour in enumerate(day_changes):
    # Skip retraining on the first day: there is no previous test day to learn from yet
    if idx > 0:
        # Select the actual values from the previous day and retrain the model on that new data
        dh = df6_test.loc[(df6_test.index >= hour - pd.DateOffset(days=1)) & (df6_test.index < hour)]
        y_train = dh['actual']
        X_train = dh.drop('actual', axis=1)
        history = lstm.train_model(model=model6, mode='fit', y=y_train, X=X_train,
                                   batch_size=batch_size, timesteps=1, epochs=epochs,
                                   rearrange=False, validation_split=0,
                                   verbose=0, early_stopping=False)
    # Select the input data for the next 2 days and create a multistep forecast
    df = df6_test.loc[(df6_test.index >= hour) & (df6_test.index < hour + pd.DateOffset(days=2))]
    X_predict = df.drop('actual', axis=1)
    scaled_predictions = lstm.get_predictions(model=model6, X=X_predict, batch_size=batch_size, timesteps=1, verbose=1)
    # Combine and store the results
    hour_utc = hour.tz_convert('utc')
    window = pd.date_range(start=hour_utc, periods=len(scaled_predictions), freq='60min', tz='UTC')
    result = pd.DataFrame(data={"model6": scaled_predictions.flatten()}, index=window)
    predictions = predictions.combine_first(result)
# Invert the standardization of the target column ('actual' is the first float column)
mu = scaler.mean_[0]
sigma = scaler.scale_[0]
model6_predictions = mu + sigma*predictions
K.clear_session()
import tensorflow as tf
tf.reset_default_graph()
In [11]:
timezone = pytz.timezone('Europe/Zurich')
starting = timezone.localize(datetime.datetime(2017,2,1,0,0,0,0))
arma_fc = pd.read_csv(os.path.join('../Data', 'arma_rolling_fc.csv'))
arma_forecasts = pd.DataFrame(data={"arma_forecast": arma_fc['x'].values},
                              index=pd.date_range(starting, periods=arma_fc.shape[0], freq='60min'))
arma_fc2 = pd.read_csv(os.path.join('../Data', 'arma_rolling_fc2.csv'))
arma_forecasts2 = pd.DataFrame(data={"arma_forecast2": arma_fc2['x'].values},
                               index=pd.date_range(starting, periods=arma_fc2.shape[0], freq='60min'))
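The single column named x is what R's write.csv produces for a plain vector, which suggests the rolling ARMA benchmarks were generated in R and are only re-indexed here, hour by hour from the same localized start timestamp. This index is tz-aware in Europe/Zurich while the LSTM predictions are indexed in UTC; pandas aligns tz-aware indexes on the underlying UTC instant, so the join in the next cell still lines up. A quick sanity check (assuming the CSVs above were read successfully):

# The rebuilt hourly index must line up one-to-one with the exported forecast vector
assert len(arma_forecasts) == len(arma_fc)
assert arma_forecasts.index[0] == starting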
In [12]:
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
dfc = data.load_dataset(path=path, modules=['actual', 'entsoe'])
forecasts = model4_predictions
forecasts = forecasts.join(model5_predictions)
forecasts = forecasts.join(model6_predictions)
forecasts = forecasts.join(dfc)
forecasts = forecasts.join(arma_forecasts)
forecasts = forecasts.join(arma_forecasts2)
forecasts = forecasts.dropna()
results = {}
results[''] = ['MSE', 'MAE', 'MAPE']
results['entsoe'] = [mean_squared_error(forecasts['actual'], forecasts['entsoe']),
                     mean_absolute_error(forecasts['actual'], forecasts['entsoe']),
                     mean_absolute_percentage_error(forecasts['actual'], forecasts['entsoe'])]
results['arima'] = [mean_squared_error(forecasts['actual'], forecasts['arma_forecast']),
                    mean_absolute_error(forecasts['actual'], forecasts['arma_forecast']),
                    mean_absolute_percentage_error(forecasts['actual'], forecasts['arma_forecast'])]
results['arima2'] = [mean_squared_error(forecasts['actual'], forecasts['arma_forecast2']),
                     mean_absolute_error(forecasts['actual'], forecasts['arma_forecast2']),
                     mean_absolute_percentage_error(forecasts['actual'], forecasts['arma_forecast2'])]
results['m4-entsoe-cal'] = [mean_squared_error(forecasts['actual'], forecasts['model4']),
                            mean_absolute_error(forecasts['actual'], forecasts['model4']),
                            mean_absolute_percentage_error(forecasts['actual'], forecasts['model4'])]
results['m5-cal-weather'] = [mean_squared_error(forecasts['actual'], forecasts['model5']),
                             mean_absolute_error(forecasts['actual'], forecasts['model5']),
                             mean_absolute_percentage_error(forecasts['actual'], forecasts['model5'])]
results['m6-all'] = [mean_squared_error(forecasts['actual'], forecasts['model6']),
                     mean_absolute_error(forecasts['actual'], forecasts['model6']),
                     mean_absolute_percentage_error(forecasts['actual'], forecasts['model6'])]
print(tabulate(results, headers='keys', numalign="right", tablefmt='latex', floatfmt=".1f"))
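For reference, the helper defined at the top of this cell is the standard mean absolute percentage error, MAPE = (100/n) * Σ_t |(y_t − ŷ_t) / y_t|, reported in percent alongside MSE and MAE. It is well defined here because hourly electricity load is strictly positive, so the division by y_t can never hit zero.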
In [10]:
plt.figure()
#plt.plot(forecasts.index, forecasts['entsoe'], label='ENTSOE Forecast')
plt.plot(forecasts.index, forecasts['arma_forecast'], label='ARIMA (0,1,5)')
plt.plot(forecasts.index, forecasts['actual'], label='Actual Load')
#plt.plot(forecasts.index, forecasts['model4'], label='Model 4 (ENTSO-E and Calendar)')
plt.plot(forecasts.index, forecasts['model5'], label='Model 5 (Calendar and Weather)')
#plt.plot(forecasts.index, forecasts['model6'], label='Model 6 (All)')
plt.title('Forecast Comparison: Rolling Forecast')
plt.ylabel('Electricity load (in MW)')
plt.xlabel('Date')
plt.legend(loc='upper right')
plt.show()