Peak prediction model

Create a model that, given hourly load and temperature data, returns a dispatch decision for the following day. The decision is informed by the 1-day-ahead prediction, the 2-day-ahead prediction, and historical peak minimums (for each calendar month, the lowest monthly peak observed in prior years).


In [5]:
import pandas as pd
from scipy.stats import zscore
import pickle
import time
import numpy as np
from sklearn.linear_model import LinearRegression
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import loadForecast as lf

%matplotlib inline

Create daily data, find correct answers


In [ ]:
# Daily frame: one row per calendar day with its peak load and calendar parts.
# NOTE: `df` (the hourly frame with `dates` and `load` columns) is only loaded in
# the "1-day and 2-day forecasts" cell further down; on a fresh kernel that cell
# has to run before this one.
df['date'] = df.dates.dt.date
d_df = df.groupby('date')['load'].max().to_frame('max_load')
# Take the date column from the (sorted) group keys rather than from
# df['date'].unique(): unique() keeps order of appearance, so the two would be
# misaligned whenever the hourly rows are not already in chronological order.
d_df['date'] = pd.to_datetime(d_df.index).to_numpy()
d_df['year'] = d_df['date'].dt.year
d_df['month'] = d_df['date'].dt.month
d_df['day'] = d_df['date'].dt.day
d_df.head()

In [122]:
# Ground truth: within every (year, month) the day with the highest daily peak
# is the single day on which dispatching would have been correct.
# `l` holds the index labels (dates) of those days, ordered by year then month.
l = d_df.groupby(['year', 'month'])['max_load'].idxmax().tolist()
# Vectorised membership test instead of scanning the list once per row.
d_df['should_dispatch'] = d_df.index.isin(l)
l


Out[122]:
[datetime.date(2002, 1, 3),
 datetime.date(2002, 2, 27),
 datetime.date(2002, 3, 4),
 datetime.date(2002, 4, 29),
 datetime.date(2002, 5, 1),
 datetime.date(2002, 6, 12),
 datetime.date(2002, 7, 25),
 datetime.date(2002, 8, 7),
 datetime.date(2002, 9, 5),
 datetime.date(2002, 10, 3),
 datetime.date(2002, 11, 27),
 datetime.date(2002, 12, 6),
 datetime.date(2003, 1, 24),
 datetime.date(2003, 2, 25),
 datetime.date(2003, 3, 6),
 datetime.date(2003, 4, 30),
 datetime.date(2003, 5, 30),
 datetime.date(2003, 6, 25),
 datetime.date(2003, 7, 22),
 datetime.date(2003, 8, 6),
 datetime.date(2003, 9, 9),
 datetime.date(2003, 10, 22),
 datetime.date(2003, 11, 24),
 datetime.date(2003, 12, 17),
 datetime.date(2004, 1, 6),
 datetime.date(2004, 2, 13),
 datetime.date(2004, 3, 18),
 datetime.date(2004, 4, 22),
 datetime.date(2004, 5, 28),
 datetime.date(2004, 6, 18),
 datetime.date(2004, 7, 16),
 datetime.date(2004, 8, 3),
 datetime.date(2004, 9, 17),
 datetime.date(2004, 10, 20),
 datetime.date(2004, 11, 30),
 datetime.date(2004, 12, 23),
 datetime.date(2005, 1, 6),
 datetime.date(2005, 2, 3),
 datetime.date(2005, 3, 17),
 datetime.date(2005, 4, 28),
 datetime.date(2005, 5, 22),
 datetime.date(2005, 6, 30),
 datetime.date(2005, 7, 22),
 datetime.date(2005, 8, 23),
 datetime.date(2005, 9, 28),
 datetime.date(2005, 10, 5),
 datetime.date(2005, 11, 9),
 datetime.date(2005, 12, 9),
 datetime.date(2006, 1, 24),
 datetime.date(2006, 2, 18),
 datetime.date(2006, 3, 24),
 datetime.date(2006, 4, 17),
 datetime.date(2006, 5, 25),
 datetime.date(2006, 6, 12),
 datetime.date(2006, 7, 17),
 datetime.date(2006, 8, 18),
 datetime.date(2006, 9, 1),
 datetime.date(2006, 10, 2),
 datetime.date(2006, 11, 30),
 datetime.date(2006, 12, 8),
 datetime.date(2007, 1, 15),
 datetime.date(2007, 2, 16),
 datetime.date(2007, 3, 5),
 datetime.date(2007, 4, 3),
 datetime.date(2007, 5, 14),
 datetime.date(2007, 6, 11),
 datetime.date(2007, 7, 12),
 datetime.date(2007, 8, 14),
 datetime.date(2007, 9, 27),
 datetime.date(2007, 10, 1),
 datetime.date(2007, 11, 26),
 datetime.date(2007, 12, 17),
 datetime.date(2008, 1, 24),
 datetime.date(2008, 2, 1),
 datetime.date(2008, 3, 7),
 datetime.date(2008, 4, 22),
 datetime.date(2008, 5, 23),
 datetime.date(2008, 6, 16),
 datetime.date(2008, 7, 28),
 datetime.date(2008, 8, 4),
 datetime.date(2008, 9, 11),
 datetime.date(2008, 10, 3),
 datetime.date(2008, 11, 21),
 datetime.date(2008, 12, 15),
 datetime.date(2009, 1, 28),
 datetime.date(2009, 2, 4),
 datetime.date(2009, 3, 2),
 datetime.date(2009, 4, 22),
 datetime.date(2009, 5, 26),
 datetime.date(2009, 6, 25),
 datetime.date(2009, 7, 13),
 datetime.date(2009, 8, 25),
 datetime.date(2009, 9, 8),
 datetime.date(2009, 10, 1),
 datetime.date(2009, 11, 30),
 datetime.date(2009, 12, 10),
 datetime.date(2010, 1, 8),
 datetime.date(2010, 2, 9),
 datetime.date(2010, 3, 2),
 datetime.date(2010, 4, 29),
 datetime.date(2010, 5, 28),
 datetime.date(2010, 6, 23),
 datetime.date(2010, 7, 15),
 datetime.date(2010, 8, 23),
 datetime.date(2010, 9, 14),
 datetime.date(2010, 10, 19),
 datetime.date(2010, 11, 30),
 datetime.date(2010, 12, 13),
 datetime.date(2011, 1, 11),
 datetime.date(2011, 2, 10),
 datetime.date(2011, 3, 25),
 datetime.date(2011, 4, 19),
 datetime.date(2011, 5, 31),
 datetime.date(2011, 6, 28),
 datetime.date(2011, 7, 25),
 datetime.date(2011, 8, 3),
 datetime.date(2011, 9, 2),
 datetime.date(2011, 10, 7),
 datetime.date(2011, 11, 28),
 datetime.date(2011, 12, 7),
 datetime.date(2012, 1, 13),
 datetime.date(2012, 2, 13),
 datetime.date(2012, 3, 31),
 datetime.date(2012, 4, 26),
 datetime.date(2012, 5, 29),
 datetime.date(2012, 6, 26),
 datetime.date(2012, 7, 31),
 datetime.date(2012, 8, 1),
 datetime.date(2012, 9, 7),
 datetime.date(2012, 10, 5),
 datetime.date(2012, 11, 28),
 datetime.date(2012, 12, 26),
 datetime.date(2013, 1, 16),
 datetime.date(2013, 2, 1),
 datetime.date(2013, 3, 26),
 datetime.date(2013, 4, 16),
 datetime.date(2013, 5, 31),
 datetime.date(2013, 6, 27),
 datetime.date(2013, 7, 31),
 datetime.date(2013, 8, 7),
 datetime.date(2013, 9, 4),
 datetime.date(2013, 10, 3),
 datetime.date(2013, 11, 22),
 datetime.date(2013, 12, 10),
 datetime.date(2014, 1, 6),
 datetime.date(2014, 2, 6),
 datetime.date(2014, 3, 3),
 datetime.date(2014, 4, 23),
 datetime.date(2014, 5, 29),
 datetime.date(2014, 6, 30),
 datetime.date(2014, 7, 14),
 datetime.date(2014, 8, 25),
 datetime.date(2014, 9, 10),
 datetime.date(2014, 10, 7),
 datetime.date(2014, 11, 17),
 datetime.date(2014, 12, 31),
 datetime.date(2015, 1, 8),
 datetime.date(2015, 2, 27),
 datetime.date(2015, 3, 4),
 datetime.date(2015, 4, 2),
 datetime.date(2015, 5, 18),
 datetime.date(2015, 6, 10),
 datetime.date(2015, 7, 30),
 datetime.date(2015, 8, 10),
 datetime.date(2015, 9, 8),
 datetime.date(2015, 10, 12),
 datetime.date(2015, 11, 29),
 datetime.date(2015, 12, 28),
 datetime.date(2016, 1, 11),
 datetime.date(2016, 2, 4),
 datetime.date(2016, 3, 21),
 datetime.date(2016, 4, 26),
 datetime.date(2016, 5, 10),
 datetime.date(2016, 6, 27),
 datetime.date(2016, 7, 22),
 datetime.date(2016, 8, 11),
 datetime.date(2016, 9, 19),
 datetime.date(2016, 10, 5),
 datetime.date(2016, 11, 1),
 datetime.date(2016, 12, 19),
 datetime.date(2017, 1, 6),
 datetime.date(2017, 2, 16),
 datetime.date(2017, 3, 20),
 datetime.date(2017, 4, 20),
 datetime.date(2017, 5, 26),
 datetime.date(2017, 6, 23),
 datetime.date(2017, 7, 28),
 datetime.date(2017, 8, 11),
 datetime.date(2017, 9, 20),
 datetime.date(2017, 10, 9),
 datetime.date(2017, 11, 5),
 datetime.date(2017, 12, 31),
 datetime.date(2018, 1, 17),
 datetime.date(2018, 2, 12),
 datetime.date(2018, 3, 26),
 datetime.date(2018, 4, 13),
 datetime.date(2018, 5, 30),
 datetime.date(2018, 6, 27),
 datetime.date(2018, 7, 19),
 datetime.date(2018, 8, 7),
 datetime.date(2018, 9, 19),
 datetime.date(2018, 10, 4),
 datetime.date(2018, 11, 14),
 datetime.date(2018, 12, 10)]

1-day and 2-day forecasts


In [71]:
# Hourly NCENT data, with the timestamp split into its calendar components.
df = pd.read_csv('hourly/NCENT.csv', parse_dates=['dates']).assign(
    year=lambda frame: frame['dates'].dt.year,
    month=lambda frame: frame['dates'].dt.month,
    day=lambda frame: frame['dates'].dt.day,
    hour=lambda frame: frame['dates'].dt.hour,
)

# Feature matrices for the two horizons (24 h and 48 h of lead time).
# NOTE(review): `noise` presumably models forecast-temperature error and is set
# higher for the longer horizon -- confirm against loadForecast.makeUsefulDf.
all_X_1 = lf.makeUsefulDf(df, noise=2.5, hours_prior=24)
all_X_2 = lf.makeUsefulDf(df, noise=4, hours_prior=48)
all_y = df['load']

# p1/p2: hourly predictions; a1/a2: the accompanying accuracy summaries.
p1, a1 = lf.neural_net_predictions(all_X_1, all_y)
p2, a2 = lf.neural_net_predictions(all_X_2, all_y)

In [84]:
# Collapse each run of 24 hourly predictions into that run's maximum.
# assumes the prediction vectors start on a day boundary -- TODO confirm
p1_max, p2_max = (
    [max(hourly[start:start + 24]) for start in range(0, len(hourly), 24)]
    for hourly in (p1, p2)
)
a1, a2


Out[84]:
({'test': 2.171315217977105, 'train': 1.9426664351769887},
 {'test': 2.907583554651494, 'train': 2.735516462464462})

Create threshold


In [66]:
# Monthly dispatch threshold: for each calendar month, the lowest monthly peak
# seen in any prior year (the last year in the data is held out).
prior_years = d_df['year'].unique()[:-1]

# Rows = prior years, columns = months. Keeping the month labels through
# unstack/reindex means a year with a missing month yields NaN in that slot
# instead of silently shifting later months into the wrong column.
df_thresh = (
    d_df[d_df['year'].isin(prior_years)]
    .groupby(['year', 'month'])['max_load']
    .max()
    .unstack('month')
    .reindex(index=prior_years, columns=range(1, 13))
)
df_thresh.columns = range(12)  # 0-based month columns, as before

# Per-year list of the twelve monthly peaks (same shape as the original dict).
max_vals = {year: row.tolist() for year, row in df_thresh.iterrows()}

# thresholds[m - 1] is the threshold for calendar month m.
thresholds = df_thresh.min().tolist()

Make dispatch decisions


In [127]:
# Evaluate the dispatch rule on the held-out year, one row per day.
TEST_YEAR = 2018

df_dispatch = pd.DataFrame()
# The first column fixes the index to the test-year days; the later d_df columns
# are aligned to that index on assignment.
df_dispatch['should_dispatch'] = d_df.loc[d_df['year'] == TEST_YEAR, 'should_dispatch']
df_dispatch['load'] = d_df['max_load']
# Daily maxima of the hourly forecasts; must contain one value per test-year day.
df_dispatch['1-day'] = p1_max
df_dispatch['2-day'] = p2_max
df_dispatch['month'] = d_df['month']
df_dispatch['threshold'] = df_dispatch['month'].map(lambda month: thresholds[month - 1])

# Rule 1: the 1-day forecast reaches the historical minimum monthly peak.
df_dispatch['above_threshold'] = df_dispatch['1-day'] >= df_dispatch['threshold']
# Rule 2: the 2-day forecast does not exceed the 1-day forecast.
df_dispatch['2-day_lower'] = df_dispatch['2-day'] <= df_dispatch['1-day']
# Rule 3: the 1-day forecast is at least as high as every earlier forecast in
# the same month. A grouped running maximum replaces the hand-written loop, so
# there is no fixed 365-slot buffer and no loop variable overwriting `l`.
running_max = df_dispatch.groupby('month')['1-day'].cummax()
df_dispatch['highest_so_far'] = df_dispatch['1-day'] >= running_max

# Dispatch only when all three rules agree.
df_dispatch['dispatch'] = df_dispatch['highest_so_far'] & df_dispatch['2-day_lower'] & df_dispatch['above_threshold']
df_dispatch.head()


Out[127]:
should_dispatch 1-day 2-day month threshold above_threshold 2-day_lower actual_load highest_so_far dispatch
date
2018-01-01 False 21727.000000 20115.652344 1 13638.864085 True True 20338.611561 True True
2018-01-02 False 23110.335938 21471.070312 1 13638.864085 True True 22168.940606 True True
2018-01-03 False 22742.781250 20949.039062 1 13638.864085 True True 22017.339622 False False
2018-01-04 False 19999.328125 18993.402344 1 13638.864085 True True 19569.618815 False False
2018-01-05 False 18073.220703 17213.603516 1 13638.864085 True True 17602.195958 False False

Compare to correct answers


In [124]:
# Predicted (`pre`) and ground-truth (`ans`) dispatch flags as plain NumPy arrays.
pre, ans = (
    np.array(df_dispatch[column])
    for column in ('dispatch', 'should_dispatch')
)

def recall(ans, pre):
    """Fraction of true peak days that were dispatched on.

    Args:
        ans: array-like of truthy/falsy flags, True where dispatch was correct.
        pre: array-like of truthy/falsy flags, True where dispatch was predicted.

    Returns:
        true_positive / (true_positive + false_negative) as a float, or 0.0 when
        `ans` contains no positives.
    """
    ans = np.asarray(ans, dtype=bool)
    pre = np.asarray(pre, dtype=bool)
    true_positive = np.count_nonzero(ans & pre)
    false_negative = np.count_nonzero(ans & ~pre)
    actual_positive = true_positive + false_negative
    # Explicit guard instead of adding an epsilon to the denominator, which
    # slightly biased every non-degenerate result.
    if actual_positive == 0:
        return 0.0
    return true_positive / actual_positive
def precision(ans, pre):
    """Fraction of predicted dispatches that fell on a true peak day.

    Args:
        ans: array-like of truthy/falsy flags, True where dispatch was correct.
        pre: array-like of truthy/falsy flags, True where dispatch was predicted.

    Returns:
        true_positive / (true_positive + false_positive) as a float, or 0.0 when
        `pre` contains no positives.
    """
    ans = np.asarray(ans, dtype=bool)
    pre = np.asarray(pre, dtype=bool)
    true_positive = np.count_nonzero(ans & pre)
    false_positive = np.count_nonzero(~ans & pre)
    predicted_positive = true_positive + false_positive
    # Explicit guard instead of adding an epsilon to the denominator, which
    # slightly biased every non-degenerate result.
    if predicted_positive == 0:
        return 0.0
    return true_positive / predicted_positive
def peaks_missed(ans, pre):
    """Count true peak days on which no dispatch was predicted (false negatives).

    Args:
        ans: array-like of truthy/falsy flags, True where dispatch was correct.
        pre: array-like of truthy/falsy flags, True where dispatch was predicted.

    Returns:
        Number of positions where `ans` is true and `pre` is false, as an int.
    """
    # Coerce to boolean arrays so lists and Series work, not just bool ndarrays.
    ans = np.asarray(ans, dtype=bool)
    pre = np.asarray(pre, dtype=bool)
    return int(np.count_nonzero(ans & ~pre))
def unnecessary_dispatches(ans, pre):
    """Count predicted dispatches that did not fall on a true peak day (false positives).

    Args:
        ans: array-like of truthy/falsy flags, True where dispatch was correct.
        pre: array-like of truthy/falsy flags, True where dispatch was predicted.

    Returns:
        Number of positions where `pre` is true and `ans` is false, as an int.
    """
    # Coerce to boolean arrays so lists and Series work, not just bool ndarrays.
    ans = np.asarray(ans, dtype=bool)
    pre = np.asarray(pre, dtype=bool)
    return int(np.count_nonzero(~ans & pre))

# Report, in order: recall, precision, missed peaks, unnecessary dispatches.
print(*(
    metric(ans, pre)
    for metric in (recall, precision, peaks_missed, unnecessary_dispatches)
))


0.9166666590277779 0.1896551720868014 1 47