In [234]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

%matplotlib inline

Get data


In [235]:
df = pd.read_csv('data/NCENT.csv', parse_dates=['date'])
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df.head()


Out[235]:
date max_load year month day
0 2002-01-01 12458.207405 2002 1 1
1 2002-01-02 15010.617134 2002 1 2
2 2002-01-03 16567.791274 2002 1 3
3 2002-01-04 15155.881762 2002 1 4
4 2002-01-05 11307.400179 2002 1 5

Normalize data


In [236]:
# NORMALIZE
d = {}
for y in df.year.unique():
    month_max = df[df['year']==y].groupby('month')['max_load'].max()
    month_min = df[df['year']==y].groupby('month')['max_load'].min()
    for m, (m_min, m_max) in enumerate(zip(month_min, month_max)):
        d['{}-{}'.format(y, m+1)] = [m_min, m_max]

def norm_load(row):
    year = row['year']
    month = row['month']
    l = d['{}-{}'.format(int(year), int(month))]
    m_min = l[0]
    m_max = l[1]
    return (row['max_load'] - m_min)/(m_max - m_min)

def norm(c):
    return (c - c.min())/(c.max() - c.min()) 

df_n = pd.DataFrame()
df_n['peak_prob'] = df.apply(norm_load, axis=1)
df_n['load_n'] = norm(df['max_load'])
df_n['year_n'] = norm(df['year'])
df_n['day'] = df['day'] / 31
for i in range(1, 13):
    df_n['month-{}'.format(i)] = df['month'] == i
df_n.tail()


Out[236]:
peak_prob load_n year_n day month-1 month-2 month-3 month-4 month-5 month-6 month-7 month-8 month-9 month-10 month-11 month-12
6200 0.319464 0.259007 1.0 0.870968 False False False False False False False False False False False True
6201 0.663673 0.393734 1.0 0.903226 False False False False False False False False False False False True
6202 0.730352 0.419833 1.0 0.935484 False False False False False False False False False False False True
6203 0.619110 0.376292 1.0 0.967742 False False False False False False False False False False False True
6204 0.490787 0.326065 1.0 1.000000 False False False False False False False False False False False True

Prepare for cross-validation


In [237]:
all_X = df_n.drop(['peak_prob'], axis=1)
all_y = df_n['peak_prob']
X = all_X[:-365]
y = all_y[:-365]
X_test = all_X[-365:]
y_test = all_y[-365:]

Try linear regression


In [238]:
model = LinearRegression().fit(X, y)
predictions_lr = model.predict(X_test)
print(model.score(X, y), model.score(X_test, y_test))


0.7911306514217318 0.6424733596407031

Try neural network


In [260]:
def MAPE(ans, pre):
    return sum([abs(x-y)/(abs(y) + 1e-7) for (x, y) in zip(ans, pre)])/len(pre)*100

model = keras.Sequential([
    layers.Dense(X.shape[1], activation=tf.nn.relu, input_shape=[X.shape[1]]),
    layers.Dense(1)
])

optimizer = tf.keras.optimizers.RMSprop(0.001)

model.compile(loss='mean_squared_error',
                optimizer=optimizer,
                metrics=['mean_absolute_error', 'mean_squared_error'])

EPOCHS = 100
model.fit(X, y, epochs=EPOCHS, verbose=0)
predictions_nn = model.predict(X_test).flatten()
print(100 - MAPE(y, model.predict(X).flatten()), 100 - MAPE(y_test, model.predict(X_test).flatten()))


48.80537324479677 36.172747450248764

Recall / Precision analysis


In [319]:
def recall(ans, pre):
    true_positive = sum(ans & pre)
    false_negative = sum(ans & (~ pre))
    return true_positive / (true_positive + false_negative + 1e-7)
def precision(ans, pre):
    true_positive = sum(ans & pre)
    false_positive = sum((~ ans) & pre)
    return (true_positive)/(true_positive + false_positive + 1e-7)
def peaks_missed(ans, pre):
    return sum(ans & (~ pre)) / 12
def unnecessary_dispatches(ans, pre):
    return sum((~ ans) & pre) / len(pre)

def run_testing_gambit(y_hat, y_test):
    results = {'recall': [], 'precision' : [], 'peaks_missed': [], 'unnecessary_dispatches': []}
    es = np.linspace(0,1,100)
    y_a = y_test == 1
    for e in es:
        p_a = y_hat > e
        results['recall'].append(recall(y_a, p_a))
        results['precision'].append(precision(y_a, p_a))
        results['peaks_missed'].append(peaks_missed(y_a, p_a))
        results['unnecessary_dispatches'].append(unnecessary_dispatches(y_a, p_a))
    df_r = pd.DataFrame(results)
    df_r['F1'] = df_r['recall']*df_r['precision'] / (df_r['recall'] + df_r['precision'])
    return df_r

df_nn = run_testing_gambit(predictions_nn, y_test)
df_lr = run_testing_gambit(predictions_lr, y_test)

In [262]:
df_nn.plot()


Out[262]:
<matplotlib.axes._subplots.AxesSubplot at 0x14d6c2e80>

In [263]:
df_lr.plot()


Out[263]:
<matplotlib.axes._subplots.AxesSubplot at 0x14d8770b8>

Simple quintile


In [290]:
# what if I say: dispatch if in x percentile of historical for this month is
results = {'recall': [], 'precision' : [], 'peaks_missed': [], 'unnecessary_dispatches': []}
es = np.linspace(0,1,100)
s = df[:-365].groupby('month')['max_load']
load_test = df['max_load'][-365:]
month_test = df['month'][-365:]
y_a = y_test == 1
for e in es:
    month_quant = [s.quantile(e)[month] for month in range(1, 13)]
    p_a = np.array([load >= month_quant[month-1] for load, month in zip(load_test, month_test)])
    results['recall'].append(recall(y_a, p_a))
    results['precision'].append(precision(y_a, p_a))
    results['peaks_missed'].append(peaks_missed(y_a, p_a))
    results['unnecessary_dispatches'].append(unnecessary_dispatches(y_a, p_a))
df_r = pd.DataFrame(results)
df_r['F1'] = df_r['recall']*df_r['precision'] / (df_r['recall'] + df_r['precision'])
df_r.head()


Out[290]:
recall precision peaks_missed unnecessary_dispatches F1
0 1.0 0.032877 0.0 0.967123 0.031830
1 1.0 0.032877 0.0 0.967123 0.031830
2 1.0 0.032877 0.0 0.967123 0.031830
3 1.0 0.032877 0.0 0.967123 0.031830
4 1.0 0.032967 0.0 0.964384 0.031915

In [292]:
df_r.plot()


Out[292]:
<matplotlib.axes._subplots.AxesSubplot at 0x14deee668>

Brilliant! Let's combine that


In [349]:
e = .91
s = df[:-365].groupby('month')['max_load']
load_test = df['max_load'][-365:]
month_test = df['month'][-365:]
month_quant = [s.quantile(e)[month] for month in range(1, 13)]

df_n['monthly_q'] = (df.month.apply(lambda x: month_quant[x-1]))
df_n = pd.DataFrame()
df_n['peak_prob'] = df.apply(norm_load, axis=1)
df_n['load_n'] = norm(df['max_load'])
df_n['year_n'] = norm(df['year'])
df_n['day_n'] = df['day'] / 31
for i in range(1, 13):
    df_n['month-{}'.format(i)] = df['month'] == i
all_X = df_n.drop(['peak_prob'], axis=1)
all_y = df_n['peak_prob']
X = all_X[:-365]
y = all_y[:-365]
X_test = all_X[-365:]
y_test = all_y[-365:]

predictions_q = np.array([load >= month_quant[month-1] for load, month in zip(load_test, month_test)])

highest_l = [-42]*12
dispatch = [None]*365
for i, (l, m) in enumerate(zip(X_test['load_n'], df['month'][-365:])):
    if highest_l[m-1] == -42:
        highest_l[m-1] = l
        dispatch[i] = True
    elif l < highest_l[m-1]:
        dispatch[i] = False
    elif l >= highest_l[m-1]:
        dispatch[i] = True
        highest_l[m-1] = l
dispatch = np.array(dispatch)
dispatch[~predictions_q] = 0

dispatch2 = [None]*365
loads = list(X_test['load_n'])
months_d = list(df['month'][-365:])
for i, (l, m) in enumerate(zip(loads[:-1], months_d[:-1])):
    if l > loads[i+1] and months_d[i] == months_d[i+1]:
        dispatch2[i] = True
    else:
        dispatch2[i] = False
dispatch2[-1] = False
dispatch2 = np.array(dispatch2)

predictions_nn[~dispatch] = 0
predictions_nn[~dispatch2] = 0
df_r = run_testing_gambit(predictions_nn, y_test)
df_r.plot()


Out[349]:
<matplotlib.axes._subplots.AxesSubplot at 0x11a308fd0>

In [350]:
df_r.head()


Out[350]:
recall precision peaks_missed unnecessary_dispatches F1
0 1.0 0.521739 0.0 0.030137 0.342857
1 1.0 0.521739 0.0 0.030137 0.342857
2 1.0 0.521739 0.0 0.030137 0.342857
3 1.0 0.521739 0.0 0.030137 0.342857
4 1.0 0.521739 0.0 0.030137 0.342857

In [355]:
m = predictions_nn[predictions_nn != 0]
m.shape


Out[355]:
(23,)

In [354]:
predictions_nn


Out[354]:
array([0.        , 1.0291137 , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 1.1585015 , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.8429032 , 0.        , 0.        ,
       0.        , 0.        , 0.86863965, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.5736975 ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.6746999 , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.69361305, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.7345391 , 0.        ,
       0.        , 0.        , 0.871024  , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.9738486 ,
       0.        , 0.        , 0.9255931 , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.9139426 , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.96346664, 0.        , 0.        ,
       0.        , 0.        , 0.9802121 , 0.        , 0.        ,
       0.        , 0.        , 0.7350315 , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.96005046,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.93806326, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.7908511 , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.80250734, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.8941884 , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.9745005 , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.6243006 , 0.        ,
       0.        , 0.        , 0.        , 0.605555  , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ],
      dtype=float32)

In [ ]: