``````

In [50]:

import pandas as pd
import numpy as np
import os
import pickle
from scipy.stats.mstats import zscore
import time
from datetime import date

``````
``````

In [2]:

"""
What do I need to test:
train accuracy
test accuracy

What am I looking to change
- leaky_relu, relu, tanh, etc...
- number of layers
- number of nodes

Secondary:
- What about the SPP? Ignore for now
- How does model quality change when given fewer years?

----------------
First decide best structure:
layers [1, 5, 10] and nodes [36, 50, 72, 100]
2D matrix with train/test harmonic mean
"""

``````
``````

Out[2]:

'\nWhat do I need to test:\ntrain accuracy\ntest accuracy\n\nWhat am I looking to change\n- leaky_relu, relu, tanh, etc...\n- number of layers\n- range or std\n- number of nodes\n- \n\nSecondary:\n- What about the SPP? Ignore for now\n- How does model quality change when given fewer years?\n\n\n----------------\nFirst decide best structure:\nlayers [1, 5, 10] and nodes [36, 50, 72, 100]\n2D matrix with train/test harmonic mean\n'

``````
``````

In [73]:

with open('holidays.pickle', 'rb') as f:

def isHoliday(holiday, df):
# New years, memorial, independence, labor day, Thanksgiving, Christmas
m1 = None
if holiday == "New Year's Day":
m1 = (df["dates"].dt.month == 1) & (df["dates"].dt.day == 1)
if holiday == "Independence Day":
m1 = (df["dates"].dt.month == 7) & (df["dates"].dt.day == 4)
if holiday == "Christmas Day":
m1 = (df["dates"].dt.month == 12) & (df["dates"].dt.day == 25)
m1 = df["dates"].dt.date.isin(nerc6[holiday]) if m1 is None else m1
m2 = df["dates"].dt.date.isin(nerc6.get(holiday + " (Observed)", []))
return m1 | m2

def makeUsefulDf(df):
"""
Turn a dataframe of datetime and load data into a dataframe useful for
machine learning. Normalize values and turn
Features are placed into r_df (return dataframe), creates the following columns

YEARS SINCE 2000

LOAD AT THIS TIME DAY BEFORE

HOUR OF DAY
- is12AM (0, 1)
- is1AM (0, 1)
...
- is11PM (0, 1)

DAYS OF THE WEEK
- isSunday (0, 1)
- isMonday (0, 1)
...
- isSaturday (0, 1)

MONTHS OF THE YEAR
- isJanuary (0, 1)
- isFebruary (0, 1)
...
- isDecember (0, 1)

TEMPERATURE
- Celcius (normalized from -1 to 1)

- 12AM of day previous (normalized from -1 to 1)
- 1AM of day previous (normalized from -1 to 1)
...
- 11PM of day previous (normalized from -1 to 1)

HOLIDAYS (the nerc6 holidays)
- isNewYears (0, 1)
- isMemorialDay (0, 1)
...
- is Christmas (0, 1)

"""

def _normalizeCol(l):
#s = l.max() - l.min()
#return l if s == 0 else (l - l.mean()) / l.std()
return zscore(l)
def _chunks(l, n):
return [l[i : i + n] for i in range(0, len(l), n)]

r_df = pd.DataFrame()
r_df["years_n"] = _normalizeCol(df["dates"].dt.year - 2000)

# fix outliers
m = df["tempc"].replace([-9999], np.nan)
m.ffill(inplace=True)
# 2.5 degrees average std error for the national weather service
temp_noise = m
r_df["temp_n"] = _normalizeCol(temp_noise)
r_df['temp_n^2'] = r_df["temp_n"]**2

# create day of week vector
r_df["day"] = df["dates"].dt.dayofweek  # 0 is Monday.
w = ["S", "M", "T", "W", "R", "F", "A"]
for i, d in enumerate(w):
r_df[d] = (r_df["day"] == i).astype(int)

# create hour of day vector
r_df["hour"] = df["dates"].dt.hour
d = [("h" + str(i)) for i in range(24)]
for i, h in enumerate(d):
r_df[h] = (r_df["hour"] == i).astype(int)

# create month vector
r_df["month"] = df["dates"].dt.month
y = [("m" + str(i)) for i in range(12)]
for i, m in enumerate(y):
r_df[m] = (r_df["month"] == i).astype(int)

# create 'load day before' vector
n = np.array([val for val in _chunks(list(r_df["load_n"]), 24) for _ in range(24)])
l = ["l" + str(i) for i in range(24)]
for i, s in enumerate(l):
r_df[s] = n[:, i]

# create holiday booleans
r_df["isNewYears"] = isHoliday("New Year's Day", df)
r_df["isMemorialDay"] = isHoliday("Memorial Day", df)
r_df["isIndependenceDay"] = isHoliday("Independence Day", df)
r_df["isLaborDay"] = isHoliday("Labor Day", df)
r_df["isThanksgiving"] = isHoliday("Thanksgiving", df)
r_df["isChristmas"] = isHoliday("Christmas Day", df)

m = r_df.drop(["month", "hour", "day", "load_n"], axis=1)
return m

def neural_net_predictions(all_X, all_y):
import tensorflow as tf
from tensorflow.keras import layers
tic = time.time()
X_train, y_train = all_X[:-8760], all_y[:-8760]

model = tf.keras.Sequential([
layers.Dense(all_X.shape[1], activation=tf.nn.relu, input_shape=[len(X_train.keys())]),
layers.Dense(all_X.shape[1], activation=tf.nn.relu),
layers.Dense(all_X.shape[1], activation=tf.nn.relu),
layers.Dense(all_X.shape[1], activation=tf.nn.relu),
layers.Dense(all_X.shape[1], activation=tf.nn.relu),
layers.Dense(1)
])

optimizer = tf.keras.optimizers.RMSprop(0.001)

model.compile(
loss="mean_squared_error",
optimizer=optimizer,
metrics=["mean_absolute_error", "mean_squared_error"],
)

EPOCHS = 100

early_stop = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=10)

history = model.fit(
X_train,
y_train,
epochs=EPOCHS,
validation_split=0.2,
verbose=0,
callbacks=[early_stop],
)

# THIS IS PROBLEMATIC IF Y is EVER 0

predictions = [float(f) for f in model.predict(all_X[-8760:])]
train = [float(f) for f in model.predict(all_X[:-8760])]
accuracy = {
'test': MAPE(predictions, all_y[-8760:]),
'train': MAPE(train, all_y[:-8760])
}

return predictions, accuracy, time.time() - tic

``````
``````

In [74]:

# results = {}
# df_dict = {}
# df_dict['test3'] = {}

# for file in os.listdir('data/test'):
#     if not file.startswith('.'):
#         region = file[:-4]
#         df = pd.read_csv('data/test/' + file, parse_dates=['dates'])
#         all_X = makeUsefulDf(df)
#         predictions, accuracy = neural_net_predictions(all_X, all_y)
#         results[region] = accuracy
#         df_dict['test3'][region] = (2/((1/accuracy['test']) + (1/accuracy['train'])))
#         print(accuracy, df_dict)

``````
``````

In [75]:

all_X = makeUsefulDf(df)
predictions, accuracy, t = neural_net_predictions(all_X, all_y)
print(accuracy, t)

``````
``````

{'test': 0.854672380569408, 'train': 1.273000491187482} 562.1585657596588

``````
``````

In [76]:

all_X = makeUsefulDf(df)
predictions, accuracy, t = neural_net_predictions(all_X, all_y)
print(accuracy, t)

``````
``````

{'test': 0.7357434840690646, 'train': 0.6403265775499171} 1073.6106476783752

``````
``````

In [ ]:

# YAYYYYYYYYYYY

``````