In [7]:
import pandas as pd
import holidays, calendar
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime as dt, timedelta
%matplotlib inline
# Read the CSV into a DataFrame, parsing the 'dates' column as datetimes.
df = pd.read_csv(
    'Texas_17yr_TempAndLoad.csv',
    parse_dates=['dates'],
)
In [8]:
# Six holiday names, one per line for readability.
# NOTE(review): presumably the NERC-recognised holidays, going by the name -- confirm.
nercHolidays = [
    "New Year's Day",
    'Memorial Day',
    'Independence Day',
    'Labor Day',
    'Thanksgiving Day',
    'Christmas Day',
]
In [9]:
# Split the 'dates' timestamps into calendar parts, each stored as a new
# column named after the part (year, month, hour, date).
for part in ('year', 'month', 'hour', 'date'):
    df[part] = getattr(df['dates'].dt, part)

# Display three randomly chosen rows to eyeball the new columns.
df.sample(3)
Out[9]:
In [25]:
def plotHolidays(df, holiday, ax, ylim=(0, 20000)):
    '''Plot a holiday's median hourly load against the median day of its month.

    Parameters
    ----------
    df : pandas.DataFrame
        Load data. The columns 'year', 'month', 'hour', 'date' and 'load'
        are read; 'date' values are compared against the dates returned by
        ``holidays.US``.
    holiday : str
        Holiday name, matched exactly against the names ``holidays.US``
        reports for the years present in ``df``.
    ax : matplotlib axes
        Axes the two curves are drawn on.
    ylim : tuple, optional
        y-axis limits passed to ``ax.set_ylim``. Defaults to ``(0, 20000)``,
        the value that used to be hard-coded.

    Returns
    -------
    tuple
        ``(typicalDayInMonth, typicalHoliday, a)``: the per-hour median load
        over the holiday's month, the per-hour median load over the holiday
        dates, and the list of 24 hour labels used on the x axis.

    Raises
    ------
    IndexError
        If no date in the covered years carries the name ``holiday``.
    '''
    # find the dates of the holiday for all years in df
    allYears = list(df['year'].unique())
    allHolidays = dict(holidays.US(years=allYears))
    dtlist = [k for k, v in allHolidays.items() if v == holiday]
    if not dtlist:
        # Same exception type the bare dtlist[0] lookup used to raise, but
        # with a message that says what actually went wrong.
        raise IndexError(
            'no dates named {!r} found for years {}'.format(holiday, allYears)
        )

    # The month is an estimate: some holidays (e.g. observed days next to a
    # month boundary) land in different months in different years. Use the
    # month that occurs most often; max() keeps the first-seen month on ties,
    # which matches the old "month of the first date" behaviour.
    holidayMonths = [d.month for d in dtlist]
    monthOfHoliday = max(holidayMonths, key=holidayMonths.count)

    # calculate what the typical day looks like
    typicalDayInMonth = df[df['month'] == monthOfHoliday].groupby('hour')['load'].median()
    typicalHoliday = df[df['date'].isin(dtlist)].groupby('hour')['load'].median()

    # plot
    ax.plot(typicalDayInMonth)
    ax.plot(typicalHoliday)
    ax.legend(['Monthly median', holiday])
    ax.set_title('{} compared to typical day in {}'.format(holiday, calendar.month_name[monthOfHoliday]))
    ax.set_xlabel('time')
    ax.set_ylabel('kW')

    # make clearer x labels: 12am, 1am, ..., 11am, 12pm, 1pm, ..., 11pm
    a = ['{}am'.format(i) if i != 0 else '12am' for i in range(12)]
    a = a + ['{}pm'.format(i) if i != 0 else '12pm' for i in range(12)]
    ax.set_xticks(range(24))
    ax.set_xticklabels(a)

    # set limits
    ax.set_ylim(ylim)
    return typicalDayInMonth, typicalHoliday, a
# Build one row of subplots per holiday name that holidays.US reports for 2015:
# left = holiday vs. monthly median load curves, right = percent difference.
allholidays = list(dict(holidays.US(years=2015)).values())
# squeeze=False keeps `axes` two-dimensional even if there is a single row,
# so the axes[i, 0] / axes[i, 1] indexing below always works.
fig, axes = plt.subplots(ncols=2, nrows=len(allholidays), figsize=(30, 50), squeeze=False)
for i, h in enumerate(allholidays):
    t1, t2, a = plotHolidays(df, h, axes[i, 0])
    t1, t2 = np.array(t1), np.array(t2)
    # percent change of the holiday median relative to the monthly median, per hour
    avgdif = ((t2 - t1) / t1) * 100
    axes[i, 1].bar(a, avgdif)
    axes[i, 1].set_ylim(-20, 15)
    axes[i, 1].set_ylabel('percent increase')
    # was a second set_ylabel call, which overwrote 'percent increase' and
    # left the x axis unlabeled
    axes[i, 1].set_xlabel('time')
    axes[i, 1].set_title('{} percent increase from median'.format(h))
# lay out once, after every subplot has been drawn
plt.tight_layout()