Effect of different holidays on energy consumption


In [7]:
import pandas as pd
import holidays, calendar
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime as dt, timedelta
%matplotlib inline
df = pd.read_csv('Texas_17yr_TempAndLoad.csv', parse_dates=['dates'])

In [8]:
nercHolidays = ["New Year's Day", 'Memorial Day', 'Independence Day', 'Labor Day', 'Thanksgiving Day', 'Christmas Day']

In [9]:
df['year'] = df['dates'].dt.year
df['month'] = df['dates'].dt.month
df['hour'] = df['dates'].dt.hour
df['date'] = df['dates'].dt.date
df.sample(3)


Out[9]:
dates load tempc year month hour date
38057 2006-05-06 17:00:00 10367.941312 22.8 2006 5 17 2006-05-06
12972 2003-06-25 12:00:00 17865.904046 26.1 2003 6 12 2003-06-25
31044 2005-07-18 12:00:00 17077.651243 26.1 2005 7 12 2005-07-18

In [25]:
def plotHolidays(df, holiday, ax):
    '''Given a particular holiday and many years load data, create plots comparing 
    how a holiday's daily load may be different than the monthly average '''
    
    # find the dates of the holiday for all years in df
    allYears = list(df['year'].unique())
    allHolidays = dict(holidays.US(years=allYears))
    dtlist = [k for k, v in allHolidays.items() if v == holiday]
    
    # this is an estimate. Some holidays liket
    monthOfHoliday = dtlist[0].month
    
    # calculate what the typical day looks like
    typicalDayInMonth = df[df['month'] == monthOfHoliday].groupby('hour')['load'].median()
    typicalHoliday = df[df['date'].isin(dtlist)].groupby('hour')['load'].median()

    # plot
    ax.plot(typicalDayInMonth)
    ax.plot(typicalHoliday)
    ax.legend(['Monthly median', holiday])
    ax.set_title('{} compared to typical day in {}'.format(holiday, calendar.month_name[monthOfHoliday]))
    ax.set_xlabel('time')
    ax.set_ylabel('kW')
    
    # make clearer x labels
    a = ['{}am'.format(i) if i != 0 else '12am' for i in range(12)]
    a = a + ['{}pm'.format(i) if i != 0 else '12pm' for i in range(12)]
    ax.set_xticks(range(24))
    ax.set_xticklabels(a)
    
    # set limits
    ax.set_ylim((0, 20000))
    
    return typicalDayInMonth, typicalHoliday, a

allholidays = list(dict(holidays.US(years=2015)).values())
fig, axes = plt.subplots(ncols=2, nrows=len(allholidays), figsize=(30, 50))
for i, h in enumerate(allholidays):
    t1, t2, a = plotHolidays(df, h, axes[i, 0])
    t1, t2 = np.array(t1), np.array(t2)
    avgdif = ((t2 - t1) / t1) * 100
    axes[i, 1].bar(a, avgdif)
    axes[i, 1].set_ylim(-20, 15)
    axes[i, 1].set_ylabel('percent increase')
    axes[i, 1].set_ylabel('time')
    axes[i, 1].set_title('{} percent increase from median'.format(h))
plt.tight_layout()