In [212]:
from datetime import datetime
import json
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter, DayLocator, HourLocator
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
%matplotlib inline

import urllib2
from lxml import etree
from bs4 import BeautifulSoup
import time
from calendar import monthrange

import numpy as np
import copy

import statsmodels.api as sm
import pickle

import seaborn as sns
sns.set(context="paper", font="monospace")

import math

In [9]:
with open('data/df_gen.pkl','rb') as f:
    df_big_gen = pickle.load(f)

In [10]:
start_date = datetime(2013,12,20)
end_date = datetime(2013,12,31)
df_big_gen['net_load'] = df_big_gen['Demanda'] - df_big_gen['wind']
df_big_gen['month'] = df_big_gen.index.month
df_big_gen['day_of_year'] = df_big_gen.index.dayofyear
df_big_gen['hour'] = df_big_gen.index.hour

In [11]:
df_plot = copy.deepcopy(df_big_gen[start_date:end_date])
ax = plt.subplot()
plt.plot(df_plot.index,df_plot['Demanda'],'r',label='Load')
plt.plot(df_plot.index,df_plot['net_load'],'g',label='Net Load')
plt.ylim(0,600)
fmt = DateFormatter('%d\n%m')
ax.xaxis.set_major_formatter(fmt)
plt.fill_between(df_plot.index, df_plot['Demanda'], df_plot['net_load'], color='blue', alpha='0.2',label='Wind')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)


Out[11]:
<matplotlib.legend.Legend at 0x7f5b81961d10>

In [12]:
#Total wind size is 40 + 23 + 40 + 39.6 + 44 = 186.6 MW
pvflh = []
for i in range(2012,2015):
    start_date = datetime(i,1,1)
    end_date = datetime(i,12,31)
    df_year = copy.deepcopy(df_big_gen[start_date:end_date])
    print str(i) + ': ' + str(round(sum(df_year['wind'])/186.6)) + ' full load hours (out of 8760)'


2012: 1733.0 full load hours (out of 8760)
2013: 3002.0 full load hours (out of 8760)
2014: 4453.0 full load hours (out of 8760)

In [17]:
start_date = datetime(2013,1,1)
end_date = datetime(2013,12,31)
df_ramp = copy.deepcopy(df_big_gen[start_date:end_date])
df_ramp['1_hour_ramp_load'] = df_big_gen['Demanda'].diff(1)
df_ramp['1_hour_ramp_wind'] = df_big_gen['wind'].diff(1)
df_ramp = df_ramp.dropna()

In [18]:
D = sum(df_ramp['Demanda'])
alpha = sum(df_ramp['wind'])/D
B = 0
u_w = np.mean(df_ramp['wind'])

In [19]:
df_ramp['1_hour_ramp_net_load'] = df_ramp['1_hour_ramp_load']-(alpha*D/(8760*u_w))*df_ramp['1_hour_ramp_wind']/186.6

In [20]:
df_ramp['1_hour_ramp_wind_norm'] = (alpha*D/(8760*u_w))*df_ramp['1_hour_ramp_wind']/186.6

In [21]:
hist = plt.hist(df_ramp['1_hour_ramp_wind_norm'],100)
plt.ylabel('Frequency per year')
plt.xlabel('1-hour ramps [share of capacity]')
pass



In [22]:
heatmap_load = np.zeros((len(df_ramp['day_of_year'].unique()),len(df_ramp['hour'].unique())))
heatmap_wind = np.zeros((len(df_ramp['day_of_year'].unique()),len(df_ramp['hour'].unique())))
for i,val in enumerate(df_ramp.iterrows()):
    heatmap_load[val[0].dayofyear-1][val[0].hour] = val[1]['1_hour_ramp_load']/max(df_ramp['Demanda'])
    heatmap_wind[val[0].dayofyear-1][val[0].hour] = val[1]['1_hour_ramp_wind']/186.6

In [23]:
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(heatmap_load,linewidths=0, robust=True, square=False)
plt.title('Demand 1-hour ramps[share of peak load]')
ax.invert_yaxis()
plt.yticks(np.arange(15,365,60), np.arange(1,len(np.arange(15,365,60))+1)*2)
f.tight_layout()

plt.figure()
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(heatmap_load, linewidths=0, robust=True, square=False)
ax.invert_yaxis()
plt.yticks(np.arange(15,365,60), np.arange(1,len(np.arange(15,365,60))+1)*2)
plt.title('Wind 1-hour ramps[share of peak capacity]')
f.tight_layout()


<matplotlib.figure.Figure at 0x7f5b71d421d0>

In [40]:
mean = []
sigma = []
for i in range(0,200,5):
    mean.append(np.mean(df_big_gen['wind'][df_big_gen['wind']>i][df_big_gen['wind']<i+20]))
    sigma.append(np.std(df_big_gen['wind'][df_big_gen['wind']>i][df_big_gen['wind']<i+20]))
plt.plot(mean,sigma)


Out[40]:
[<matplotlib.lines.Line2D at 0x7f5b71b7abd0>]

In [196]:
start_date = datetime(2013,1,1)
end_date = datetime(2013,12,31)
df_sub = copy.deepcopy(df_big_gen[start_date:end_date])
plt.plot(df_sub.index,df_sub['wind'])
#September 2012, PBP came online?
#December 2012 ABR came online?
#February 2014 EOLO came online?


Out[196]:
[<matplotlib.lines.Line2D at 0x7f5b649c07d0>]

In [197]:
mean = []
sigma = []
for i in range(0,140,5):
    mean.append(np.mean(df_sub['wind'][df_sub['wind']>i][df_sub['wind']<i+20]))
    sigma.append(np.std(df_sub['wind'][df_sub['wind']>i][df_sub['wind']<i+20]))

fig = plt.figure()
ax1 = fig.add_subplot(111)
hist = ax1.hist(df_sub['wind'],28)
plt.ylabel('# of Occurences')
plt.xlabel('Hourly Wind (kW)')
ax2 = ax1.twinx()
ax2.plot(mean,sigma,'r.')
ax2.grid(None)
plt.xlim([0,140])
plt.ylim([0,6.5])
plt.ylabel('Variability (1-std)')
print 'Average Sigma (ignoring end points):' + str(np.mean(sigma[:-3]))


Average Sigma (ignoring end points):5.78599196275

In [198]:
mean = []
sigma = []
for i in range(240,600,20):
    mean.append(np.mean(df_sub['Demanda'][df_sub['Demanda']>i][df_sub['Demanda']<i+50]))
    sigma.append(np.std(df_sub['Demanda'][df_sub['Demanda']>i][df_sub['Demanda']<i+50]))
mean = np.array(mean)
sigma = np.array(sigma)
fig = plt.figure()
ax1 = fig.add_subplot(111)
hist = ax1.hist(df_sub['Demanda'],28)
plt.ylabel('# of Occurences')
plt.xlabel('Hourly Demand (kW)')
ax2 = ax1.twinx()
ax2.plot(mean,sigma,'r.')
ax2.grid(None)
plt.xlim([0,700])
plt.ylim([0,16])
plt.ylabel('Variability (1-std)')
poly = np.polyfit(mean,sigma,2)
plt.plot(mean,poly[0]*mean*mean+poly[1]*mean+poly[2],'r')
print 'Average Sigma (ignoring end points):' + str(np.mean(sigma[:-3]))


Average Sigma (ignoring end points):12.6142163978

In [205]:
demand_std = poly[0]*df_sub['Demanda']*df_sub['Demanda']+poly[1]*df_sub['Demanda']+poly[2]
reg_requirement = 3*np.sqrt([x**2+5.78**2 for x in demand_std])
plt.plot(df_sub.index,reg_requirement,'.')


Out[205]:
[<matplotlib.lines.Line2D at 0x7f5b5f831f90>]

In [233]:
train_start_date = datetime(2012,1,1)
train_end_date = datetime(2012,12,31)

test_start_date = datetime(2013,1,1)
test_end_date = datetime(2013,12,31)

def mls(df):
    df['wind_shift_1'] = df['wind'].shift(-1)
    df_train = df[train_start_date:train_end_date]
    df_test = df[test_start_date:test_end_date]
    x_train = df_train.drop('wind_shift_1',1)
    x_test = df_test.drop('wind_shift_1',1)

    y_train = df['wind_shift_1'][train_start_date:train_end_date]
    y_test = df['wind_shift_1'][test_start_date:test_end_date]
   
    x_train = sm.add_constant(x_train, prepend=True) #add a constant
    x_test = sm.add_constant(x_test, prepend=True) #add a constant
    print df_train.drop('wind_shift_1',1).keys()
    ols = sm.OLS(y_train,x_train)
    res = ols.fit() #create a model and fit it
    pred_y = res.predict(x_test)

    error = (y_test - pred_y)
    mape = np.mean(abs(error))

    print "MAE ERROR: " + str(mape)
    print "MAE ERROR/MEAN: " + str(mape/np.mean(y_test))
    print "MAX ERROR: " + str(max(error))
    #print res.summary()
    return pred_y

In [272]:
df_sub['pred_y'] = mls(df_big_gen)


Index([u'wind', u'biomass', u'interconnect', u'geothermal', u'bunker', u'hydro', u'Demanda', u'net_load', u'month', u'day_of_year', u'hour', u'1_hour_ramp_load', u'1_hour_ramp_wind'], dtype='object')
MAE ERROR: 11.2533415007
MAE ERROR/MEAN: 0.175558029268
MAX ERROR: 133.654412604

In [237]:
plt.plot(df_sub['wind']-pred_y)


Out[237]:
[<matplotlib.lines.Line2D at 0x7f5b5f576b10>]

In [286]:
mean = []
sigma = []
for i in range(0,140,5):
    df_vals = df_sub[df_sub['wind_shift_1']>i][df_sub['wind_shift_1']<i+5]
    mean.append(np.mean(df_vals['wind_shift_1']))
    sigma.append(np.std(df_vals['wind_shift_1']-df_vals['pred_y']))
mean = np.array(mean)
sigma = np.array(sigma)
fig = plt.figure()
ax1 = fig.add_subplot(111)
hist = ax1.hist(df_sub['wind'],28)
plt.ylabel('# of Occurences')
plt.xlabel('Hourly Wind (kW)')
ax2 = ax1.twinx()
ax2.plot(mean,sigma,'r.')
ax2.grid(None)
#plt.xlim([0,140])
#plt.ylim([0,6.5])
plt.ylabel('Variability (1-std)')
print 'Average Sigma (ignoring end points):' + str(np.mean(sigma[:-3]))


Average Sigma (ignoring end points):17.0075079045

In [284]:
np.std(df_sub['wind']-df_sub['pred_y'])


Out[284]:
4.5388175952226861

In [255]:
plt.plot(error)


Out[255]:
[<matplotlib.lines.Line2D at 0x7f5b5f2d76d0>]

In [ ]: