In [1]:
from datetime import datetime
import json
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter, DayLocator, HourLocator
%matplotlib inline

import urllib2
from lxml import etree
from bs4 import BeautifulSoup
import time

from calendar import monthrange

import numpy as np
import copy

import statsmodels.api as sm
from scipy import stats
import pickle

import seaborn as sns
sns.set(context="paper", font="monospace")

In [2]:
with open('../data/df_gen.pkl','rb') as f:
    df_orig = pickle.load(f)

In [3]:
#1-Hour Ramp
df = copy.deepcopy(df_orig)

In [4]:
df['wind_diff'] = df['wind'].diff(1)
df['hydro_diff'] = df['hydro'].diff(1)
df['bunker_diff'] = df['bunker'].diff(1)
df['biomass_diff'] = df['biomass'].diff(1)
df['interconnect_diff'] = df['interconnect'].diff(1)
df['Demanda_diff'] = df['Demanda'].diff(1)
df['Demanda_diff_12'] = df['Demanda'].diff(12)
df['Demandnowind'] = df['Demanda'] - df['wind']+df['interconnect']
df['Dnowinddiff'] = df['Demandnowind'].diff(1)
df['Dnowinddiff_12'] = df['Demandnowind'].diff(12)

In [5]:
start_date = datetime(2013,1,1)
end_date = datetime(2013,1,5)
plt.plot(df[start_date:end_date]['Demandnowind'])
plt.plot(df[start_date:end_date]['Demanda'],'k')


Out[5]:
[<matplotlib.lines.Line2D at 0x7feb6df58f10>]

In [6]:
df_filter = df[df['Dnowinddiff_12']<0][df['Dnowinddiff']>0][df['hydro_diff']>0]


/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py:1808: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)

In [7]:
hist = plt.hist(df_filter.index.hour)
plt.title('Filtered Values by Hour')
plt.xlabel('Hour of Day')
plt.ylabel('#')


Out[7]:
<matplotlib.text.Text at 0x7feb6c73a290>

In [8]:
start_date_cutout = datetime(2013,1,1,14)
end_date_cutout = datetime(2013,1,2,14)
df_plot = df[start_date_cutout:end_date_cutout]
plt.plot(df_filter[start_date_cutout:end_date_cutout].index,df_filter[start_date_cutout:end_date_cutout]['Demanda']/max(df['Demanda']),'.')
plt.plot(df_plot.index,df_plot['Demanda']/max(df['Demanda']))
plt.plot(df_plot.index,df_plot['wind_diff']/max(abs(df_plot['wind_diff'])))
plt.plot(df_plot.index,df_plot['hydro_diff']/max(abs(df_plot['hydro_diff'])),'b')
plt.plot(df_plot.index,df_plot['bunker_diff']/max(abs(df_plot['bunker_diff'])),'k')
hydro_diff_mean = np.mean(df_filter[start_date_cutout:end_date_cutout]['hydro_diff'])
bunker_diff_mean = np.mean(df_filter[start_date_cutout:end_date_cutout]['bunker_diff'])
dnowind_diff_mean = np.mean(df_filter[start_date_cutout:end_date_cutout]['Dnowinddiff'])
print hydro_diff_mean/dnowind_diff_mean
print bunker_diff_mean/dnowind_diff_mean


0.35710206299
0.682137931995

In [9]:
with open('../data/df_temp.pkl','rb') as f:
    df_weather = pickle.load(f)

In [10]:
df_hydro_contrib = copy.deepcopy(df_filter['hydro_diff'].resample('M',how='sum')/df_filter['Dnowinddiff'].resample('M',how='sum'))
df_bunker_contrib = copy.deepcopy(df_filter['bunker_diff'].resample('M',how='sum')/df_filter['Dnowinddiff'].resample('M',how='sum'))
df_wind_contrib = copy.deepcopy(df_filter['wind_diff'].resample('M',how='sum')/df_filter['Dnowinddiff'].resample('M',how='sum'))

#df_biomass = copy.deepcopy(df_filter['biomass']/max(df_filter['biomass'])).resample('M',how='sum')
df_rain = copy.deepcopy(df_weather['precipMM']).resample('M',how='sum')
avg_rainfall = np.array([4,1,5,6,76,296,134,130,182,243,59,6])
#df_rain = df_rain/max(df_rain)
#plt.plot(df_rain_contrib[start:end].index,df_rain_contrib[start:end])
#plt.plot(df_hydro_contrib[start:end].index,df_hydro_contrib[start:end])
#plt.plot(df_biomass_contrib[start:end].index,df_biomass_contrib[start:end])
#plt.plot(df_weather['precipMM'])
#precipitation - rain
#biomass
start = datetime(2012,1,1)
end = datetime(2012,12,31)
#plt.plot(df_rain[start:end],'k')
plt.plot(range(1,13),df_hydro_contrib[start:end],'b',label='Hydro 2012')
#plt.plot(df_biomass[start:end],'ko')
plt.plot(range(1,13),df_bunker_contrib[start:end],'r',label='Bunker 2012')
#plt.plot(df['hydro'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'k',label='2012')
#plt.plot(df['bunker'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'k--')
#plt.plot(df['biomass'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'ko')
plt.xlim([1,12])
start = datetime(2013,1,1)
end = datetime(2013,12,31)
#plt.plot(df_rain[start:end],'b')
plt.plot(range(1,13),df_hydro_contrib[start:end],'b:',label='Hydro 2013')
#plt.plot(df_biomass[start:end],'bo')
plt.plot(range(1,13),df_bunker_contrib[start:end],'r:',label='Bunker 2013')
#plt.plot(df['hydro'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'b',label='2013')
#plt.plot(df['bunker'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'b--')
#plt.plot(df['biomass'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'bo')
start = datetime(2014,1,1)
end = datetime(2014,12,31)
#plt.plot(df_rain[start:end],'r')
plt.plot(range(1,13),df_hydro_contrib[start:end],'b--',label='Hydro 2014')
#plt.plot(df_biomass[start:end],'ro')
plt.plot(range(1,13),df_bunker_contrib[start:end],'r--',label='Bunker 2014')
#plt.plot(df['hydro'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'r',label='2014')
#plt.plot(df['bunker'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'r--')
#plt.plot(df['biomass'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'ro')
#plt.plot(avg_rainfall,'g')
plt.ylabel('% Contribution')
plt.xlabel('Month of Year')
plt.title('% Contributions of Hydro and Bunker Fuel to 1-Hour Demand Ramps in Subset')
plt.legend(title="Years",bbox_to_anchor=(1.1, .9),
           bbox_transform=plt.gcf().transFigure)


#t-tests


Out[10]:
<matplotlib.legend.Legend at 0x7feb6c525610>

In [573]:
start_2013 = datetime(2013,1,1)
end_2013 = datetime(2013,12,31)
start_2014 = datetime(2014,1,1)
end_2014 = datetime(2014,12,31)
stats.ttest_ind(np.array(df_hydro_contrib[start_2013:end_2013]),np.array(df_hydro_contrib[start_2014:end_2014]))


Out[573]:
(2.3345094323409437, 0.029099276123273597)

In [537]:
start = datetime(2014,1,1)
end = datetime(2014,12,31)
plt.xlim([1,12])
#plt.plot(df_rain[start:end],'r')
plt.plot(range(1,13),df_hydro_contrib[start:end],'b--',label='Hydro 2014')
#plt.plot(df_biomass[start:end],'ro')
plt.plot(range(1,13),df_bunker_contrib[start:end],'r--',label='Bunker 2014')
#plt.plot(df['hydro'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'r',label='2014')
#plt.plot(df['bunker'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'r--')
#plt.plot(df['biomass'][start:end].resample('M')/df['Demanda'][start:end].resample('M'),'ro')
#plt.plot(avg_rainfall,'g')
plt.ylabel('% Contribution')
plt.xlabel('Month of Year')
plt.title('% Contributions of Hydro and Bunker Fuel to 1-Hour Demand Ramps in Subset')
plt.legend(title="Years",bbox_to_anchor=(1.1, .9),
           bbox_transform=plt.gcf().transFigure)


Out[537]:
<matplotlib.legend.Legend at 0x7fb77bcf8090>

In [568]:
start_2013 = datetime(2013,6,1)
end_2013 = datetime(2013,11,1)
start_2014 = datetime(2014,6,1)
end_2014 = datetime(2014,11,1)
expect_2014 = np.array(df_bunker_contrib[start_2013:end_2013])*df_filter['Dnowinddiff'].resample('M',how='sum')[start_2014:end_2014]
real_2014 = df_filter['bunker_diff'].resample('M',how='sum')[start_2014:end_2014]
plt.plot(real_2014-expect_2014)
sum(real_2014-expect_2014)


Out[568]:
3334.2171422041274

In [496]:
plt.plot(df_filter.resample('M').index,df_filter['Demanda_diff'].resample('M'),'k',label='Demand')
plt.plot(df_filter.resample('M').index,df_filter['bunker_diff'].resample('M'),'r',label='Bunker')
plt.plot(df_filter.resample('M').index,df_filter['hydro_diff'].resample('M'),'b',label='Hydro')
plt.plot(df_filter.resample('M').index,df_filter['wind_diff'].resample('M'),'g',label='Wind')
plt.plot(df_filter.resample('M').index,df_filter['biomass_diff'].resample('M'),'m',label='Biomass')
plt.plot(df_filter.resample('M').index,df_filter['interconnect_diff'].resample('M'),'k:',label='Interconnect')
plt.plot(df_filter.resample('M').index,df_filter['Dnowinddiff'].resample('M'),'k--',label='Dn')
plt.legend(title='1-Hour Ramps',bbox_to_anchor=(1.1, .9),
           bbox_transform=plt.gcf().transFigure)
ticks = plt.xticks(rotation=45)
plt.title('1-Hour Ramps of Different Generation within subset')


Out[496]:
<matplotlib.text.Text at 0x7fb77cea2690>

In [382]:



Out[382]:
[<matplotlib.lines.Line2D at 0x7fb781b1bd10>]

In [146]:
df_rain_contrib[start:end]


Out[146]:
datetime
2012-01-31    0.008540
2012-02-29    0.010188
2012-03-31    0.011137
2012-04-30    0.234119
2012-05-31    0.480324
2012-06-30    0.383290
2012-07-31    0.254694
2012-08-31    0.620855
2012-09-30    0.609852
2012-10-31    0.598182
2012-11-30    0.056516
2012-12-31    0.028466
2013-01-31    0.007641
2013-02-28    0.002097
2013-03-31    0.007042
2013-04-30    0.031013
2013-05-31    0.434778
2013-06-30    0.458000
2013-07-31    0.466940
2013-08-31    0.711396
2013-09-30    0.507391
2013-10-31    0.915302
2013-11-30    0.383690
2013-12-31    0.027117
Freq: M, Name: precipMM, dtype: float64

In [189]:
df['hydro.wind'] = df['hydro_diff']/df['Dnowinddiff']
df['bunker.wind'] = df['bunker_diff']/df['Dnowinddiff']

In [88]:
df_corr = copy.deepcopy(df.dropna())
#The Quantities Correlated
corr_df = pd.DataFrame(np.corrcoef(df_corr.T),columns=df_corr.keys()).T
corr_df.columns = df_corr.keys()
print corr_df
plt.figure()
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corr_df.as_matrix(), linewidths=0, robust=True, square=False,xticklabels=df_corr.keys(),yticklabels=df_corr.keys())


                  wind   biomass  interconnect  geothermal    bunker  \
wind          1.000000  0.437450      0.193495    0.192312 -0.503679   
biomass       0.437450  1.000000      0.203706   -0.024936 -0.408895   
interconnect  0.193495  0.203706      1.000000    0.154724  0.064493   
geothermal    0.192312 -0.024936      0.154724    1.000000 -0.106047   
bunker       -0.503679 -0.408895      0.064493   -0.106047  1.000000   
hydro        -0.266350 -0.116476     -0.059271   -0.049381  0.072102   
Demanda       0.000628 -0.046632      0.098336    0.102244  0.775076   
wind_diff     0.048731  0.076041      0.023101   -0.099327 -0.109583   
hydro_diff    0.000807  0.059363      0.058625   -0.014056  0.047266   
hydro.wind    0.003036 -0.018798     -0.002527    0.010446  0.020137   
Demandnowind -0.511860 -0.264209     -0.014669   -0.010706  0.923929   

                 hydro   Demanda  wind_diff  hydro_diff  hydro.wind  \
wind         -0.266350  0.000628   0.048731    0.000807    0.003036   
biomass      -0.116476 -0.046632   0.076041    0.059363   -0.018798   
interconnect -0.059271  0.098336   0.023101    0.058625   -0.002527   
geothermal   -0.049381  0.102244  -0.099327   -0.014056    0.010446   
bunker        0.072102  0.775076  -0.109583    0.047266    0.020137   
hydro         1.000000  0.255538  -0.043188    0.239824   -0.001780   
Demanda       0.255538  1.000000  -0.119330    0.147393    0.023581   
wind_diff    -0.043188 -0.119330   1.000000   -0.218850   -0.022118   
hydro_diff    0.239824  0.147393  -0.218850    1.000000   -0.016679   
hydro.wind   -0.001780  0.023581  -0.022118   -0.016679    1.000000   
Demandnowind  0.356002  0.858747  -0.127483    0.126207    0.018702   

              Demandnowind  
wind             -0.511860  
biomass          -0.264209  
interconnect     -0.014669  
geothermal       -0.010706  
bunker            0.923929  
hydro             0.356002  
Demanda           0.858747  
wind_diff        -0.127483  
hydro_diff        0.126207  
hydro.wind        0.018702  
Demandnowind      1.000000  
Out[88]:
<matplotlib.axes.AxesSubplot at 0x7fc434cb5390>
<matplotlib.figure.Figure at 0x7fc434e28890>

In [268]:
#df_filter = copy.deepcopy(df[df['wind'].diff(1)<0][df['hydro'].diff(1)>0])

#DiegoAlg

In [396]:
start_date = datetime(2013,1,1)
end_date = datetime(2013,12,31)
df_filter = copy.deepcopy(df_orig[start_date:end_date])

df_filter['wind_diff'] = df_filter['wind'].diff(1)
df_filter['hydro_diff'] = df_filter['hydro'].diff(1)
df_filter['demand_diff'] = df_filter['Demanda'].diff(1)
df_filter['demandnowind_diff'] = (df_filter['Demanda'] - df_filter['wind']).diff(1)
df_filter['hydro_contrib'] = df_filter['hydro_diff']/(df_filter['demandnowind_diff'])
df_filter = df_filter.dropna()

In [405]:
df_heatmap = df_filter[df_filter['demand_diff']>1][df_filter['wind_diff']<-1][df_filter['hydro_diff']>1]
heatmap_load = np.zeros((365,24))
for i,val in enumerate(df_heatmap.iterrows()):
    heatmap_load[val[0].dayofyear-1][val[0].hour] = val[1]['hydro_contrib']
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(heatmap_load,linewidths=0, robust=True, square=False)
plt.title('Hydro Contribution to Demand Changes')
ax.invert_yaxis()
plt.yticks(np.arange(15,365,60), np.arange(1,len(np.arange(15,365,60))+1)*2)
f.tight_layout()



In [406]:
df_heatmap = df_filter[df_filter['demand_diff']>1][df_filter['wind_diff']>1][df_filter['hydro_diff']>1]
heatmap_load = np.zeros((365,24))
for i,val in enumerate(df_heatmap.iterrows()):
    heatmap_load[val[0].dayofyear-1][val[0].hour] = val[1]['hydro_contrib']
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(heatmap_load,linewidths=0, robust=True, square=False)
plt.title('Hydro Contribution to Demand Changes')
ax.invert_yaxis()
plt.yticks(np.arange(15,365,60), np.arange(1,len(np.arange(15,365,60))+1)*2)
f.tight_layout()



In [407]:
df_heatmap = df_filter[df_filter['demand_diff']<-1][df_filter['wind_diff']<-1][df_filter['hydro_diff']>1]
heatmap_load = np.zeros((365,24))
for i,val in enumerate(df_heatmap.iterrows()):
    heatmap_load[val[0].dayofyear-1][val[0].hour] = val[1]['hydro_contrib']
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(heatmap_load,linewidths=0, robust=True, square=False)
plt.title('Hydro Contribution to Demand Changes')
ax.invert_yaxis()
plt.yticks(np.arange(15,365,60), np.arange(1,len(np.arange(15,365,60))+1)*2)
f.tight_layout()



In [408]:
df_heatmap = df_filter[df_filter['demand_diff']<-1][df_filter['wind_diff']>1][df_filter['hydro_diff']>1]
heatmap_load = np.zeros((365,24))
for i,val in enumerate(df_heatmap.iterrows()):
    heatmap_load[val[0].dayofyear-1][val[0].hour] = val[1]['hydro_contrib']
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(heatmap_load,linewidths=0, robust=True, square=False)
plt.title('Hydro Contribution to Demand Changes')
ax.invert_yaxis()
plt.yticks(np.arange(15,365,60), np.arange(1,len(np.arange(15,365,60))+1)*2)
f.tight_layout()



In [ ]:


In [ ]: