In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "last"

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from Index_Calculations import facility_index_gen, index_and_generation
import os
import glob
import numpy as np
from statsmodels.tsa.tsatools import detrend

Group state-level total gen into approx NERC regions

These definitions are taken from Pétron (2008).

Check actual generation from facilities in 2015 against the state-aggregation method


In [2]:
states_in_NERC = {'WECC': ['WA', 'OR', 'CA', 'NV', 'AZ', 'UT',
                           'ID', 'MT', 'WY', 'CO', 'NM'],
                  'MRO': ['ND', 'SD', 'NE', 'MN', 'IA', 'WI'],
                  'SPP': ['KS', 'OK'],
                  'ERCOT': ['TX'],
                  'SERC': ['MO', 'IL', 'AR', 'LA', 'MS', 'AL',
                           'GA', 'SC', 'NC', 'TN'],
                  'FRCC': ['FL'],
                  'RFC': ['MI', 'IN', 'OH', 'PA', 'WV', 'KY',
                          'VA', 'NJ', 'DE', 'MD'],
                  'NPCC': ['NY', 'VT', 'ME', 'NH', 'MA', 'RI', 'CT']}

In [24]:
for nerc, states in states_in_NERC.iteritems():
    paths = [os.path.join('Data storage', 
                          'state gen data', 
                          state + ' fuels gen.csv') for state in states]
    nerc_list = [pd.read_csv(path) for path in paths]
    nerc_df = pd.concat(nerc_list)
    nerc_df.reset_index(inplace=True, drop=True)
    
    path_out = os.path.join('Data storage', 'state gen data', nerc + ' fuels gen.csv')
    nerc_df.to_csv(path_out, index=False)

Calculate index values for each NERC region


In [25]:
facility_path = os.path.join('Data storage', 'Facility gen fuels and CO2 2017-05-25.zip')
epa_path = os.path.join('Data storage', 'Monthly EPA emissions 2017-05-25.csv')
ef_path = os.path.join('Data storage', 'Final emission factors.csv')
out_folder = os.path.join('Data storage', 'final NERC data from states')
NERC_path = os.path.join('Data storage', 'Facility NERC labels.csv')

for nerc in states_in_NERC.keys():
    all_fuel_path = os.path.join('Data storage', 'state gen data', nerc + ' fuels gen.csv')
    index_and_generation(facility_path=facility_path,
                         all_fuel_path = all_fuel_path,
                         epa_path=epa_path,
                         emission_factor_path=ef_path,
                         export_folder=out_folder, export_path_ext=' '
                         + nerc, state=states_in_NERC[nerc])

Read data and create figures


In [3]:
path = os.path.join('Data storage', 'final NERC data from states', 'Monthly index*')
mi_fns = glob.glob(path)
path = os.path.join('Data storage', 'final NERC data from states', 'Monthly gen*')
mg_fns = glob.glob(path)

In [4]:
df_list = []
for f in mi_fns:
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_mi = pd.concat(df_list)
full_mi.reset_index(inplace=True, drop=True)
full_mi.rename(columns={'index (g/kWh)': 'monthly index (g/kWh)'}, inplace=True)
full_mi['datetime'] = pd.to_datetime(full_mi['datetime'])

In [5]:
df_list = []
for f in mg_fns:
    region = f.split()[-1][:-4]
    df = pd.read_csv(f)
    df['region'] = region
    df_list.append(df)
full_mg = pd.concat(df_list)
full_mg.reset_index(inplace=True, drop=True)
full_mg['datetime'] = pd.to_datetime(full_mg['datetime'])

monthly_gen = pd.pivot_table(full_mg, index=['region', 'datetime'], 
                             values='generation (MWh)', columns='fuel category 1')
monthly_gen.reset_index(inplace=True, drop=False)
monthly_gen['Year'] = monthly_gen['datetime'].dt.year
monthly_gen.replace(np.nan, 0, inplace=True)

In [6]:
gen_index = pd.merge(monthly_gen, full_mi[['datetime', 'region', 'monthly index (g/kWh)']], 
                     on=['datetime', 'region'])

In [7]:
gen_index.head()


Out[7]:
region datetime Coal Hydro Natural Gas Nuclear Other Other Renewables Solar Wind Year monthly index (g/kWh)
0 ERCOT 2001-01-01 11683911.0 138093.0 13750546.0 3545310.0 1707467.21 92021.80 0.0 83931.0 2001 638.408119
1 ERCOT 2001-02-01 10236786.0 110148.0 11507834.0 3037626.0 510769.63 81710.37 0.0 141647.0 2001 639.497007
2 ERCOT 2001-03-01 11004470.0 180140.0 13316335.0 2462837.0 447733.81 81192.19 0.0 87631.0 2001 654.137303
3 ERCOT 2001-04-01 9767225.0 124232.0 14402417.0 2668816.0 331369.06 76768.94 0.0 115487.0 2001 633.521538
4 ERCOT 2001-05-01 11449397.0 115102.0 16025878.0 3419870.0 383202.65 86697.35 0.0 103312.0 2001 638.333358

In [7]:
for region in gen_index['region'].unique():
    gen_index.loc[gen_index['region'] == region, 'Index variability'] = \
        gen_index.loc[gen_index['region']==region, 
                       'monthly index (g/kWh)'].rolling(window=12).std()
    
    gen_index.loc[gen_index['region'] == region, 
                   'Normalized Index variability'] = \
         gen_index.loc[gen_index['region']==region, 'Index variability'] / \
         gen_index.loc[gen_index['region']==region, 
                       'monthly index (g/kWh)'].rolling(window=12).mean()

In [8]:
base_year = 2005

In [9]:
fuels = ['Coal', 'Natural Gas', 'Other Renewables', 'Nuclear', 'Other',
         'Solar', 'Wind', 'Hydro']
# fuels = ['Coal', 'Natural Gas', 'Renewables', 'Nuclear', 'Other']
gen_index['Total gen'] = gen_index.loc[:, fuels].sum(axis=1)
for fuel in fuels:
    # New columns that are being added
    col_percent = 'percent ' + fuel
    col_change = 'change in ' + fuel

    # Calculate percent of generation from each fuel type
    gen_index[col_percent] = gen_index.loc[:, fuel] / gen_index.loc[:, 'Total gen']

    # Percent of fuel in region in base year (entire year)
    for region in gen_index['region'].unique():
        percent_fuel_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                          (gen_index['region'] == region), fuel].sum() / gen_index.loc[(gen_index['Year'] == base_year) & 
                                                                                                     (gen_index['region'] == region), 'Total gen'].sum()

        # Use percent of fuel in base year to calculate change for each region/month
        gen_index.loc[gen_index['region'] == region, 
                      col_change] = (gen_index.loc[gen_index['region'] == region, col_percent] - percent_fuel_base) / percent_fuel_base
    
# Change in variability compared to average base year value
for region in gen_index['region'].unique():
    norm_variability_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                  (gen_index['region'] == region), 'Normalized Index variability'].mean()
    variability_base = gen_index.loc[(gen_index['Year'] == base_year) & 
                                  (gen_index['region'] == region), 'Index variability'].mean()
    
    gen_index.loc[gen_index['region'] == region, 
                  'change in variability'] = (gen_index.loc[gen_index['region'] == region, 
                                                            'Index variability'] - variability_base) / variability_base
    
    gen_index.loc[gen_index['region'] == region, 
                  'change in norm variability'] = (gen_index.loc[gen_index['region'] == region, 
                                                            'Normalized Index variability'] - norm_variability_base) / norm_variability_base

In [18]:
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.plotting import *
from bokeh.layouts import gridplot, column
from bokeh.models import HoverTool, Div
from bokeh.palettes import viridis
from bokeh.charts import TimeSeries
from bokeh.io import export_svgs


/Users/Home/anaconda/lib/python2.7/site-packages/bokeh/util/deprecation.py:34: BokehDeprecationWarning: 
The bokeh.charts API has moved to a separate 'bkcharts' package.

This compatibility shim will remain until Bokeh 1.0 is released.
After that, if you want to use this API you will have to install
the bkcharts package explicitly.

  warn(message)

In [19]:
output_notebook()


Loading BokehJS ...

In [20]:
def weighted_percent(df, fuel, year):
    all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
                 'Wind', 'Solar', 'Hydro']
    temp = df.loc[df['Year'] == year, all_fuels]
    temp['Total'] = temp.sum(axis=1)
    
    weighted_per = temp[fuel].sum() / temp['Total'].sum() * 100
    return weighted_per

In [13]:
gen_index.columns


Out[13]:
Index([u'region', u'datetime', u'Coal', u'Hydro', u'Natural Gas', u'Nuclear',
       u'Other', u'Other Renewables', u'Solar', u'Wind', u'Year',
       u'monthly index (g/kWh)', u'Index variability',
       u'Normalized Index variability', u'Total gen', u'percent Coal',
       u'change in Coal', u'percent Natural Gas', u'change in Natural Gas',
       u'percent Other Renewables', u'change in Other Renewables',
       u'percent Nuclear', u'change in Nuclear', u'percent Other',
       u'change in Other', u'percent Solar', u'change in Solar',
       u'percent Wind', u'change in Wind', u'percent Hydro',
       u'change in Hydro', u'change in variability',
       u'change in norm variability'],
      dtype='object')

Actual and Normalized variability over time in each NERC region

At the national level, Index variability went up after 2007, and then dipped back down briefly in 2013. This trend can be seen in a few, but not all, of the regions.

I'm not sure what the cyclical trends in Index variability represent. These values are 12 month rolling standard deviations - they shouldn't be affected by seasonal variation. Update: Upticks in the variability or normalized variability could be due to one or more months where the Index value jumped up or down well outside the normal range. This would increase the rolling standard deviation for 12 months.

The normalized variability here is a 12 month rolling standard deviation of CO2 intensity divided by the 12 month rolling average. I'm suspicious of the results in MRO.


In [12]:
sns.set_style('white', {'axes.linewidth': 1.5,
                        'axes.grid': True})
sns.set_context('notebook', font_scale=1.2)

In [13]:
def region_facet_grid(df, plot_function, x_axis, y_axis, col_order=None,
                      suptitle='', add_legend=False, ax_labels=None,
                      FG_kwargs={}, plot_kwargs={}):
    g = sns.FacetGrid(df, col_order=col_order, **FG_kwargs)
    g.map(plot_function, x_axis, y_axis, **plot_kwargs)
    g.set_xticklabels(rotation=35)
    if add_legend:
        g.add_legend()
    if suptitle:
        plt.suptitle(suptitle, y=1.02, size=15)
    if col_order and 'col' in FG_kwargs:
        axes = g.axes.flatten()
        for ax, title in zip(axes, order):
            ax.set_title(title)
    if ax_labels:
        g.set_axis_labels(ax_labels)

In [278]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order)
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='Index variability', col_order=order, 
                  suptitle='Index Variability', FG_kwargs=FG_kwargs)



In [273]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order)
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='Normalized Index variability', col_order=order, 
                  suptitle='Normalized Index Variability', FG_kwargs=FG_kwargs)


Actual index values over time in each NERC region

Figure 1 of paper - also include national


In [275]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(col='region',
                 hue='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order,
                 ylim=(0, 1050))

region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index', FG_kwargs=FG_kwargs)



In [280]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(0, 1050))
region_facet_grid(df=gen_index, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index', FG_kwargs=FG_kwargs)


Example of Index correlation between two regions, with and without detrending

Trying to show why detrending is needed when using the correlation to determine seasonal differences between regions.


In [38]:
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(200, 1050))

corr_coef = gen_index.loc[gen_index['region'].isin(order)].pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=gen_index.loc[gen_index['region'].isin(order)], plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (no change)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[38]:
<matplotlib.text.Text at 0x114a944d0>

In [43]:
order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(200, 1050))

corr_coef = gen_index.loc[gen_index['region'].isin(order)].pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=gen_index.loc[gen_index['region'].isin(order)], plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (no change)', FG_kwargs=FG_kwargs)
plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[43]:
<matplotlib.text.Text at 0x11d00b450>

Correlation of detrended data

Trying both a linear detrend and subtracting the previous month's value.

I'm not sure if I like the method of subtracting the previous month's value. Need to see if it is retaining the seasonal behavior.


In [41]:
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-300, 300))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[41]:
<matplotlib.text.Text at 0x11c9d4f90>

In [10]:
def shift_detrend(series, n):
    'Shift a series by n periods to detrend'
    detrended = series - series.shift(n)
    return detrended

In [14]:
order = ['FRCC', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-300, 300))


detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col,
                                                                                                        'monthly index (g/kWh)'], 12)
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[14]:
<matplotlib.text.Text at 0x11a5dee90>

In [44]:
order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-300, 300))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[44]:
<matplotlib.text.Text at 0x11c9792d0>

In [15]:
order = ['ERCOT', 'SPP']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order,
                 ylim=(-300, 300))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col,
                   'monthly index (g/kWh)'] = shift_detrend(detrend_df.loc[detrend_df['region'] == col,
                                                                           'monthly index (g/kWh)'], 12)
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)

plt.figtext(.55, .85, 'Pearson corr = {:.2f}'.format(corr_coef))


Out[15]:
<matplotlib.text.Text at 0x11ae1a650>

Plot all regions detrended


In [16]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
                 col='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=order,
                 ylim=(-300, 300))

detrend_df = gen_index.loc[gen_index['region'].isin(order)].copy()
for col in order:
    detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'] = detrend(detrend_df.loc[detrend_df['region'] == col, 'monthly index (g/kWh)'])
    
corr_coef = detrend_df.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region').corr()
corr_coef = corr_coef.iloc[1, 0]
region_facet_grid(df=detrend_df, plot_function=plt.plot, x_axis='datetime', #col_order=order,
                  add_legend=True, y_axis='monthly index (g/kWh)', col_order=order, 
                  suptitle='Monthly Index (detrended)', FG_kwargs=FG_kwargs)


Correlation in Index across regions over time

Because each region is decreasing over time, need to detrend the data before finding the correlation matrix. Using the statsmodel detrend function, with an assumption that the trends are linear (order 1).


In [303]:
nerc_index = gen_index.pivot_table(values='monthly index (g/kWh)',
                                   index='datetime', columns='region')
nerc_index_detrend = pd.DataFrame(index=nerc_index.index)
for col in nerc_index.columns:
    nerc_index_detrend.loc[:, col] = detrend(nerc_index.loc[:, col])

In [299]:
sns.heatmap(nerc_index_detrend.corr(), square=True, cmap='magma_r')


Out[299]:
<matplotlib.axes._subplots.AxesSubplot at 0x14a5338d0>
  • Are correlation patterns different in summer vs winter?
  • Does correlation change over time?

Percent of national generation from each NERC region


In [31]:
nerc_gen = gen_index.pivot(index='datetime', columns='region', values='Total gen')
regions = nerc_gen.columns
nerc_gen['Total'] = nerc_gen.sum(axis=1)
percent_nerc_gen = nerc_gen.iloc[:, :-1].divide(nerc_gen.iloc[:, -1], axis=0)

In [32]:
percent_nerc_gen.reset_index(inplace=True)

In [33]:
percent_nerc_gen = pd.melt(percent_nerc_gen, id_vars='datetime',
                           value_name='Percent Generation')

In [281]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
FG_kwargs = dict(hue='region',
                 aspect=1.5,
                 size=5,
                 hue_order=order)
region_facet_grid(df=percent_nerc_gen, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='Percent Generation', 
                  suptitle='Percent generation from each NERC region', FG_kwargs=FG_kwargs)


Generation in each NERC region in 2015


In [53]:
nerc_gen['Year'] = nerc_gen.index.year
nerc_gen.groupby('Year').sum()


Out[53]:
region ERCOT FRCC MRO NPCC RFC SERC SPP WECC Total
Year
2001 3.725800e+08 1.909453e+08 2.161632e+08 2.605056e+08 9.399041e+08 9.938657e+08 9.999798e+07 6.451816e+08 3.719144e+09
2002 3.856285e+08 2.033528e+08 2.243845e+08 2.639318e+08 9.726140e+08 1.031267e+09 1.063719e+08 6.522096e+08 3.839760e+09
2003 3.791997e+08 2.126100e+08 2.270116e+08 2.677912e+08 9.679986e+08 1.036856e+09 1.071944e+08 6.671349e+08 3.865796e+09
2004 3.902991e+08 2.181179e+08 2.255123e+08 2.714832e+08 9.882323e+08 1.059385e+09 1.075122e+08 6.920395e+08 3.952582e+09
2005 3.966687e+08 2.202564e+08 2.289179e+08 2.830366e+08 1.018798e+09 1.074424e+09 1.144705e+08 7.005246e+08 4.037097e+09
2006 4.005829e+08 2.237516e+08 2.300444e+08 2.744769e+08 9.998087e+08 1.070732e+09 1.161386e+08 7.308524e+08 4.046387e+09
2007 4.054923e+08 2.254161e+08 2.374609e+08 2.784050e+08 1.022114e+09 1.106244e+09 1.229413e+08 7.402408e+08 4.138315e+09
2008 4.047878e+08 2.196368e+08 2.435205e+08 2.674164e+08 1.000488e+09 1.085137e+09 1.229592e+08 7.572186e+08 4.101164e+09
2009 3.971679e+08 2.179523e+08 2.407059e+08 2.548166e+08 9.153819e+08 1.049575e+09 1.217441e+08 7.352388e+08 3.932583e+09
2010 4.116950e+08 2.290959e+08 2.569122e+08 2.666871e+08 9.769732e+08 1.116945e+09 1.201745e+08 7.287731e+08 4.107256e+09
2011 4.354574e+08 2.218946e+08 2.559552e+08 2.608021e+08 9.513253e+08 1.096300e+09 1.199656e+08 7.406095e+08 4.082310e+09
2012 4.298125e+08 2.210961e+08 2.549966e+08 2.566524e+08 9.218282e+08 1.078700e+09 1.223212e+08 7.447823e+08 4.030189e+09
2013 4.333802e+08 2.223989e+08 2.561657e+08 2.515223e+08 9.307387e+08 1.082139e+09 1.221463e+08 7.505837e+08 4.049075e+09
2014 4.377934e+08 2.301349e+08 2.618739e+08 2.491858e+08 9.424851e+08 1.091380e+09 1.198891e+08 7.551188e+08 4.087861e+09
2015 4.500498e+08 2.375687e+08 2.667626e+08 2.506158e+08 9.148136e+08 1.087641e+09 1.216704e+08 7.453607e+08 4.074483e+09
2016 4.558160e+08 2.381459e+08 2.651052e+08 2.443398e+08 9.231032e+08 1.080953e+09 1.258235e+08 7.460478e+08 4.079335e+09
2017 9.915130e+07 5.071177e+07 6.622780e+07 5.712312e+07 2.159938e+08 2.473217e+08 2.929221e+07 1.810598e+08 9.468813e+08

Percent of each fuel used in each NERC region


In [107]:
all_fuels = ['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
                 'Wind', 'Solar', 'Hydro']
value_cols = ['percent {}'.format(fuel) for fuel in all_fuels]

percent_gen_df = pd.melt(gen_index, id_vars=['region', 'datetime'], 
        value_vars=value_cols, value_name='percent generation',
                        var_name='fuel')
percent_gen_df['fuel'] = percent_gen_df['fuel'].map(lambda x: x.split()[-1])
percent_gen_df['fuel'].replace('Renewables', 'Other Renewables', inplace=True)
percent_gen_df['fuel'].replace('Gas', 'Natural Gas', inplace=True)

In [38]:
percent_gen_df.head()


Out[38]:
region datetime fuel percent generation
0 ERCOT 2001-01-01 Coal 0.376885
1 ERCOT 2001-02-01 Coal 0.399461
2 ERCOT 2001-03-01 Coal 0.398997
3 ERCOT 2001-04-01 Coal 0.355349
4 ERCOT 2001-05-01 Coal 0.362512

This could be figure 2?


In [108]:
percent_gen_df['fuel'].unique()


Out[108]:
array(['Coal', 'Natural Gas', 'Nuclear', 'Other', 'Other Renewables',
       'Wind', 'Solar', 'Hydro'], dtype=object)

In [286]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
fuel_order = ['Coal', 'Natural Gas', 'Nuclear', 'Hydro', 'Wind', 'Solar',
              'Other', 'Other Renewables']
FG_kwargs = dict(hue='fuel',
                 col='region',
                 col_wrap=3,
                 aspect=1.2,
                 hue_order=fuel_order)
region_facet_grid(df=percent_gen_df, plot_function=plt.plot, x_axis='datetime',
                  add_legend=True, y_axis='percent generation', col_order=order, 
                  suptitle='', FG_kwargs=FG_kwargs)



In [23]:
def region_bokeh_plot(df, fuel, x_axis, y_axis, x_range=None, y_range=None,
                     x_lower_lim=0, x_upper_lim=None):
    figs = {}
    if not x_upper_lim:
        x_upper_lim = df[x_axis].max()
        
    for region in df['region'].unique():
        temp = gen_index.loc[gen_index['region'] == region]
        max_change = temp[x_axis].max()
        
        if max_change <= x_upper_lim and max_change >= x_lower_lim:
            
            source = ColumnDataSource(data=dict(
            x=temp[x_axis],
            y=temp[y_axis],
            datetime=temp['datetime'],
            colors= viridis(len(temp))
            ))
            
            hover = HoverTool(tooltips=[
                                    ("(x,y)", "($x, $y)"),
                                    ("datetime", "@datetime{%F}")],
                             formatters={
                                     'datetime': 'datetime', # use 'datetime' formatter
                                        })
            hover.point_policy = "snap_to_data"

            fuel_2005 = weighted_percent(temp, fuel, 2005)
            fuel_2016 = weighted_percent(temp, fuel, 2016)
            title = region + ' {:.1f}% to {:.1f}% {}'.format(fuel_2005, 
                                                             fuel_2016, fuel)

            figs[region] = figure(title=title, tools=[hover], y_range=y_range, x_range=x_range,
                                 output_backend="webgl")
            figs[region].circle('x', 'y', source=source, color='colors', size=12, alpha=0.5)
            figs[region].xaxis.axis_label = x_axis
            figs[region].yaxis.axis_label = y_axis

    plots = figs.values()
    grid = gridplot(plots, ncols=3, plot_width=240, plot_height=240)

    show(grid)

In [ ]:
figs = {}

for region in percent_gen_df['region'].unique():
    temp = percent_gen_df.loc[percent_gen_df['region'] == region]
    pivoted = pd.pivot_table(temp, index='datetime', columns='fuel')
    pivoted.reset_index(inplace=True, drop=False)

    source = ColumnDataSource(data=dict(
                                        x=pivoted['datetime'],
                                        y=pivoted['percent generation'],
                                        datetime=pivoted['datetime']
                                        ))

#     p = TimeSeries(pivoted, tools='hover')
    hover = HoverTool(tooltips=[
                            ("fuel", "@fuel"),
                            ('% gen', '$y'),
                            ("datetime", "@datetime{%F}")],
                     formatters={
                             'datetime': 'datetime', # use 'datetime' formatter
                                })
    hover.point_policy = "snap_to_data"
    hover.mode = 'vline'
    figs[region] = figure(title=region, tools=[hover],
                         output_backend="webgl")
    figs[region].multi_line('x', 'y', source=source)
    figs[region].xaxis.axis_label = 'datetime'
    figs[region].yaxis.axis_label = 'percent generation'

plots = figs.values()
grid = gridplot(plots, ncols=3, plot_width=240, plot_height=240)

show(grid)

Variability scatter plots

Why is variability important?

Temporal variability is a sign that decarbonization is happening, but unevenly across the year. If the variability is not correlated between regions, better transmission would allow for a more even spread of low-carbon electricity. As the percent of those technologies grows, each region could export excess low(er)-carbon power. But if they are negatively correlated, then each region is going to be generating higher-CO2 power at the same time. New technologies will be necessary to deal with this.

Ines: Is there a point where variability becomes of return?

  • This is seasonal (monthly data). Grid reliability reports might be more at the hourly level.
  • Not even a variability issue. It's just a temporal correlation of Index.

Paper results subsections (order could change - method will be in the same order, short paragraphs that explain each):

  • Index
  • Generagion by region
  • State-level
  • Correlation across regions
  • Variability

Plot ideas (figures 3, 4, and 5):

  • Possibly emissions per capita or GDP (could be at state level) - ranking by state? Index, change in index, index per capita (See Sam's figure 2 - bar plots next to each other )
  • Coal on X, NG (or fuel) on Y, variability coded by color (or 3 variable triangle plot?)
  • Small multiples of variability vs fuel for a single region

For results:

  • Think of 1 figure or table for each subsection
  • Write 3 sentence punchline about each

Coal

This set of plots - and the ones repeated below - show:

  1. The percent generation from coal in a given month against the percent change in normalized variability from a 2005 annual average
  2. The percent change in coal generation from 2005 annual average against the percent change in normalized variability from a 2005 annual average

Analysis:

The cleanest relationship that I see is that as coal goes down, variability goes up. The trend is pretty linear, and extends across a bunch of different NERC regions. A few interesting notes:

  • SPP sees a steep decline in coal use from ~80% to 20-30%, but the variability doesn't go up until percent coal drops below 40%
  • In contrast, RFC sees an immediate linear increase in variability from a similar starting point
  • FRCC doesn't see much of an increase in variability despite the drop in coal from ~40% to 10-20%
  • The other NERC regions (NPCC, ERCOT, WECC, SERC, RFC, and MRO) all have similar linear responses to the drop in coal use even though they have different starting points for percent of coal

Percent Coal vs Change in norm variability


In [24]:
region_bokeh_plot(gen_index, 'Coal', 'percent Coal', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.85))


Change in Coal vs Change in norm variability


In [25]:
region_bokeh_plot(gen_index, 'Coal', 'change in Coal', 
                  'change in norm variability', y_range=(-1, 7))


Wind

Percent Wind vs Change in norm variability


In [26]:
region_bokeh_plot(gen_index, 'Wind', 'percent Wind', 
                  'change in norm variability', 
                  y_range=(-1, 7), x_range=(0, 0.35))


Change in Wind vs Change in norm variability


In [27]:
region_bokeh_plot(gen_index, 'Wind', 'change in Wind', 
                  'change in norm variability', y_range=(-1, 7))


Natural gas

Percent Natural Gas vs Change in norm variability


In [28]:
region_bokeh_plot(gen_index, 'Natural Gas', 'percent Natural Gas', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.75))


Change in Natural Gas vs Change in norm variability


In [29]:
region_bokeh_plot(gen_index, 'Natural Gas', 'change in Natural Gas', 
                  'change in norm variability', y_range=(-1, 7))


Hydro

Percent Hydro vs Change in norm variability


In [30]:
region_bokeh_plot(gen_index, 'Hydro', 'percent Hydro', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.45))


Change in Hydro vs Change in norm variability


In [31]:
region_bokeh_plot(gen_index, 'Hydro', 'change in Hydro', 
                  'change in norm variability', y_range=(-1, 7))


Solar

Percent Solar vs Change in norm variability


In [32]:
region_bokeh_plot(gen_index, 'Solar', 'percent Solar', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.08))


Change in Solar vs Change in norm variability

Most NERC regions didn't have recorded solar in 2005, so this plot doesn't make much sense


In [33]:
region_bokeh_plot(gen_index, 'Solar', 'change in Solar', 
                  'change in norm variability', y_range=(-1, 7))


Nuclear

Percent Solar vs Change in norm variability


In [34]:
region_bokeh_plot(gen_index, 'Nuclear', 'percent Nuclear', 
                  'change in norm variability',
                  y_range=(-1, 7), x_range=(0, 0.35))



In [35]:
region_bokeh_plot(gen_index, 'Solar', 'change in Solar', 
                  'change in norm variability', y_range=(-1, 7))



In [235]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
def facet_scatter(x, y, c, **kwargs):
    """Draw scatterplot with point colors from a faceted DataFrame columns."""
    kwargs.pop("color")
    plt.scatter(x, y, c=c, **kwargs)

vmin = gen_index['Normalized Index variability'].min()
vmax = gen_index['Normalized Index variability'].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)

g = sns.FacetGrid(gen_index, col='region', col_wrap=3, aspect=1.2,
                  col_order=order, palette='viridis')
g.map(facet_scatter, 'percent Coal', 'percent Natural Gas', 
      'Normalized Index variability', alpha=0.5, s=100,
      vmin=vmin, vmax=vmax, cmap=cmap)

# Make space for the colorbar
# g.fig.subplots_adjust(right=.92)

# Define a new Axes where the colorbar will go
# cax = g.fig.add_axes([.94, .25, .02, .6])
cax = g.fig.add_axes([.82, .05, .02, .23])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)

# Draw the colorbar
g.fig.colorbar(points, cax=cax).outline.set_visible(False)
g.fig.text(.84, .3, 'Normalized Variability', ha='center')


Out[235]:
<seaborn.axisgrid.FacetGrid at 0x150bab710>
Out[235]:
<matplotlib.text.Text at 0x151356a10>

In [172]:
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
def facet_scatter(x, y, c, **kwargs):
    """Draw scatterplot with point colors from a faceted DataFrame columns."""
    kwargs.pop("color")
    plt.scatter(x, y, c=c, **kwargs)

vmin = gen_index['Normalized Index variability'].min()
vmax = gen_index['Normalized Index variability'].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)

g = sns.FacetGrid(gen_index, col='region', col_wrap=3, aspect=1.2,
                  col_order=order, palette='viridis')
g.map(facet_scatter, 'percent Coal', 'percent Wind', 
      'Normalized Index variability', alpha=0.5, s=100,
      vmin=vmin, vmax=vmax, cmap=cmap)

# Make space for the colorbar
# g.fig.subplots_adjust(right=.92)

# Define a new Axes where the colorbar will go
# cax = g.fig.add_axes([.94, .25, .02, .6])
cax = g.fig.add_axes([.82, .05, .02, .23])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)

# Draw the colorbar
g.fig.colorbar(points, cax=cax).outline.set_visible(False)
g.fig.text(.84, .3, 'Normalized Variability', ha='center')


Out[172]:
<seaborn.axisgrid.FacetGrid at 0x13d980c90>
Out[172]:
<matplotlib.text.Text at 0x13aef74d0>

In [173]:
import ternary

In [175]:
points = gen_index

figure, tax = ternary.figure(scale=1)
tax.scatter()



In [227]:
color_col = 'Normalized Index variability'
order = ['MRO', 'SPP', 'RFC', 'ERCOT', 'FRCC', 'SERC',
         'WECC', 'NPCC']
gen_index['points'] = gen_index[['percent Coal', 'percent Natural Gas', 'percent Wind']].apply(tuple, axis=1)
def ternary_facet_scatter(points, c, **kwargs):
    """Draw scatterplot with point colors from a faceted DataFrame columns."""
    kwargs.pop("color")
    scale = 1
    ax = plt.gca()
    figure, tax = ternary.figure(ax=ax, scale=scale)
    tax.boundary(linewidth = 1.0)
#     tax.gridlines(multiple=10,color="blue")
    tax.scatter(points, c=c, **kwargs)
    tax.left_axis_label("Percent Wind")
    tax.right_axis_label("Percent Natural Gas")
    tax.bottom_axis_label("Percent Coal")
    tax.gridlines(multiple=.25, color='k', linestyle='-')
    tax.ticks(axis='lbr', multiple=0.25, linewidth=0)
    tax.clear_matplotlib_ticks()
#     plt.scatter(x, y, c=c, **kwargs)

vmin = gen_index[color_col].min()
vmax = gen_index[color_col].max()
cmap = plt.get_cmap('viridis')
#sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)

with sns.axes_style('white', {'axes.linewidth': 0,
                             'axes.grid': False}):
    g = sns.FacetGrid(gen_index, col='region', col_wrap=3,
                      col_order=order, palette='viridis')
    g.map(ternary_facet_scatter, 'points',
          color_col, alpha=0.5, s=100, cmap=cmap,
          vmin=vmin, vmax=vmax)

    # Make space for the colorbar
    # g.fig.subplots_adjust(right=.92)

    # Define a new Axes where the colorbar will go
    # cax = g.fig.add_axes([.94, .25, .02, .6])
    cax = g.fig.add_axes([.82, .05, .02, .23])
    # Get a mappable object with the same colormap as the data
    points = plt.scatter([], [], c=[], cmap=cmap, vmin=vmin, vmax=vmax)

    # Draw the colorbar
    g.fig.colorbar(points, cax=cax).outline.set_visible(False)
    g.fig.text(.84, .3, 'Normalized Variability', ha='center')
    axes = g.axes.flatten()
    for ax, title in zip(axes, order):
        ax.set_title(title)
        ax.set_xlabel('')
        ax.set_ylabel('')


Out[227]:
<seaborn.axisgrid.FacetGrid at 0x14d450990>
Out[227]:
<matplotlib.text.Text at 0x1503e67d0>
Out[227]:
<matplotlib.text.Text at 0x14f7e8490>
Out[227]:
<matplotlib.text.Text at 0x14f5a7090>
Out[227]:
<matplotlib.text.Text at 0x14f791490>
Out[227]:
<matplotlib.text.Text at 0x14f728610>
Out[227]:
<matplotlib.text.Text at 0x14f6f6a50>
Out[227]:
<matplotlib.text.Text at 0x14f70d090>
Out[227]:
<matplotlib.text.Text at 0x14f77e710>
Out[227]:
<matplotlib.text.Text at 0x14f9fc850>
Out[227]:
<matplotlib.text.Text at 0x14f76de50>
Out[227]:
<matplotlib.text.Text at 0x14fa3d210>
Out[227]:
<matplotlib.text.Text at 0x14fa28450>
Out[227]:
<matplotlib.text.Text at 0x14fa3d310>
Out[227]:
<matplotlib.text.Text at 0x14fbbbb50>
Out[227]:
<matplotlib.text.Text at 0x14fb99690>
Out[227]:
<matplotlib.text.Text at 0x14fa28f90>
Out[227]:
<matplotlib.text.Text at 0x14fc2c390>
Out[227]:
<matplotlib.text.Text at 0x14fbffcd0>
Out[227]:
<matplotlib.text.Text at 0x14fc16b90>
Out[227]:
<matplotlib.text.Text at 0x14a1e7350>
Out[227]:
<matplotlib.text.Text at 0x14fc86ad0>
Out[227]:
<matplotlib.text.Text at 0x14fc9c990>
Out[227]:
<matplotlib.text.Text at 0x14eb1f7d0>
Out[227]:
<matplotlib.text.Text at 0x14ec62110>
Out[227]:
<matplotlib.text.Text at 0x14ea27290>

In [232]:
df = gen_index.loc[gen_index['region'] == 'FRCC']
points = df['points']
c = df['Normalized Index variability']
scale = 1
# ax = plt.gca()
figure, tax = ternary.figure(scale=scale)
tax.boundary(linewidth = 1.0)
#     tax.gridlines(multiple=10,color="blue")
tax.scatter(points, c=c)
tax.left_axis_label("Percent Wind")
tax.right_axis_label("Percent Natural Gas")
tax.bottom_axis_label("Percent Coal")
tax.gridlines(multiple=.25, color='k', linestyle='-')
tax.ticks(axis='lbr', multiple=0.25, linewidth=0)
tax.clear_matplotlib_ticks()
tax.show()


Out[232]:
<matplotlib.axes._subplots.AxesSubplot at 0x150a3ad90>