This notebook uses the december 2017 EIA-860m file to determine operable generating capacity by fuel category in every month from 2001-2017.
Because this method uses EIA-860 data on individual plants and generators it does not include the capacity of small scale or distributed solar. EIA does provide an estimate of small scale solar as part of the Electric Power Monthly (Table 6.1.A), although I am not sure if it is supposed to represent all installed non-utility solar in the US.
The most recent EIA-860m file should be downloaded to the EIA downloads
folder, and the correct file name should be used for loading data. Otherwise the code below can be run straight through as-is.
In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
import pathlib
from pathlib import Path
import sys
from os.path import join
import json
import calendar
sns.set(style='white')
idx = pd.IndexSlice
In [2]:
capacity_type = 'net summer capacity (mw)'
# capacity_type = 'nameplate capacity (mw)'
In [3]:
%load_ext watermark
%watermark -iv -v
In [4]:
# Load the "autoreload" extension
%load_ext autoreload
# always reload modules marked with "%aimport"
%autoreload 1
In [5]:
# add the 'src' directory as one where we can import modules
cwd = os.getcwd()
src_dir = join(cwd, os.pardir, 'src')
sys.path.append(src_dir)
data_path = Path(cwd, '..', 'Data storage')
In [6]:
%aimport Analysis.index
from Analysis.index import group_fuel_cats
%aimport Analysis.capacity
from Analysis.capacity import monthly_capacity_all, monthly_capacity_year
from Analysis.capacity import monthly_ng_type_all, monthly_ng_type_year
In [7]:
file_path = data_path / 'EIA downloads' / 'december_generator2017.xlsx'
op = pd.read_excel(file_path, sheet_name='Operating', skiprows=1, skip_footer=1,
parse_dates={'op datetime': [14, 15]},
na_values=' ')
In [8]:
# need to make some helper functions for the retired generators sheet
def bad_month_values(month):
'Change value to 1 if outside 1-12'
if month > 12 or month < 1:
new_month = 1
else:
new_month = month
return new_month
def make_dt_col(df, month_col, year_col):
months = df[month_col].astype(str)
years = df[year_col].astype(str)
dt_string = years + '-' + months + '-' + '01'
dt = pd.to_datetime(dt_string)
return dt
ret = pd.read_excel(file_path, sheet_name='Retired', skiprows=1, skip_footer=1,
converters={'Operating Month': bad_month_values},
# parse_dates={'op datetime': [16, 17],
# 'ret datetime': [14, 15]},
na_values=' ')
ret['op datetime'] = make_dt_col(ret, 'Operating Month', 'Operating Year')
ret['ret datetime'] = make_dt_col(ret, 'Retirement Month', 'Retirement Year')
In [9]:
op.columns = op.columns.str.strip()
ret.columns = ret.columns.str.strip()
In [10]:
op_cols = [
'Plant ID', 'Nameplate Capacity (MW)', 'Net Summer Capacity (MW)',
'Energy Source Code', 'Prime Mover Code', 'op datetime'
]
ret_cols = [
'Plant ID', 'Nameplate Capacity (MW)', 'Net Summer Capacity (MW)',
'Energy Source Code', 'Prime Mover Code', 'Retirement Month',
'Retirement Year', 'Operating Month', 'Operating Year',
'op datetime', 'ret datetime'
]
In [11]:
op = op.loc[:, op_cols]
ret = ret.loc[:, ret_cols]
op.columns = op.columns.str.lower()
ret.columns = ret.columns.str.lower()
In [12]:
op.head()
Out[12]:
In [13]:
state_cat_path = data_path / 'Fuel categories' / 'State_facility.json'
custom_cat_path = data_path / 'Fuel categories' / 'Custom_results.json'
with open(state_cat_path) as json_data:
state_cats = json.load(json_data)
with open(custom_cat_path) as json_data:
custom_cats = json.load(json_data)
In [14]:
def reverse_cats(cat_file):
'Reverse a dict of lists so each item in the list is a key'
cat_map = {}
for key, vals in cat_file.items():
for val in vals:
cat_map[val] = key
return cat_map
In [15]:
# Aggregate EIA fuel codes to my final definitions
op['fuel'] = op.loc[:, 'energy source code'].map(reverse_cats(state_cats))
op['fuel category'] = op.loc[:, 'fuel'].map(reverse_cats(custom_cats))
ret['fuel'] = ret.loc[:, 'energy source code'].map(reverse_cats(state_cats))
ret['fuel category'] = ret.loc[:, 'fuel'].map(reverse_cats(custom_cats))
In [16]:
op.head()
Out[16]:
In [28]:
nercs_path = data_path / 'Facility labels' / 'Facility locations_RF.csv'
facility_nerc = pd.read_csv(nercs_path, index_col=['nerc', 'year'])
facility_nerc.sort_index(inplace=True)
Need to make this into a dictionary of dictionaries of lists (year, nerc, plant ids)
In [29]:
nerc_dict = {}
for year in facility_nerc.index.get_level_values('year').unique():
nerc_dict[year] = {}
for nerc in facility_nerc.index.get_level_values('nerc').unique():
nerc_dict[year][nerc] = facility_nerc.loc[idx[nerc, year], 'plant id'].tolist()
In [30]:
# Make sure there aren't lots of plants in 2016 that disappear in 2017
set(nerc_dict[2016]['MRO']) - set(nerc_dict[2017]['MRO'])
Out[30]:
In [31]:
# Define iterables to loop over
years = range(2001,2018)
months = range(1,13)
fuels = list(custom_cats.keys())
# capacity_type is defined at the top of this notebook
op_df_capacity = monthly_capacity_all(op=op, ret=ret, years=years,
nerc_plant_list=nerc_dict, fuels=fuels,
cap_type=capacity_type, n_jobs=-1,
print_year=False)
In [32]:
op_df_capacity.tail()
Out[32]:
In [36]:
# Write data to file
out_path = data_path / 'Derived data' / 'Plant capacity' / 'monthly capacity by fuel.csv'
op_df_capacity.to_csv(out_path)
In [37]:
# Define iterables to loop over
years = range(2001,2018)
months = range(1,13)
op_ng_type = monthly_ng_type_all(op=op, ret=ret, years=years,
nerc_plant_list=nerc_dict, fuels=fuels,
cap_type=capacity_type, n_jobs=-1,
print_year=False)
In [38]:
op_ng_type.head()
Out[38]:
In [39]:
out_path = data_path / 'Derived data' / 'Plant capacity' / 'monthly natural gas split.csv'
op_ng_type.to_csv(out_path)