In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import os
from os.path import join
import glob
import numpy as np
from joblib import Parallel, delayed
import sys
import json
# Shorthand for pd.IndexSlice (used to build MultiIndex slices)
idx = pd.IndexSlice

# Every data file is read from / written to the "Data storage" folder that
# sits one level above the directory the notebook is run from.
cwd = os.getcwd()
data_path = join(cwd, '..', 'Data storage')
In [2]:
# Date stamp embedded in the file names of the derived-data CSVs this notebook
# reads and of the result files it writes.
file_date = '2018-03-06'
In [3]:
# Load the "autoreload" extension so edited project modules are re-imported
# without restarting the kernel
%load_ext autoreload
# Mode 1: before running a cell, reload only the modules registered with "%aimport"
%autoreload 1
In [4]:
# Add the 'src' directory (a sibling of the notebook's working directory) to
# the module search path so the project packages can be imported.
src_dir = join(os.getcwd(), os.pardir, 'src')
# Guard the append so re-running this cell does not pile up duplicate entries
if src_dir not in sys.path:
    sys.path.append(src_dir)
In [5]:
%aimport Analysis.index
from Analysis.index import facility_emission_gen, group_facility_data
%aimport Analysis.index
from Analysis.index import facility_co2, adjust_epa_emissions, group_fuel_cats
%aimport Analysis.index
from Analysis.index import extra_emissions_gen, add_datetime, add_quarter
%aimport util.utils
from util.utils import rename_cols, add_facility_location
In [6]:
# Two-letter postal codes of the 50 states processed below, ten per row
states = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
]
Emission factors
In [7]:
# Emission factors table; the first CSV column is used as the DataFrame index.
# `ef` is later handed to extra_emissions_gen.
# NOTE(review): units and row labels of the factors are not visible here - confirm against the file.
path = join(data_path, 'Final emission factors.csv')
ef = pd.read_csv(path, index_col=0)
EIA facility data and EPA monthly emissions
In [9]:
# Folder holding the date-stamped derived CSVs
derived_dir = join(data_path, 'Derived data')

# --- EIA facility table ---
facility_path = join(
    derived_dir, 'Facility gen fuels and CO2 {}.csv'.format(file_date)
)
facility_df = pd.read_csv(facility_path)
# State code = last two characters of the geography string
facility_df['state'] = facility_df['geography'].str[-2:]
# Called for its effect on the frame; the return value is not used
rename_cols(facility_df)

# --- EPA monthly emissions table ---
epa_path = join(
    derived_dir, 'Monthly EPA emissions {}.csv'.format(file_date)
)
epa_df = pd.read_csv(epa_path)
rename_cols(epa_df)

# --- Facility locations, used to attach a state label to each EPA facility ---
locations_path = join(data_path, 'Facility labels', 'Facility locations.csv')
facility_locations = pd.read_csv(locations_path)
epa_df = add_facility_location(epa_df, facility_locations, labels=['state'])
JSON files with fuel categories
In [10]:
# Both fuel-category definitions live in the same folder
fuel_cat_folder = join(data_path, 'Fuel categories')
state_cats_path = join(fuel_cat_folder, 'State_facility.json')
custom_cats_path = join(fuel_cat_folder, 'Custom_results.json')

# Fuel categories passed to facility_emission_gen as `state_fuel_cat`
with open(state_cats_path, 'r') as state_file:
    state_fuel_cat = json.load(state_file)

# Fuel categories passed as `custom_fuel_cat` and used to group generation
with open(custom_cats_path, 'r') as custom_file:
    custom_fuel_cat = json.load(custom_file)
EIA total monthly gen and fuel consumption
In [12]:
# State-level EIA totals; the 'datetime' column is parsed into timestamps
path = join(
    data_path,
    'Derived data',
    'EIA state-level gen fuel CO2 {}.csv'.format(file_date),
)
eia_totals = pd.read_csv(path, parse_dates=['datetime'])
rename_cols(eia_totals)

# State code = last two characters of the geography string
eia_totals['state'] = eia_totals['geography'].str[-2:]

# Drop the fuel types that duplicate other categories
duplicated_types = ['SPV', 'AOR', 'TSN']
is_duplicated = eia_totals['type'].isin(duplicated_types)
eia_totals = eia_totals.loc[~is_duplicated]
In [16]:
# For every state: combine facility-level and "extra" CO2 into a monthly total,
# divide by total generation to get the CO2 intensity index, and collect
# generation by fuel category. The per-state tables are then concatenated and
# written to CSV.
index_list = []
gen_list = []
for state in states:
    # Per-state copies, so nothing below can modify the full tables
    eia_fac_state = facility_df.loc[facility_df['state'] == state].copy()
    eia_totals_state = eia_totals.loc[eia_totals['state'] == state].copy()
    epa_state = epa_df.loc[epa_df['state'] == state].copy()

    co2, gen_fuels_state = facility_emission_gen(
        eia_facility=eia_fac_state,
        epa=epa_state,
        state_fuel_cat=state_fuel_cat,
        custom_fuel_cat=custom_fuel_cat,
        export_state_cats=True,
        print_status=False,
    )

    extra_co2, extra_gen = extra_emissions_gen(gen_fuels_state,
                                               eia_totals_state, ef)

    # Combine facility and extra co2, name the series.
    # NOTE(review): `+` aligns on (year, month); a month present in only one
    # of the two series becomes NaN - confirm both always cover the same months.
    co2_monthly = co2.groupby(['year', 'month']).sum()
    total_co2 = (co2_monthly.loc[:, 'final co2 (kg)']
                 + extra_co2.loc[:, 'elec fuel co2 (kg)']
                 .groupby(['year', 'month']).sum())
    total_co2.name = 'final co2 (kg)'

    # Total gen, and the co2 intensity (kg/MWh is numerically equal to g/kWh)
    total_gen = (eia_totals_state
                 .groupby(['year', 'month'])['generation (mwh)'].sum())
    state_index = pd.concat([total_co2, total_gen], axis=1)
    state_index['index (g/kwh)'] = (state_index['final co2 (kg)']
                                    / state_index['generation (mwh)'])
    state_index['state'] = state
    state_index = state_index.set_index('state', append=True)

    # Generation by fuel category
    gen_category = group_fuel_cats(eia_totals_state, custom_fuel_cat,
                                   fuel_col='type', new_col='fuel category')
    keep_cols = ['fuel category', 'generation (mwh)', 'total fuel (mmbtu)',
                 'elec fuel (mmbtu)', 'all fuel co2 (kg)',
                 'elec fuel co2 (kg)', 'year', 'month']
    # Chain assign/set_index on the column subset instead of assigning into
    # it, which avoids pandas' SettingWithCopyWarning.
    gen_category = (gen_category[keep_cols]
                    .assign(state=state)
                    .set_index(['year', 'month', 'state']))

    # Add each df to the list
    index_list.append(state_index)
    gen_list.append(gen_category)

# Combine lists of dataframes. add_quarter is called for its effect on the
# frame; its return value is not used.
state_index_all = pd.concat(index_list)
add_quarter(state_index_all)
gen_category_all = pd.concat(gen_list)
add_quarter(gen_category_all)

# output state results to file, creating the output folder on a fresh checkout
out_dir = join(data_path, 'final state data')
os.makedirs(out_dir, exist_ok=True)
index_fn = 'Monthly index states {}.csv'.format(file_date)
gen_fn = 'Monthly generation states {}.csv'.format(file_date)
state_index_all.to_csv(join(out_dir, index_fn))
gen_category_all.to_csv(join(out_dir, gen_fn))