In [ ]:
%load_ext autoreload
%autoreload 2
In [ ]:
import pandas as pd
import pudl
import pudl.constants as pc
import pudl.extract.ferc1
import sqlalchemy as sa
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
%matplotlib inline
In [ ]:
# For some reason these things don't stick if they're in the same cell as the
# %matplotlib inline call above, but if they're separate, they work fine.
sns.set()
mpl.rcParams['figure.figsize'] = (10,6)
mpl.rcParams['figure.dpi'] = 100
pd.options.display.max_columns = 56
In [ ]:
start_year = 2011
end_year = 2017
pudl_settings = pudl.workspace.setup.get_defaults()
pudl_engine = sa.create_engine(pudl_settings['pudl_db'])
pudl_out_ferc1 = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine)
steam_ferc1 = (
pudl_out_ferc1.plants_steam_ferc1().
query("report_year>=@start_year and report_year<=@end_year")
)
fuel_ferc1 = (
pudl_out_ferc1.fuel_ferc1().
drop_duplicates(subset=['record_id']).
query("report_year>=@start_year and report_year<=@end_year")
)
pudl_out_eia = pudl.output.pudltabl.PudlTabl(pudl_engine=pudl_engine, freq="AS")
mcoe_df = pudl_out_eia.mcoe()
In [ ]:
nf = pudl.transform.ferc1.fuel_by_plant_ferc1(fuel_ferc1)
nf.sample(10)
In [ ]:
key_cols = [
'report_year',
'utility_id_ferc1',
'plant_name_ferc1',
]
ferc1_plants = (
pd.merge(nf, steam_ferc1, on=key_cols, how='inner').
assign(heat_rate_mmbtu_mwh=lambda x: x.fuel_mmbtu / x.net_generation_mwh).
merge(steam_ferc1[key_cols+['utility_id_pudl', 'utility_name_ferc1','plant_id_pudl', 'plant_id_ferc1']]).
query(f'report_year >= {start_year}')
)
ferc1_coal_plants = ferc1_plants.query("primary_fuel_by_mmbtu=='coal'")
ferc1_gas_plants = ferc1_plants.query("primary_fuel_by_mmbtu=='gas'")
eia_plants = mcoe_df.assign(report_year=lambda x: x.report_date.dt.year)
eia_coal_plants = eia_plants.query("fuel_type_code_pudl=='coal'")
eia_gas_plants = eia_plants.query("fuel_type_code_pudl=='gas'")
In [ ]:
fig, (ferc1_ax, eia_ax) = plt.subplots(ncols=1, nrows=2, figsize=(17,8))
ylabel = "Fuel Consumed (mmBTU)"
xlabel = "Heat Rate (mmBTU/MWh)"
hr_range = (6,14)
nbins=40
pdf=True
ferc1_ax.hist(ferc1_coal_plants.heat_rate_mmbtu_mwh,
weights=ferc1_coal_plants.net_generation_mwh, alpha=0.5, color="black",
label="Coal", density=pdf, range=hr_range, bins=nbins)
ferc1_ax.hist(ferc1_gas_plants.heat_rate_mmbtu_mwh,
weights=ferc1_gas_plants.net_generation_mwh, alpha=0.5, color="blue",
label="Gas", density=pdf, range=hr_range, bins=nbins)
ferc1_ax.set_ylabel(ylabel)
ferc1_ax.set_title(f"FERC Form 1 Heat Rates {start_year}-{end_year}")
ferc1_ax.legend()
eia_ax.hist(eia_coal_plants.heat_rate_mmbtu_mwh,
weights=eia_coal_plants.net_generation_mwh, alpha=0.5, color="black",
label="Coal", density=pdf, range=hr_range, bins=nbins)
eia_ax.hist(eia_gas_plants.heat_rate_mmbtu_mwh,
weights=eia_gas_plants.net_generation_mwh, alpha=0.5, color="blue",
label="Gas", density=pdf, range=hr_range, bins=nbins)
eia_ax.set_ylabel(ylabel)
eia_ax.set_xlabel(xlabel)
eia_ax.set_title(f"EIA 923 Heat Rates {start_year}-{end_year}")
eia_ax.legend()
plt.tight_layout()
plt.savefig("heat_rate_hist_ferc1_vs_eia.png")
plt.show()
In [ ]:
fig, (ferc1_ax, eia_ax) = plt.subplots(ncols=1, nrows=2, figsize=(17,8))
xlabel = "Fuel Cost ($/MWh)"
ylabel = "Fuel Cost ($)"
cost_range = (0,80)
nbins=40
pdf=True
ferc1_ax.hist((ferc1_coal_plants.opex_fuel/ferc1_coal_plants.net_generation_mwh),
range=cost_range,
bins=nbins,
weights=ferc1_coal_plants.net_generation_mwh,
label="Coal",
density=pdf,
color="black",
alpha=0.5)
ferc1_ax.hist((ferc1_gas_plants.opex_fuel/ferc1_gas_plants.net_generation_mwh),
range=cost_range,
bins=nbins,
weights=ferc1_gas_plants.net_generation_mwh,
label="Gas",
density=pdf,
color="blue",
alpha=0.5)
ferc1_ax.set_ylabel(ylabel)
ferc1_ax.set_title(f"FERC Form 1 Fuel Costs {start_year}-{end_year}")
ferc1_ax.legend()
eia_ax.hist((eia_coal_plants.fuel_cost_per_mwh),
range=cost_range,
bins=nbins,
weights=eia_coal_plants.net_generation_mwh,
label="Coal",
density=pdf,
color="black",
alpha=0.5)
eia_ax.hist((eia_gas_plants.fuel_cost_per_mwh),
range=cost_range,
bins=nbins,
weights=eia_gas_plants.net_generation_mwh,
label="Gas",
density=pdf,
color="blue",
alpha=0.5)
eia_ax.set_ylabel(ylabel)
eia_ax.set_xlabel(xlabel)
eia_ax.set_title(f"EIA 923 Fuel Costs {start_year}-{end_year}")
eia_ax.legend()
plt.savefig("fuel_cost_hist_ferc1_vs_eia.png")
plt.tight_layout()
plt.show()
report_year
, fuel_type_code_pudl
, plant_id_pudl
]Stuff to plot (Annual):
In [ ]:
pudl_plant_cols = [
'plant_id_pudl',
'primary_fuel_by_mmbtu',
'report_year',
]
eia_dude = (
eia_plants.
rename(columns={'total_fuel_cost': 'opex_fuel',
'fuel_type_code_pudl': 'primary_fuel_by_mmbtu'}).
groupby(pudl_plant_cols)['net_generation_mwh',
'capacity_mw',
'opex_fuel',
'total_mmbtu'].
agg(sum).reset_index().
assign(fuel_cost_per_mwh=lambda x: x.opex_fuel / x.net_generation_mwh).
assign(fuel_cost_per_mmbtu=lambda x: x.opex_fuel / x.total_mmbtu).
assign(heat_rate_mmbtu_mwh=lambda x: x.total_mmbtu / x.net_generation_mwh).
assign(capacity_factor=lambda x: x.net_generation_mwh / (8760*x.capacity_mw))
)
ferc1_dude = (
ferc1_plants.
rename(columns={'fuel_mmbtu': 'total_mmbtu'}).
groupby(pudl_plant_cols)['net_generation_mwh',
'capacity_mw',
'opex_fuel',
'opex_production_total',
'total_mmbtu'].
agg(sum).reset_index().
assign(opex_nonfuel_ferc=lambda x: x.opex_production_total - x.opex_fuel).
drop('opex_production_total', axis=1).
assign(fuel_cost_per_mwh=lambda x: x.opex_fuel / x.net_generation_mwh).
assign(fuel_cost_per_mmbtu=lambda x: x.opex_fuel / x.total_mmbtu).
assign(heat_rate_mmbtu_mwh=lambda x: x.total_mmbtu / x.net_generation_mwh).
assign(capacity_factor=lambda x: x.net_generation_mwh / (8760*x.capacity_mw))
)
pudl_dude = pd.merge(ferc1_dude, eia_dude, suffixes=('_ferc1', '_eia'), on=pudl_plant_cols)
In [ ]:
def plot_eia_v_ferc1(df, fields_to_plot, xy_limits, scale="linear"):
"""
Make plots to compare FERC & EIA reported values for Coal & Gas plants.
For each of the fields specified in fields_to_plot, create a pair of plots,
one for 'gas' and one for 'coal' in the same frame, showing the EIA quantity
vs. the FERC quantity in a scatter plot.
"""
for field in fields_to_plot:
field_eia = field+'_eia'
field_ferc1 = field+'_ferc1'
fig, (coal_ax, gas_ax) = plt.subplots(ncols=2, nrows=1, figsize=(17, 8))
coal_ax.scatter(df.query("primary_fuel_by_mmbtu=='coal'")[field_eia],
df.query("primary_fuel_by_mmbtu=='coal'")[field_ferc1],
color='black', alpha='0.1', label=field)
coal_ax.set_xlim(xy_limits[field][0],xy_limits[field][1])
coal_ax.set_ylim(xy_limits[field][0],xy_limits[field][1])
coal_ax.set_xlabel('EIA')
coal_ax.set_yscale(scale)
coal_ax.set_xscale(scale)
coal_ax.set_ylabel('FERC Form 1')
coal_ax.set_title(f"{field} (Coal)")
gas_ax.scatter(df.query("primary_fuel_by_mmbtu=='gas'")[field_eia],
df.query("primary_fuel_by_mmbtu=='gas'")[field_ferc1],
color='blue', alpha='0.1', label=field)
gas_ax.set_xlim(xy_limits[field][0],xy_limits[field][1])
gas_ax.set_ylim(xy_limits[field][0],xy_limits[field][1])
gas_ax.set_yscale(scale)
gas_ax.set_xscale(scale)
gas_ax.set_xlabel('EIA')
gas_ax.set_ylabel('FERC Form 1')
gas_ax.set_title(f"{field} (Gas)")
plt.tight_layout()
plt.savefig(f"{field}_ferc1_vs_eia.png")
In [ ]:
log_fields = [
'capacity_mw',
'opex_fuel',
'total_mmbtu',
'net_generation_mwh',
]
log_limits = {
'capacity_mw': (1e1, 1e4),
'opex_fuel': (1e6, 1e9),
'total_mmbtu': (1e5,1e9),
'net_generation_mwh': (1e4,1e8),
}
linear_fields = [
'capacity_factor',
'heat_rate_mmbtu_mwh',
'fuel_cost_per_mwh',
'fuel_cost_per_mmbtu',
]
linear_limits = {
'capacity_factor': (0,1.0),
'heat_rate_mmbtu_mwh': (6,16),
'fuel_cost_per_mwh': (10,80),
'fuel_cost_per_mmbtu': (0,6),
}
plot_eia_v_ferc1(pudl_dude, log_fields, log_limits, scale="log")
plot_eia_v_ferc1(pudl_dude, linear_fields, linear_limits, scale="linear")
In [ ]: