In [85]:
# imports
import pandas as pd
import os.path
from scipy import stats
import numpy as np
from matplotlib import cm
import csv
In [65]:
# set up BA names
facility_df = pd.read_csv(os.path.join('data/original', 'facilities_balancingauthority.csv'))
ba_names = np.sort(facility_df["BA_ABBREV"].dropna().unique())
print ba_names
In [23]:
# load data
data = {}
for ba in ba_names:
pieces = []
for yr in ['2010', '2011', '2012']:
pieces.append(pd.read_csv('data/aggregated/%s_%s_aggregate_raw.csv' % (yr, ba)))
data[ba] = pd.concat(pieces)
In [11]:
# helper functions
def delta_hr(df):
"""difference in value between this hour and the last; first value is NaN"""
return pd.rolling_apply(df, 2, lambda x: x[1]-x[0])
def get_slope(xs, ys):
"""slope of linear regression of ys on xs"""
lr = stats.linregress(xs, ys)
return lr[0]
In [26]:
# add columns for deltas
for ba in ba_names:
data[ba]['dF (MW)'] = delta_hr(data[ba]['GLOAD (MW)'])
data[ba]['dE (tons)'] = delta_hr(data[ba]['CO2_MASS (tons)'])
In [73]:
# get marginal emissions by hour
ba = ba_names[0]
beta = get_slope(data[ba]['dF (MW)'].dropna(), data[ba]['dE (tons)'].dropna()*2000)
betas_hrly = np.zeros([len(ba_names), 24])
betas_all = np.zeros(len(ba_names))
for iba, ba in enumerate(ba_names):
betas_hrly[iba] = [get_slope(group['dF (MW)'].dropna(), group['dE (tons)'].dropna()*2000) for hr, group in data[ba].groupby('OP_HOUR')]
betas_all[iba] = get_slope(data[ba]['dF (MW)'].dropna(), data[ba]['dE (tons)'].dropna()*2000)
Out[73]:
In [ ]:
# plot
plt.figure(figsize=(15,3))
plt.imshow(betas_hrly.T - betas_all, cmap=cm.RdBu_r)
plt.xticks(range(len(ba_names)), ba_names, rotation=90)
plt.ylabel('hr')
cb = plt.colorbar()
cb.set_label('$REI_h - REI$ (lb/MW)')
plt.title('deviation of hourly marginal emissions from overall')
plt.figure(figsize=(15,3))
plt.bar(range(len(ba_names)), betas_all, align='center')
plt.xticks(range(len(ba_names)), ba_names, rotation=90)
plt.ylabel('$REI$ (lb/MW)')
plt.title('overall marginal emissions')
plt.figure(figsize=(15,3))
plt.bar(range(len(ba_names)), np.max(betas_hrly, axis=1)-np.min(betas_hrly, axis=1), align='center')
plt.xticks(range(len(ba_names)), ba_names, rotation=90)
plt.ylabel('$max(REI_h) - min(REI_h)$ (lb/MW)')
plt.title('max - min hourly marginal emissions')
In [84]:
# write to file
writer = csv.writer('tmp')
writer.write(['BA', 'overall REI'] + range(24))
#for iba in range(len(ba_names)):
# np.savetxt('tmp', betas_hrly, fmt='%d', delimiter=',', header=','.join([str(i) for i in range(24)]))