In [85]:
# imports
import pandas as pd
import os.path
from scipy import stats
import numpy as np
from matplotlib import cm
import csv

In [65]:
# set up BA names
facility_df = pd.read_csv(os.path.join('data/original', 'facilities_balancingauthority.csv'))
ba_names = np.sort(facility_df["BA_ABBREV"].dropna().unique())
print ba_names


['AEC' 'AECI' 'AEPW' 'AVA' 'AZPS' 'BPAT' 'BREC' 'CISO' 'CLEC' 'CPLW' 'DEAA'
 'DPC' 'DUK' 'EDE' 'EEI' 'EES' 'EKPC' 'ELE' 'ERCO' 'FPC' 'FPL' 'GRDA'
 'GRIF' 'GVL' 'HGMA' 'IID' 'IPCO' 'ISNE' 'JEA' 'KACY' 'KCPL' 'LAFA' 'LDWP'
 'LEPA' 'LES' 'LGEE' 'MISO' 'MPCO' 'MPS' 'NEVP' 'NLRK' 'NPPD' 'NYIS' 'OKGE'
 'OPPD' 'OVEC' 'PACE' 'PACW' 'PJM' 'PLUM' 'PNM' 'PSCO' 'PSEI' 'PUPP' 'RC'
 'SC' 'SCEG' 'SEC' 'SECI' 'SMUD' 'SOCO' 'SPA' 'SPPC' 'SPS' 'SRP' 'TAL'
 'TEC' 'TEPC' 'TID' 'TVA' 'WACM' 'WALC' 'WAUE' 'WFEC' 'WKPL']

In [23]:
# load data
data = {}
for ba in ba_names:
    pieces = []
    for yr in ['2010', '2011', '2012']:
        pieces.append(pd.read_csv('data/aggregated/%s_%s_aggregate_raw.csv' % (yr, ba)))
    data[ba] = pd.concat(pieces)

In [11]:
# helper functions
def delta_hr(df):
    """difference in value between this hour and the last; first value is NaN"""
    return pd.rolling_apply(df, 2, lambda x: x[1]-x[0])

def get_slope(xs, ys):
    """slope of linear regression of ys on xs"""
    lr = stats.linregress(xs, ys)
    return lr[0]

In [26]:
# add columns for deltas
for ba in ba_names:
    data[ba]['dF (MW)'] = delta_hr(data[ba]['GLOAD (MW)'])
    data[ba]['dE (tons)'] = delta_hr(data[ba]['CO2_MASS (tons)'])

In [73]:
# get marginal emissions by hour
ba = ba_names[0]
beta = get_slope(data[ba]['dF (MW)'].dropna(), data[ba]['dE (tons)'].dropna()*2000)

betas_hrly = np.zeros([len(ba_names), 24])
betas_all = np.zeros(len(ba_names))
for iba, ba in enumerate(ba_names):
    betas_hrly[iba] = [get_slope(group['dF (MW)'].dropna(), group['dE (tons)'].dropna()*2000) for hr, group in data[ba].groupby('OP_HOUR')]
    betas_all[iba] = get_slope(data[ba]['dF (MW)'].dropna(), data[ba]['dE (tons)'].dropna()*2000)


Out[73]:
<matplotlib.text.Text at 0x11863a990>

In [ ]:
# plot
plt.figure(figsize=(15,3))
plt.imshow(betas_hrly.T - betas_all, cmap=cm.RdBu_r)
plt.xticks(range(len(ba_names)), ba_names, rotation=90)
plt.ylabel('hr')
cb = plt.colorbar()
cb.set_label('$REI_h - REI$ (lb/MW)')
plt.title('deviation of hourly marginal emissions from overall')

plt.figure(figsize=(15,3))
plt.bar(range(len(ba_names)), betas_all, align='center')
plt.xticks(range(len(ba_names)), ba_names, rotation=90)
plt.ylabel('$REI$ (lb/MW)')
plt.title('overall marginal emissions')

plt.figure(figsize=(15,3))
plt.bar(range(len(ba_names)), np.max(betas_hrly, axis=1)-np.min(betas_hrly, axis=1), align='center')
plt.xticks(range(len(ba_names)), ba_names, rotation=90)
plt.ylabel('$max(REI_h) - min(REI_h)$ (lb/MW)')
plt.title('max - min hourly marginal emissions')

In [84]:
# write to file
writer = csv.writer('tmp')
writer.write(['BA', 'overall REI'] + range(24))
#for iba in range(len(ba_names)):
#    np.savetxt('tmp', betas_hrly, fmt='%d', delimiter=',', header=','.join([str(i) for i in range(24)]))