Compile EPA emissions data

Convert the data from hourly to monthly and export all years as a single file.


In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scripts import import_group_epa, unit_conversion
from joblib import Parallel, delayed

In [6]:
if __name__ == '__main__':
    base_path = os.path.join('Clean data', 'EPA emissions')
    paths = [os.path.join(base_path, 'EPA emissions ' + str(year) + '.csv') for year in range(2001, 2017)]
    
    df_list = Parallel(n_jobs=-1)(delayed(import_group_epa)(path) for path in paths)

In [7]:
df = pd.concat(df_list)

In [8]:
df.head()


Out[8]:
CO2_MASS (kg) GLOAD (MW) HEAT_INPUT (mmBtu) MONTH OP_TIME ORISPL_CODE SLOAD (1000lb/hr) YEAR
0 9.625411e+08 1167596.0 1.113000e+07 1 4348.25 3 NaN 2001
1 8.208495e+08 880517.0 8.951275e+06 2 3391.00 3 NaN 2001
2 6.402928e+08 735523.0 7.175850e+06 3 3846.50 3 NaN 2001
3 7.417879e+08 898023.0 8.519262e+06 4 4292.75 3 NaN 2001
4 9.767243e+08 1230441.0 1.138663e+07 5 5029.75 3 NaN 2001

In [9]:
df.tail()


Out[9]:
CO2_MASS (kg) GLOAD (MW) HEAT_INPUT (mmBtu) MONTH OP_TIME ORISPL_CODE SLOAD (1000lb/hr) YEAR
16834 NaN NaN 200880.000 8 744.00 880101 148800.0 2016
16835 NaN NaN 194400.000 9 720.00 880101 144000.0 2016
16836 NaN NaN 204042.596 10 1033.20 880107 140575.0 2016
16837 NaN NaN 211593.368 11 1092.96 880107 137779.0 2016
16838 NaN NaN 337872.708 12 1492.09 880107 230323.0 2016

In [10]:
path = os.path.join('Clean data', 'Monthly EPA emissions.csv')
df.to_csv(path, index=False)