In [1]:
import pandas as pd
import numpy as np

In [2]:
fn1 = 'EPA emissions.txt'
fn2 = 'may_generator2016.xlsx'
fn3 = 'EIA923_Schedules_2_3_4_5_M_10_2016.xlsx'

Load emissions data


In [3]:
emissions = pd.read_csv(fn1)

In [4]:
emissions.head()


Out[4]:
State Facility Name Facility ID (ORISPL) Unit ID Associated Stacks Month Year Program(s) Operating Time Gross Load (MW-h) Steam Load (1000lb) SO2 (tons) Avg. NOx Rate (lb/MMBtu) NOx (tons) CO2 (short tons) Heat Input (MMBtu) EPA Region NERC Region Unit Type Fuel Type (Primary)
AL AMEA Sylacauga Plant 56018 1 NaN 5 2016 CSNOX, CSNOXOS, CSSO2G2, ARP 28.00 1149.0 NaN 0.004 0.0759 0.473 732.800 12428.200 4 SERC Combustion turbine Pipeline Natural Gas NaN
AL AMEA Sylacauga Plant 56018 2 NaN 5 2016 CSNOX, CSNOXOS, CSSO2G2, ARP 17.00 642.0 NaN 0.002 0.0809 0.283 412.300 6984.600 4 SERC Combustion turbine Pipeline Natural Gas NaN
AL Barry 3 1 CS0AAN 5 2016 ARP, CSNOX, CSNOXOS, CSSO2G2 2.75 11.0 NaN 0.005 0.0080 0.001 14.213 240.050 4 SERC Tangentially-fired Pipeline Natural Gas NaN
AL Barry 3 2 CS0AAN 5 2016 ARP, CSNOX, CSNOXOS, CSSO2G2 2.75 11.0 NaN 0.005 0.0080 0.001 14.213 240.050 4 SERC Tangentially-fired Pipeline Natural Gas NaN
AL Barry 3 4 NaN 5 2016 ARP, MATS, CSNOX, CSNOXOS, CSSO2G2 725.25 116336.0 NaN 572.537 0.4319 265.224 128402.000 1251483.675 4 SERC Tangentially-fired Coal NaN

In [7]:
emissions = pd.read_csv(fn1, index_col=False)

In [8]:
emissions.head()


Out[8]:
State Facility Name Facility ID (ORISPL) Unit ID Associated Stacks Month Year Program(s) Operating Time Gross Load (MW-h) Steam Load (1000lb) SO2 (tons) Avg. NOx Rate (lb/MMBtu) NOx (tons) CO2 (short tons) Heat Input (MMBtu) EPA Region NERC Region Unit Type Fuel Type (Primary)
0 AL AMEA Sylacauga Plant 56018 1 NaN 5 2016 CSNOX, CSNOXOS, CSSO2G2, ARP 28.00 1149.0 NaN 0.004 0.0759 0.473 732.800 12428.200 4 SERC Combustion turbine Pipeline Natural Gas
1 AL AMEA Sylacauga Plant 56018 2 NaN 5 2016 CSNOX, CSNOXOS, CSSO2G2, ARP 17.00 642.0 NaN 0.002 0.0809 0.283 412.300 6984.600 4 SERC Combustion turbine Pipeline Natural Gas
2 AL Barry 3 1 CS0AAN 5 2016 ARP, CSNOX, CSNOXOS, CSSO2G2 2.75 11.0 NaN 0.005 0.0080 0.001 14.213 240.050 4 SERC Tangentially-fired Pipeline Natural Gas
3 AL Barry 3 2 CS0AAN 5 2016 ARP, CSNOX, CSNOXOS, CSSO2G2 2.75 11.0 NaN 0.005 0.0080 0.001 14.213 240.050 4 SERC Tangentially-fired Pipeline Natural Gas
4 AL Barry 3 4 NaN 5 2016 ARP, MATS, CSNOX, CSNOXOS, CSSO2G2 725.25 116336.0 NaN 572.537 0.4319 265.224 128402.000 1251483.675 4 SERC Tangentially-fired Coal

In [13]:
emissions.tail()


Out[13]:
State Facility Name Facility ID (ORISPL) Unit ID Associated Stacks Month Year Program(s) Operating Time Gross Load (MW-h) Steam Load (1000lb) SO2 (tons) Avg. NOx Rate (lb/MMBtu) NOx (tons) CO2 (short tons) Heat Input (MMBtu) EPA Region NERC Region Unit Type Fuel Type (Primary)
3919 WY Neil Simpson II (CT2) 55477 CT2 NaN 5 2016 ARP 2.16 69.00 NaN 0.000 0.0485 0.020 39.911 672.960 8 NaN Combustion turbine Pipeline Natural Gas
3920 WY Wygen I 55479 001 NaN 5 2016 ARP, MATS 662.62 58852.88 NaN 24.672 0.1298 43.135 68889.530 656851.004 8 NaN Dry bottom wall-fired boiler Coal
3921 WY Wygen II 56319 001 NaN 5 2016 ARP, MATS 744.00 69215.00 NaN 17.378 0.0534 20.303 79420.900 757259.300 8 WECC Dry bottom wall-fired boiler Coal
3922 WY Wygen III 56596 001 NaN 5 2016 ARP, MATS 581.64 62642.20 NaN 16.041 0.0481 12.642 67689.944 645401.544 8 WECC Dry bottom wall-fired boiler Coal
3923 WY Wyodak 6101 BW91 NaN 5 2016 ARP, MATS 0.00 NaN NaN NaN NaN NaN NaN NaN 8 WECC Dry bottom wall-fired boiler Coal

Access parts of the dataframe


In [20]:
emissions.columns


Out[20]:
Index([u'State', u' Facility Name', u' Facility ID (ORISPL)', u' Unit ID',
       u' Associated Stacks', u' Month', u' Year', u' Program(s)',
       u' Operating Time', u' Gross Load (MW-h)', u' Steam Load (1000lb)',
       u' SO2 (tons)', u' Avg. NOx Rate (lb/MMBtu)', u' NOx (tons)',
       u' CO2 (short tons)', u' Heat Input (MMBtu)', u' EPA Region',
       u' NERC Region', u' Unit Type', u' Fuel Type (Primary)'],
      dtype='object')

Notice that most of the columns have a leading space?


In [26]:
columns_strip = [name.strip() for name in emissions.columns]
columns_strip


Out[26]:
['State',
 'Facility Name',
 'Facility ID (ORISPL)',
 'Unit ID',
 'Associated Stacks',
 'Month',
 'Year',
 'Program(s)',
 'Operating Time',
 'Gross Load (MW-h)',
 'Steam Load (1000lb)',
 'SO2 (tons)',
 'Avg. NOx Rate (lb/MMBtu)',
 'NOx (tons)',
 'CO2 (short tons)',
 'Heat Input (MMBtu)',
 'EPA Region',
 'NERC Region',
 'Unit Type',
 'Fuel Type (Primary)']

In [27]:
emissions.columns = columns_strip
emissions.columns


Out[27]:
Index([u'State', u'Facility Name', u'Facility ID (ORISPL)', u'Unit ID',
       u'Associated Stacks', u'Month', u'Year', u'Program(s)',
       u'Operating Time', u'Gross Load (MW-h)', u'Steam Load (1000lb)',
       u'SO2 (tons)', u'Avg. NOx Rate (lb/MMBtu)', u'NOx (tons)',
       u'CO2 (short tons)', u'Heat Input (MMBtu)', u'EPA Region',
       u'NERC Region', u'Unit Type', u'Fuel Type (Primary)'],
      dtype='object')

In [31]:
emissions.dtypes


Out[31]:
State                        object
Facility Name                object
Facility ID (ORISPL)          int64
Unit ID                      object
Associated Stacks            object
Month                         int64
Year                          int64
Program(s)                   object
Operating Time              float64
Gross Load (MW-h)           float64
Steam Load (1000lb)         float64
SO2 (tons)                  float64
Avg. NOx Rate (lb/MMBtu)    float64
NOx (tons)                  float64
CO2 (short tons)            float64
Heat Input (MMBtu)          float64
EPA Region                    int64
NERC Region                  object
Unit Type                    object
Fuel Type (Primary)          object
dtype: object

A single column from a dataframe is called a Series


In [32]:
type(emissions)


Out[32]:
pandas.core.frame.DataFrame

In [30]:
type(emissions['Operating Time'])


Out[30]:
pandas.core.series.Series

In [33]:
emissions['Operating Time']


Out[33]:
0        28.00
1        17.00
2         2.75
3         2.75
4       725.25
5       588.00
6       712.50
7       609.25
8       487.25
9       716.25
10        2.53
11        2.77
12        2.31
13        2.42
14        0.00
15      393.88
16      365.35
17        0.00
18        0.00
19        0.00
20        0.00
21        0.00
22        2.00
23        2.00
24        2.00
25        3.00
26        1.00
27        6.00
28        2.00
29        2.00
         ...  
3894     21.05
3895    307.17
3896    394.15
3897    588.03
3898    281.58
3899    316.13
3900    398.32
3901     18.69
3902    673.51
3903    744.00
3904    743.30
3905    744.00
3906    650.58
3907    462.40
3908    744.00
3909    106.74
3910    743.70
3911    677.80
3912     55.90
3913    727.68
3914    744.00
3915     33.13
3916    735.62
3917    597.64
3918     15.25
3919      2.16
3920    662.62
3921    744.00
3922    581.64
3923      0.00
Name: Operating Time, dtype: float64

Index into a dataframe using .loc or .iloc with square brackets and row,column notation


In [34]:
emissions.loc[0:5,'Operating Time']


Out[34]:
0     28.00
1     17.00
2      2.75
3      2.75
4    725.25
5    588.00
Name: Operating Time, dtype: float64

In [36]:
emissions.iloc[0:5,:3]


Out[36]:
State Facility Name Facility ID (ORISPL)
0 AL AMEA Sylacauga Plant 56018
1 AL AMEA Sylacauga Plant 56018
2 AL Barry 3
3 AL Barry 3
4 AL Barry 3

Sum unit emissions for each facility using groupby


In [47]:
emissions.groupby('Facility ID (ORISPL)')


Out[47]:
<pandas.core.groupby.DataFrameGroupBy object at 0x114709cd0>

Not all columns sum well


In [57]:
facility_emiss = emissions.groupby('Facility ID (ORISPL)').sum()
# facility_emiss = facility_emiss.iloc[:,2:]
facility_emiss


Out[57]:
Month Year Operating Time Gross Load (MW-h) Steam Load (1000lb) SO2 (tons) Avg. NOx Rate (lb/MMBtu) NOx (tons) CO2 (short tons) Heat Input (MMBtu) EPA Region
Facility ID (ORISPL)
3 40 16128 3844.00 1048450.25 NaN 595.911 0.5760 411.165 677078.976 8471790.125 32
7 10 4032 924.00 NaN 222159.75 0.990 0.2067 13.814 15771.300 266053.425 8
8 15 6048 1837.25 505777.25 NaN 125.662 0.5375 440.348 531418.603 5179513.525 12
9 5 2016 20.60 541.68 NaN 0.003 0.2751 1.504 NaN 10441.833 6
10 55 22176 188.25 8897.75 NaN 48.579 0.8474 10.162 11340.124 144258.150 44
26 25 10080 744.00 495791.00 NaN 173.698 0.0646 165.370 524604.900 5113099.400 20
47 65 26208 20.00 177.00 NaN 0.333 4.4379 0.722 192.400 2765.600 52
50 10 4032 0.00 NaN NaN NaN NaN NaN NaN NaN 8
51 5 2016 553.48 262392.63 NaN 1081.656 0.2080 241.863 270128.775 2481213.560 6
54 45 18144 254.45 15652.65 NaN 0.057 0.3753 2.787 11101.898 186799.693 36
56 15 6048 759.23 112793.47 NaN 63.902 0.5125 167.878 129301.129 1260257.488 12
59 5 2016 321.07 18822.00 NaN 16.264 0.2297 24.957 21946.742 209245.816 7
60 10 4032 753.95 36111.00 NaN 176.503 0.2169 39.165 44877.292 427905.242 14
87 5 2016 744.00 82439.00 NaN 34.069 0.3811 161.168 88806.200 846735.000 6
108 5 2016 660.18 160882.24 NaN 95.013 0.1813 151.412 173507.748 1654354.527 7
113 15 6048 992.93 120001.76 NaN 57.635 0.6462 149.985 135364.206 1319344.965 27
116 10 4032 247.40 8653.84 NaN 0.032 0.2216 7.130 6368.940 107186.519 18
117 15 6048 623.34 109950.39 NaN 0.257 0.0754 7.739 50958.633 857449.725 27
118 5 2016 37.80 1491.10 NaN 0.007 0.0507 0.447 1333.907 22445.386 9
120 15 6048 748.53 35334.66 NaN 0.120 0.1839 26.270 23956.571 403122.659 27
124 5 2016 7.70 360.90 NaN 0.009 0.0468 0.081 298.620 5024.270 9
126 20 8064 1101.56 76803.54 NaN 2.119 0.5604 69.574 50133.139 822072.677 36
127 5 2016 401.58 129794.52 NaN 73.983 0.2907 203.293 145337.271 1385756.804 6
130 20 8064 1518.03 822856.12 NaN 370.759 0.2077 289.871 860955.931 8391402.264 16
136 10 4032 1451.60 689645.88 NaN 477.829 0.1152 187.062 682395.436 6651035.344 8
141 15 6048 67.49 4296.61 NaN 0.014 0.5300 11.359 2730.960 45960.324 27
147 5 2016 611.23 130042.08 NaN 0.277 0.0101 3.965 54824.884 922519.859 9
160 20 8064 1970.04 185441.25 NaN 20.620 1.0331 359.939 185387.782 1905952.909 36
165 10 4032 603.34 191538.88 NaN 506.266 0.2807 170.876 239423.987 2290169.297 12
170 5 2016 638.74 139956.89 NaN 0.422 0.2584 239.594 83529.374 1405547.366 6
... ... ... ... ... ... ... ... ... ... ... ...
57037 10 4032 1398.25 368847.25 NaN 0.850 0.0271 17.634 168415.150 2833845.350 8
57073 5 2016 0.00 NaN NaN NaN NaN NaN NaN NaN 9
57074 5 2016 407.00 NaN NaN 0.015 0.0110 0.298 3187.900 54046.000 9
57075 5 2016 248.00 NaN NaN 0.007 0.0110 0.149 1602.200 27151.000 9
57241 5 2016 42.34 2051.00 NaN 0.090 0.0555 1.154 3480.042 33986.189 4
57267 20 8064 18.74 2488.57 NaN 0.008 0.0434 0.123 1712.645 28817.647 36
57349 5 2016 393.61 100963.68 NaN 0.207 0.0089 2.387 40922.021 688589.496 3
57482 40 16128 451.97 29914.41 NaN 0.088 0.1731 2.092 17568.752 295618.850 72
57483 20 8064 345.92 10334.14 NaN 0.036 0.0692 0.712 6236.594 104945.273 36
57515 25 10080 823.60 65754.07 NaN 0.174 0.1513 5.561 34605.858 582304.228 45
57703 15 6048 733.14 19706.25 NaN 0.060 0.0540 0.859 12035.438 202527.156 24
57839 10 4032 1101.30 357284.80 NaN 0.639 0.0110 5.838 126516.225 2128891.010 4
57865 15 6048 81.55 10928.02 NaN 0.034 0.1633 2.704 6779.163 114074.151 18
57881 15 6048 580.92 20205.20 NaN 0.062 0.0855 1.784 12307.201 207098.166 24
57943 15 6048 1753.37 60954.81 NaN 0.183 0.0481 3.916 36090.079 607288.728 24
57978 5 2016 16.81 2461.69 NaN 0.008 0.0201 0.119 1625.035 27347.542 9
58001 20 8064 1485.62 417253.89 NaN 0.935 0.0426 12.778 185161.751 3115749.902 24
58005 10 4032 994.60 292270.75 NaN 0.644 0.0166 7.113 127678.269 2148462.119 12
58054 5 2016 743.03 53385.00 NaN 1.455 0.0584 19.356 69154.702 664207.371 1
58079 10 4032 1268.00 385188.46 NaN 0.756 0.0128 7.746 149796.191 2520589.201 4
58122 5 2016 2.57 61.56 NaN 0.000 0.0775 0.048 43.560 732.625 9
58235 5 2016 119.55 7160.00 NaN 0.020 0.0191 0.384 3909.089 65770.895 2
58260 15 6048 1824.11 705418.26 NaN 1.468 0.0321 17.479 290920.901 4895295.214 9
58284 5 2016 16.62 483.78 NaN 0.001 0.0250 0.032 289.595 4874.589 6
58325 5 2016 7.93 471.64 NaN 0.002 0.0457 0.147 390.728 6576.714 6
58471 10 4032 81.30 8786.34 NaN 0.142 0.1028 1.593 6244.180 105066.232 12
58478 5 2016 47.25 1823.50 NaN 0.005 0.0215 0.148 968.200 16293.125 6
58562 10 4032 615.09 40172.06 NaN 0.111 0.0296 2.077 21929.390 368989.001 12
59926 10 4032 24.63 2794.92 NaN 0.015 0.0320 0.806 2972.500 50368.700 10
70454 10 4032 62.78 7178.74 NaN 0.026 0.1711 3.388 5164.599 86899.023 8

1208 rows × 11 columns

Use apply to apply a function to every row of the dataframe

If we want to keep the EPA Region, there are probably better ways to do it than this. We will write a little function that divides the month by 5 (May) and then divides the region by that result.


In [63]:
def correct_region(row):
    num_units = row['Month'] / 5
    region = row['EPA Region'] / num_units
    return region

Not sure why this is returning a float. Go back to the function and return an int instead.


In [64]:
facility_emiss.apply(correct_region, axis=1)


Out[64]:
Facility ID (ORISPL)
3        4.0
7        4.0
8        4.0
9        6.0
10       4.0
26       4.0
47       4.0
50       4.0
51       6.0
54       4.0
56       4.0
59       7.0
60       7.0
87       6.0
108      7.0
113      9.0
116      9.0
117      9.0
118      9.0
120      9.0
124      9.0
126      9.0
127      6.0
130      4.0
136      4.0
141      9.0
147      9.0
160      9.0
165      6.0
170      6.0
        ... 
57037    4.0
57073    9.0
57074    9.0
57075    9.0
57241    4.0
57267    9.0
57349    3.0
57482    9.0
57483    9.0
57515    9.0
57703    8.0
57839    2.0
57865    6.0
57881    8.0
57943    8.0
57978    9.0
58001    6.0
58005    6.0
58054    1.0
58079    2.0
58122    9.0
58235    2.0
58260    3.0
58284    6.0
58325    6.0
58471    6.0
58478    6.0
58562    6.0
59926    5.0
70454    4.0
dtype: float64

In [65]:
facility_emiss.loc[:,'EPA Region'] = facility_emiss.apply(correct_region, axis=1)
facility_emiss = facility_emiss.iloc[:,2:]
facility_emiss


Out[65]:
Operating Time Gross Load (MW-h) Steam Load (1000lb) SO2 (tons) Avg. NOx Rate (lb/MMBtu) NOx (tons) CO2 (short tons) Heat Input (MMBtu) EPA Region
Facility ID (ORISPL)
3 3844.00 1048450.25 NaN 595.911 0.5760 411.165 677078.976 8471790.125 4.0
7 924.00 NaN 222159.75 0.990 0.2067 13.814 15771.300 266053.425 4.0
8 1837.25 505777.25 NaN 125.662 0.5375 440.348 531418.603 5179513.525 4.0
9 20.60 541.68 NaN 0.003 0.2751 1.504 NaN 10441.833 6.0
10 188.25 8897.75 NaN 48.579 0.8474 10.162 11340.124 144258.150 4.0
26 744.00 495791.00 NaN 173.698 0.0646 165.370 524604.900 5113099.400 4.0
47 20.00 177.00 NaN 0.333 4.4379 0.722 192.400 2765.600 4.0
50 0.00 NaN NaN NaN NaN NaN NaN NaN 4.0
51 553.48 262392.63 NaN 1081.656 0.2080 241.863 270128.775 2481213.560 6.0
54 254.45 15652.65 NaN 0.057 0.3753 2.787 11101.898 186799.693 4.0
56 759.23 112793.47 NaN 63.902 0.5125 167.878 129301.129 1260257.488 4.0
59 321.07 18822.00 NaN 16.264 0.2297 24.957 21946.742 209245.816 7.0
60 753.95 36111.00 NaN 176.503 0.2169 39.165 44877.292 427905.242 7.0
87 744.00 82439.00 NaN 34.069 0.3811 161.168 88806.200 846735.000 6.0
108 660.18 160882.24 NaN 95.013 0.1813 151.412 173507.748 1654354.527 7.0
113 992.93 120001.76 NaN 57.635 0.6462 149.985 135364.206 1319344.965 9.0
116 247.40 8653.84 NaN 0.032 0.2216 7.130 6368.940 107186.519 9.0
117 623.34 109950.39 NaN 0.257 0.0754 7.739 50958.633 857449.725 9.0
118 37.80 1491.10 NaN 0.007 0.0507 0.447 1333.907 22445.386 9.0
120 748.53 35334.66 NaN 0.120 0.1839 26.270 23956.571 403122.659 9.0
124 7.70 360.90 NaN 0.009 0.0468 0.081 298.620 5024.270 9.0
126 1101.56 76803.54 NaN 2.119 0.5604 69.574 50133.139 822072.677 9.0
127 401.58 129794.52 NaN 73.983 0.2907 203.293 145337.271 1385756.804 6.0
130 1518.03 822856.12 NaN 370.759 0.2077 289.871 860955.931 8391402.264 4.0
136 1451.60 689645.88 NaN 477.829 0.1152 187.062 682395.436 6651035.344 4.0
141 67.49 4296.61 NaN 0.014 0.5300 11.359 2730.960 45960.324 9.0
147 611.23 130042.08 NaN 0.277 0.0101 3.965 54824.884 922519.859 9.0
160 1970.04 185441.25 NaN 20.620 1.0331 359.939 185387.782 1905952.909 9.0
165 603.34 191538.88 NaN 506.266 0.2807 170.876 239423.987 2290169.297 6.0
170 638.74 139956.89 NaN 0.422 0.2584 239.594 83529.374 1405547.366 6.0
... ... ... ... ... ... ... ... ... ...
57037 1398.25 368847.25 NaN 0.850 0.0271 17.634 168415.150 2833845.350 4.0
57073 0.00 NaN NaN NaN NaN NaN NaN NaN 9.0
57074 407.00 NaN NaN 0.015 0.0110 0.298 3187.900 54046.000 9.0
57075 248.00 NaN NaN 0.007 0.0110 0.149 1602.200 27151.000 9.0
57241 42.34 2051.00 NaN 0.090 0.0555 1.154 3480.042 33986.189 4.0
57267 18.74 2488.57 NaN 0.008 0.0434 0.123 1712.645 28817.647 9.0
57349 393.61 100963.68 NaN 0.207 0.0089 2.387 40922.021 688589.496 3.0
57482 451.97 29914.41 NaN 0.088 0.1731 2.092 17568.752 295618.850 9.0
57483 345.92 10334.14 NaN 0.036 0.0692 0.712 6236.594 104945.273 9.0
57515 823.60 65754.07 NaN 0.174 0.1513 5.561 34605.858 582304.228 9.0
57703 733.14 19706.25 NaN 0.060 0.0540 0.859 12035.438 202527.156 8.0
57839 1101.30 357284.80 NaN 0.639 0.0110 5.838 126516.225 2128891.010 2.0
57865 81.55 10928.02 NaN 0.034 0.1633 2.704 6779.163 114074.151 6.0
57881 580.92 20205.20 NaN 0.062 0.0855 1.784 12307.201 207098.166 8.0
57943 1753.37 60954.81 NaN 0.183 0.0481 3.916 36090.079 607288.728 8.0
57978 16.81 2461.69 NaN 0.008 0.0201 0.119 1625.035 27347.542 9.0
58001 1485.62 417253.89 NaN 0.935 0.0426 12.778 185161.751 3115749.902 6.0
58005 994.60 292270.75 NaN 0.644 0.0166 7.113 127678.269 2148462.119 6.0
58054 743.03 53385.00 NaN 1.455 0.0584 19.356 69154.702 664207.371 1.0
58079 1268.00 385188.46 NaN 0.756 0.0128 7.746 149796.191 2520589.201 2.0
58122 2.57 61.56 NaN 0.000 0.0775 0.048 43.560 732.625 9.0
58235 119.55 7160.00 NaN 0.020 0.0191 0.384 3909.089 65770.895 2.0
58260 1824.11 705418.26 NaN 1.468 0.0321 17.479 290920.901 4895295.214 3.0
58284 16.62 483.78 NaN 0.001 0.0250 0.032 289.595 4874.589 6.0
58325 7.93 471.64 NaN 0.002 0.0457 0.147 390.728 6576.714 6.0
58471 81.30 8786.34 NaN 0.142 0.1028 1.593 6244.180 105066.232 6.0
58478 47.25 1823.50 NaN 0.005 0.0215 0.148 968.200 16293.125 6.0
58562 615.09 40172.06 NaN 0.111 0.0296 2.077 21929.390 368989.001 6.0
59926 24.63 2794.92 NaN 0.015 0.0320 0.806 2972.500 50368.700 5.0
70454 62.78 7178.74 NaN 0.026 0.1711 3.388 5164.599 86899.023 4.0

1208 rows × 9 columns


In [ ]:

Load capacity data


In [9]:
capacity = pd.read_excel(fn2, sheetname='Operating', header=1)

In [10]:
capacity.head()


Out[10]:
Entity ID Entity Name Plant ID Plant Name Sector Plant State Generator ID Nameplate Capacity (MW) Net Summer Capacity (MW) Technology ... Status Planned Derate Year Planned Derate Month Planned Derate of Summer Capacity (MW) Planned Uprate Year Planned Uprate Month Planned Uprate of Summer Capacity (MW) County Latitude Longitude
0 195 Alabama Power Co 2.0 Bankhead Dam Electric Utility AL 1 53.9 56 Conventional Hydroelectric ... (OP) Operating Tuscaloosa 33.4587 -87.3568
1 195 Alabama Power Co 3.0 Barry Electric Utility AL 1 153.1 55 Natural Gas Steam Turbine ... (OP) Operating Mobile 31.0069 -88.0103
2 195 Alabama Power Co 3.0 Barry Electric Utility AL 2 153.1 55 Natural Gas Steam Turbine ... (OP) Operating Mobile 31.0069 -88.0103
3 195 Alabama Power Co 3.0 Barry Electric Utility AL 4 403.7 362 Conventional Steam Coal ... (OP) Operating Mobile 31.0069 -88.0103
4 195 Alabama Power Co 3.0 Barry Electric Utility AL 5 788.8 726.5 Conventional Steam Coal ... (OP) Operating Mobile 31.0069 -88.0103

5 rows × 26 columns


In [12]:
capacity.tail()


Out[12]:
Entity ID Entity Name Plant ID Plant Name Sector Plant State Generator ID Nameplate Capacity (MW) Net Summer Capacity (MW) Technology ... Status Planned Derate Year Planned Derate Month Planned Derate of Summer Capacity (MW) Planned Uprate Year Planned Uprate Month Planned Uprate of Summer Capacity (MW) County Latitude Longitude
20183 60205 Currin Solar, LLC 60403.0 Currin Solar, LLC IPP Non-CHP NC CSPV 5.2 5.2 Solar Photovoltaic ... (OP) Operating Granville 36.2896 -78.6333
20184 60163 Soltage LLC 60411.0 Coventry Photovoltaic, LLC IPP Non-CHP VT COVEN 2.2 2.2 Solar Photovoltaic ... (OP) Operating Orleans 44.9102 -72.2198
20185 60227 ORNI 37 LLC 60419.0 Don A Campbell 2 Geothermal IPP Non-CHP NV DAC2G 25.0 16.2 Geothermal ... (OP) Operating Mineral 38.8358 -118.325
20186 57081 WGL Energy Systems, Inc 60425.0 Skidmore College IPP Non-CHP NY SO049 2.0 2 Solar Photovoltaic ... (OP) Operating Saratoga 43.0937 -73.8113
20187 NOTES:\nCapacity from facilities with a total ... NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 26 columns


In [16]:
capacity.drop(20187, inplace=True)

In [18]:
capacity.loc[:,'Plant ID'] = capacity.loc[:,'Plant ID'].astype(int)

In [19]:
capacity.head()


Out[19]:
Entity ID Entity Name Plant ID Plant Name Sector Plant State Generator ID Nameplate Capacity (MW) Net Summer Capacity (MW) Technology ... Status Planned Derate Year Planned Derate Month Planned Derate of Summer Capacity (MW) Planned Uprate Year Planned Uprate Month Planned Uprate of Summer Capacity (MW) County Latitude Longitude
0 195 Alabama Power Co 2 Bankhead Dam Electric Utility AL 1 53.9 56 Conventional Hydroelectric ... (OP) Operating Tuscaloosa 33.4587 -87.3568
1 195 Alabama Power Co 3 Barry Electric Utility AL 1 153.1 55 Natural Gas Steam Turbine ... (OP) Operating Mobile 31.0069 -88.0103
2 195 Alabama Power Co 3 Barry Electric Utility AL 2 153.1 55 Natural Gas Steam Turbine ... (OP) Operating Mobile 31.0069 -88.0103
3 195 Alabama Power Co 3 Barry Electric Utility AL 4 403.7 362 Conventional Steam Coal ... (OP) Operating Mobile 31.0069 -88.0103
4 195 Alabama Power Co 3 Barry Electric Utility AL 5 788.8 726.5 Conventional Steam Coal ... (OP) Operating Mobile 31.0069 -88.0103

5 rows × 26 columns

Check the column names

Sure enough, there are some weird issues


In [70]:
capacity.columns


Out[70]:
Index([u'Entity ID', u'Entity Name', u'Plant ID', u'Plant Name', u'Sector',
       u'Plant State', u'Generator ID', u'\nNameplate Capacity (MW)',
       u'\n \nNet Summer Capacity (MW)', u'Technology', u'Energy Source Code',
       u'Prime Mover Code', u'Operating Month', u'Operating Year',
       u'Planned Retirement Month', u'Planned Retirement Year', u'Status',
       u'Planned Derate Year', u'Planned Derate Month',
       u'Planned Derate of Summer Capacity (MW)', u'Planned Uprate Year',
       u'Planned Uprate Month', u'Planned Uprate of Summer Capacity (MW)',
       u'County', u'Latitude', u'Longitude'],
      dtype='object')

In [71]:
capacity.columns = [name.strip() for name in capacity.columns]
capacity.columns


Out[71]:
Index([u'Entity ID', u'Entity Name', u'Plant ID', u'Plant Name', u'Sector',
       u'Plant State', u'Generator ID', u'Nameplate Capacity (MW)',
       u'Net Summer Capacity (MW)', u'Technology', u'Energy Source Code',
       u'Prime Mover Code', u'Operating Month', u'Operating Year',
       u'Planned Retirement Month', u'Planned Retirement Year', u'Status',
       u'Planned Derate Year', u'Planned Derate Month',
       u'Planned Derate of Summer Capacity (MW)', u'Planned Uprate Year',
       u'Planned Uprate Month', u'Planned Uprate of Summer Capacity (MW)',
       u'County', u'Latitude', u'Longitude'],
      dtype='object')

Boolean filtering


In [43]:
PA_cap = capacity.loc[capacity['Plant State']=='PA',:]
PA_cap


Out[43]:
Entity ID Entity Name Plant ID Plant Name Sector Plant State Generator ID Nameplate Capacity (MW) Net Summer Capacity (MW) Technology ... Status Planned Derate Year Planned Derate Month Planned Derate of Summer Capacity (MW) Planned Uprate Year Planned Uprate Month Planned Uprate of Summer Capacity (MW) County Latitude Longitude
5223 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA 1A 25.5 15 Petroleum Liquids ... (OP) Operating Allegheny 40.4649 -80.0438
5224 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA 2A 65.3 46 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.4649 -80.0438
5225 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA 2B 65.3 48 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.4649 -80.0438
5226 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA 3 65.3 49 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.4649 -80.0438
5227 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA ST4 144.0 101 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.4649 -80.0438
5228 17235 NRG REMA LLC 3109 Hamilton (PA) IPP Non-CHP PA 1 19.6 18 Petroleum Liquids ... (OP) Operating Adams 39.9087 -76.9885
5229 17235 NRG REMA LLC 3110 Hunterstown IPP Non-CHP PA 1 20.0 18 Petroleum Liquids ... (OP) Operating Adams 39.8662 -77.1648
5230 17235 NRG REMA LLC 3110 Hunterstown IPP Non-CHP PA 2 20.0 17 Petroleum Liquids ... (OP) Operating Adams 39.8662 -77.1648
5231 17235 NRG REMA LLC 3110 Hunterstown IPP Non-CHP PA 3 20.0 18 Petroleum Liquids ... (OP) Operating Adams 39.8662 -77.1648
5232 17235 NRG REMA LLC 3111 Mountain IPP Non-CHP PA 1 27.0 18 Petroleum Liquids ... (OP) Operating Cumberland 40.1229 -77.1723
5233 17235 NRG REMA LLC 3111 Mountain IPP Non-CHP PA 2 27.0 18 Petroleum Liquids ... (OP) Operating Cumberland 40.1229 -77.1723
5234 17235 NRG REMA LLC 3112 Orrtanna IPP Non-CHP PA 1 27.0 18 Petroleum Liquids ... (OP) Operating Adams 39.8442 -77.3508
5235 17235 NRG REMA LLC 3113 Portland (PA) IPP Non-CHP PA 3 18.0 13 Petroleum Liquids ... (OP) Operating Northampton 40.9102 -75.0794
5236 17235 NRG REMA LLC 3113 Portland (PA) IPP Non-CHP PA 4 20.0 17 Petroleum Liquids ... (OP) Operating Northampton 40.9102 -75.0794
5237 17235 NRG REMA LLC 3113 Portland (PA) IPP Non-CHP PA 5 156.0 134 Petroleum Liquids ... (OP) Operating Northampton 40.9102 -75.0794
5238 17235 NRG REMA LLC 3114 Shawnee (PA) IPP Non-CHP PA 1 20.0 17 Petroleum Liquids ... (OP) Operating Monroe 41.0612 -75.0582
5239 17235 NRG REMA LLC 3115 Titus IPP Non-CHP PA 4 18.0 13 Petroleum Liquids ... (OP) Operating Berks 40.3056 -75.9081
5240 17235 NRG REMA LLC 3115 Titus IPP Non-CHP PA 5 18.0 14 Petroleum Liquids ... (OP) Operating Berks 40.3056 -75.9081
5241 17235 NRG REMA LLC 3116 Tolna IPP Non-CHP PA 1 27.0 18 Petroleum Liquids ... (SB) Standby/Backup: available for service but... York 39.7606 -76.6353
5242 17235 NRG REMA LLC 3116 Tolna IPP Non-CHP PA 2 27.0 18 Petroleum Liquids ... (SB) Standby/Backup: available for service but... York 39.7606 -76.6353
5243 56065 York Haven Power Company LLC 3117 York Haven IPP Non-CHP PA 1 19.6 19 Conventional Hydroelectric ... (OP) Operating York 40.1136 -76.712
5244 15873 GenOn Northeast Management Company 3118 Conemaugh IPP Non-CHP PA 1 936.0 850 Conventional Steam Coal ... (OP) Operating Indiana 40.3842 -79.0611
5245 15873 GenOn Northeast Management Company 3118 Conemaugh IPP Non-CHP PA 2 936.0 850 Conventional Steam Coal ... (OP) Operating Indiana 40.3842 -79.0611
5246 15873 GenOn Northeast Management Company 3118 Conemaugh IPP Non-CHP PA A 3.0 2.8 Petroleum Liquids ... (SB) Standby/Backup: available for service but... Indiana 40.3842 -79.0611
5247 15873 GenOn Northeast Management Company 3118 Conemaugh IPP Non-CHP PA B 3.0 2.8 Petroleum Liquids ... (SB) Standby/Backup: available for service but... Indiana 40.3842 -79.0611
5248 15873 GenOn Northeast Management Company 3118 Conemaugh IPP Non-CHP PA C 3.0 2.8 Petroleum Liquids ... (SB) Standby/Backup: available for service but... Indiana 40.3842 -79.0611
5249 15873 GenOn Northeast Management Company 3118 Conemaugh IPP Non-CHP PA D 3.0 2.8 Petroleum Liquids ... (SB) Standby/Backup: available for service but... Indiana 40.3842 -79.0611
5250 17235 NRG REMA LLC 3120 Blossburg IPP Non-CHP PA 1 24.0 16 Natural Gas Fired Combustion Turbine ... (OP) Operating Tioga 41.7065 -77.0818
5251 58615 NRG Homer City Services LLC 3122 Homer City Generating Station IPP Non-CHP PA 1 660.0 624.5 Conventional Steam Coal ... (OP) Operating Indiana 40.5128 -79.1961
5252 58615 NRG Homer City Services LLC 3122 Homer City Generating Station IPP Non-CHP PA 2 660.0 617.5 Conventional Steam Coal ... (OP) Operating Indiana 40.5128 -79.1961
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
18480 58438 Sunstream Energy LLC 58443 Martin Limestone Solar Array IPP Non-CHP PA GEN1 1.0 1 Solar Photovoltaic ... (OP) Operating Lancaster 40.1078 -76.0617
18526 58463 Dart Container Corp 58476 Dart Container Corp Industrial CHP PA LFGT1 5.6 4.6 Landfill Gas ... (OP) Operating Lancaster 40.0865 -76.176
18527 58463 Dart Container Corp 58476 Dart Container Corp Industrial CHP PA LFGT2 5.6 4.6 Landfill Gas ... (OP) Operating Lancaster 40.0865 -76.176
18549 7477 Granger Electric Co 58497 L&S Sweetners IPP Non-CHP PA GEN1 1.6 1.6 Landfill Gas ... (OP) Operating Lancaster 40.0906 -76.1456
18550 7477 Granger Electric Co 58497 L&S Sweetners IPP Non-CHP PA GEN2 1.6 1.6 Landfill Gas ... (OP) Operating Lancaster 40.0906 -76.1456
18598 56645 Aqua America Inc 58535 Pickering Solar IPP Non-CHP PA 1 1.5 1.4 Solar Photovoltaic ... (OP) Operating Chester 40.1206 -75.4881
18645 58527 Gettysburg Energy and Nutrient Rec Facility LLC 58565 Gettysburg Energy & Nutrient Rec Facility IPP Non-CHP PA GENRF 3.3 2.5 Other Waste Biomass ... (OP) Operating Adams 39.9503 -77.1253
18785 58617 Mahoning Creek Hydroelectric Company LLC 58685 Mahoning Creek Hydroelectric Project IPP Non-CHP PA 8107 6.0 6.7 Conventional Hydroelectric ... (OP) Operating Armstrong 40.9211 -79.2817
18818 58652 Roundtop Energy LLC 58715 Roundtop IPP Non-CHP PA GEN1 4.4 4.2 Natural Gas Internal Combustion Engine ... (OP) Operating Susquehanna 41.6578 -76.0492
18819 58652 Roundtop Energy LLC 58715 Roundtop IPP Non-CHP PA GEN2 4.4 4.2 Natural Gas Internal Combustion Engine ... (OP) Operating Susquehanna 41.6578 -76.0492
18820 58652 Roundtop Energy LLC 58715 Roundtop IPP Non-CHP PA GEN3 4.4 4.2 Natural Gas Internal Combustion Engine ... (OP) Operating Susquehanna 41.6578 -76.0492
18821 58652 Roundtop Energy LLC 58715 Roundtop IPP Non-CHP PA GEN4 4.4 4.2 Natural Gas Internal Combustion Engine ... (OP) Operating Susquehanna 41.6578 -76.0492
18822 58652 Roundtop Energy LLC 58715 Roundtop IPP Non-CHP PA GEN5 4.4 4.2 Natural Gas Internal Combustion Engine ... (OP) Operating Susquehanna 41.6578 -76.0492
19003 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G01 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19004 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G02 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19005 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G03 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19006 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G04 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19007 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G05 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19008 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G06 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19009 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G07 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19010 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G08 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19011 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G09 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19012 58767 Temple University 58897 Temple SEGF Commercial Non-CHP PA 19G10 1.6 1.6 Natural Gas Internal Combustion Engine ... (OP) Operating Philadelphia 39.98 -75.1506
19202 57130 Janssen Pharmaceutical Co 59056 Spring House IPP Non-CHP PA GEN 1 3.8 3.8 Natural Gas Internal Combustion Engine ... (OP) Operating Montgomery 40.1839 -75.2342
19435 59125 Marlboro Mushroom 59327 Marlboro Mushrooms Solar Field Industrial Non-CHP PA GEN1 1.0 1 Solar Photovoltaic ... (OP) Operating Chester 39.8828 -75.8294
19636 59287 Laurel Capital Partners 59546 Beaver Solar LLC IPP Non-CHP PA PV1 1.3 1.3 Solar Photovoltaic ... (OP) Operating Beaver 40.6914 -80.3239
19822 59526 State Correctnl Inst Laurel Highlands 59759 Glades Pike Generation Plant Commercial Non-CHP PA CAT1 1.6 1.4 Landfill Gas ... (OP) Operating Somerset 40.0067 -79.041
19823 59526 State Correctnl Inst Laurel Highlands 59759 Glades Pike Generation Plant Commercial Non-CHP PA CAT2 1.6 1.4 Landfill Gas ... (OP) Operating Somerset 40.0067 -79.041
19824 59526 State Correctnl Inst Laurel Highlands 59759 Glades Pike Generation Plant Commercial Non-CHP PA SOLAR 3.3 1.5 Landfill Gas ... (OP) Operating Somerset 40.0067 -79.041
20180 60187 Granger Energy of Morgantown 60388 Granger Energy of Morgantown IPP Non-CHP PA GEMT 1.6 1.6 Landfill Gas ... (OP) Operating Berks 40.1746 -75.9137

568 rows × 26 columns


In [45]:
PA_NGCC_cap = capacity.loc[(capacity['Plant State']=='PA') &
             (capacity['Technology']=='Natural Gas Fired Combined Cycle'),:]
PA_NGCC_cap


Out[45]:
Entity ID Entity Name Plant ID Plant Name Sector Plant State Generator ID Nameplate Capacity (MW) Net Summer Capacity (MW) Technology ... Status Planned Derate Year Planned Derate Month Planned Derate of Summer Capacity (MW) Planned Uprate Year Planned Uprate Month Planned Uprate of Summer Capacity (MW) County Latitude Longitude
5224 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA 2A 65.3 46 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.4649 -80.0438
5225 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA 2B 65.3 48 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.4649 -80.0438
5226 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA 3 65.3 49 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.4649 -80.0438
5227 14165 NRG Power Midwest LP 3096 Brunot Island IPP Non-CHP PA ST4 144.0 101 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.4649 -80.0438
5381 19391 UGI Development Co 3176 Hunlock Power Station IPP Non-CHP PA 3 49.9 30.1 Natural Gas Fired Combined Cycle ... (OP) Operating Luzerne 41.2006 -76.07
5382 19391 UGI Development Co 3176 Hunlock Power Station IPP Non-CHP PA 5 48.0 48.7 Natural Gas Fired Combined Cycle ... (OP) Operating Luzerne 41.2006 -76.07
5383 19391 UGI Development Co 3176 Hunlock Power Station IPP Non-CHP PA 6 48.0 48.4 Natural Gas Fired Combined Cycle ... (OP) Operating Luzerne 41.2006 -76.07
11869 2468 Bucknell University 54333 Bucknell University Commercial CHP PA G001 4.7 4.3 Natural Gas Fired Combined Cycle ... (OP) Operating Union 40.955 -76.8788
11870 2468 Bucknell University 54333 Bucknell University Commercial CHP PA G502 1.2 0.5 Natural Gas Fired Combined Cycle ... (OP) Operating Union 40.955 -76.8788
12320 56516 Morris Energy Operations Company, LLC 54693 York Generation Company LLC IPP Non-CHP PA GT#1 8.3 46.2 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.9856 -76.6762
12321 56516 Morris Energy Operations Company, LLC 54693 York Generation Company LLC IPP Non-CHP PA GT#2 8.3 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.9856 -76.6762
12322 56516 Morris Energy Operations Company, LLC 54693 York Generation Company LLC IPP Non-CHP PA GT#5 8.3 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.9856 -76.6762
12323 56516 Morris Energy Operations Company, LLC 54693 York Generation Company LLC IPP Non-CHP PA GT#6 8.3 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.9856 -76.6762
12324 56516 Morris Energy Operations Company, LLC 54693 York Generation Company LLC IPP Non-CHP PA ST#1 9.5 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.9856 -76.6762
12325 56516 Morris Energy Operations Company, LLC 54693 York Generation Company LLC IPP Non-CHP PA ST#2 9.5 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.9856 -76.6762
12528 7564 Grays Ferry Cogen Partnership 54785 Grays Ferry Cogeneration IPP CHP PA GEN1 57.6 57 Natural Gas Fired Combined Cycle ... (OP) Operating Philadelphia 39.9422 -75.1881
12529 7564 Grays Ferry Cogen Partnership 54785 Grays Ferry Cogeneration IPP CHP PA GEN2 135.0 113 Natural Gas Fired Combined Cycle ... (OP) Operating Philadelphia 39.9422 -75.1881
13268 55649 Ontelaunee Energy Center 55193 Ontelaunee Energy Center IPP Non-CHP PA CTG1 250.0 180 Natural Gas Fired Combined Cycle ... (OP) Operating Berks 40.4219 -75.9356
13269 55649 Ontelaunee Energy Center 55193 Ontelaunee Energy Center IPP Non-CHP PA CTG2 250.0 180 Natural Gas Fired Combined Cycle ... (OP) Operating Berks 40.4219 -75.9356
13270 55649 Ontelaunee Energy Center 55193 Ontelaunee Energy Center IPP Non-CHP PA STG 228.0 180 Natural Gas Fired Combined Cycle ... (OP) Operating Berks 40.4219 -75.9356
13389 27031 Liberty Electric Power LLC 55231 Liberty Electric Power Plant IPP Non-CHP PA GTG1 186.0 158.6 Natural Gas Fired Combined Cycle ... (OP) Operating 2017 4 10.5 Delaware 39.8614 -75.3358
13390 27031 Liberty Electric Power LLC 55231 Liberty Electric Power Plant IPP Non-CHP PA GTG2 186.0 158.6 Natural Gas Fired Combined Cycle ... (OP) Operating 2016 10 10.5 Delaware 39.8614 -75.3358
13391 27031 Liberty Electric Power LLC 55231 Liberty Electric Power Plant IPP Non-CHP PA STG 242.0 223.8 Natural Gas Fired Combined Cycle ... (OP) Operating Delaware 39.8614 -75.3358
13604 54817 Fairless Energy LLC 55298 Fairless Energy Center IPP Non-CHP PA CT1A 198.9 189 Natural Gas Fired Combined Cycle ... (OP) Operating Bucks 40.1475 -74.7411
13605 54817 Fairless Energy LLC 55298 Fairless Energy Center IPP Non-CHP PA CT1B 198.9 189 Natural Gas Fired Combined Cycle ... (OP) Operating Bucks 40.1475 -74.7411
13606 54817 Fairless Energy LLC 55298 Fairless Energy Center IPP Non-CHP PA CT2A 198.9 189 Natural Gas Fired Combined Cycle ... (OP) Operating Bucks 40.1475 -74.7411
13607 54817 Fairless Energy LLC 55298 Fairless Energy Center IPP Non-CHP PA CT2B 198.9 189 Natural Gas Fired Combined Cycle ... (OP) Operating Bucks 40.1475 -74.7411
13608 54817 Fairless Energy LLC 55298 Fairless Energy Center IPP Non-CHP PA ST1 271.2 258 Natural Gas Fired Combined Cycle ... (OP) Operating Bucks 40.1475 -74.7411
13609 54817 Fairless Energy LLC 55298 Fairless Energy Center IPP Non-CHP PA ST2 271.2 258 Natural Gas Fired Combined Cycle ... (OP) Operating Bucks 40.1475 -74.7411
13674 58199 TalenEnergy Ironwood LLC 55337 TalenEnergy Ironwood LLC IPP Non-CHP PA CT1 259.2 218.7 Natural Gas Fired Combined Cycle ... (OP) Operating Lebanon 40.3509 -76.3658
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
13676 58199 TalenEnergy Ironwood LLC 55337 TalenEnergy Ironwood LLC IPP Non-CHP PA ST4 259.2 219.7 Natural Gas Fired Combined Cycle ... (OP) Operating Lebanon 40.3509 -76.3658
13964 59923 Dynegy Fayette Energy Facility 55516 Fayette Energy Facility IPP Non-CHP PA CTG1 163.5 174 Natural Gas Fired Combined Cycle ... (OP) Operating Fayette 39.8592 -79.9182
13965 59923 Dynegy Fayette Energy Facility 55516 Fayette Energy Facility IPP Non-CHP PA CTG2 163.5 174 Natural Gas Fired Combined Cycle ... (OP) Operating Fayette 39.8592 -79.9182
13966 59923 Dynegy Fayette Energy Facility 55516 Fayette Energy Facility IPP Non-CHP PA STG1 317.1 314 Natural Gas Fired Combined Cycle ... (OP) Operating Fayette 39.8592 -79.9182
13986 56608 Calpine Mid-Merit LLC 55524 York Energy Center IPP Non-CHP PA CTG1 120.0 113 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.7375 -76.3067
13987 56608 Calpine Mid-Merit LLC 55524 York Energy Center IPP Non-CHP PA CTG2 120.0 122 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.7375 -76.3067
13988 56608 Calpine Mid-Merit LLC 55524 York Energy Center IPP Non-CHP PA CTG3 120.0 122 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.7375 -76.3067
13989 56608 Calpine Mid-Merit LLC 55524 York Energy Center IPP Non-CHP PA STG1 200.0 188 Natural Gas Fired Combined Cycle ... (OP) Operating York 39.7375 -76.3067
14161 11275 Lower Mount Bethel Energy LLC 55667 Lower Mount Bethel Energy IPP Non-CHP PA G1 211.5 160.9 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.8019 -75.1076
14162 11275 Lower Mount Bethel Energy LLC 55667 Lower Mount Bethel Energy IPP Non-CHP PA G2 211.5 162.6 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.8019 -75.1076
14163 11275 Lower Mount Bethel Energy LLC 55667 Lower Mount Bethel Energy IPP Non-CHP PA G3 228.6 214 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.8019 -75.1076
14180 56607 Calpine Bethlehem LLC 55690 Bethlehem Power Plant IPP Non-CHP PA CTG1 127.0 118 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.6175 -75.3147
14181 56607 Calpine Bethlehem LLC 55690 Bethlehem Power Plant IPP Non-CHP PA CTG2 127.0 127 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.6175 -75.3147
14182 56607 Calpine Bethlehem LLC 55690 Bethlehem Power Plant IPP Non-CHP PA CTG3 127.0 127 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.6175 -75.3147
14183 56607 Calpine Bethlehem LLC 55690 Bethlehem Power Plant IPP Non-CHP PA CTG5 127.0 118 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.6175 -75.3147
14184 56607 Calpine Bethlehem LLC 55690 Bethlehem Power Plant IPP Non-CHP PA CTG6 127.0 127 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.6175 -75.3147
14185 56607 Calpine Bethlehem LLC 55690 Bethlehem Power Plant IPP Non-CHP PA CTG7 127.0 127 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.6175 -75.3147
14186 56607 Calpine Bethlehem LLC 55690 Bethlehem Power Plant IPP Non-CHP PA STG4 195.5 195 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.6175 -75.3147
14187 56607 Calpine Bethlehem LLC 55690 Bethlehem Power Plant IPP Non-CHP PA STG8 195.5 195 Natural Gas Fired Combined Cycle ... (OP) Operating Northampton 40.6175 -75.3147
14204 23279 Allegheny Energy Supply Co LLC 55710 FirstEnergy Allegheny Energy Units 3 4 & 5 IPP Non-CHP PA UNT3 184.0 167 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.5453 -79.7686
14205 23279 Allegheny Energy Supply Co LLC 55710 FirstEnergy Allegheny Energy Units 3 4 & 5 IPP Non-CHP PA UNT4 184.0 167 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.5453 -79.7686
14206 23279 Allegheny Energy Supply Co LLC 55710 FirstEnergy Allegheny Energy Units 3 4 & 5 IPP Non-CHP PA UNT5 188.0 175 Natural Gas Fired Combined Cycle ... (OP) Operating Allegheny 40.5453 -79.7686
14290 6693 FPL Energy Marcus Hook LP 55801 FPL Energy Marcus Hook LP IPP CHP PA CT13 188.2 172 Natural Gas Fired Combined Cycle ... (OP) Operating Delaware 39.807 -75.4216
14291 6693 FPL Energy Marcus Hook LP 55801 FPL Energy Marcus Hook LP IPP CHP PA CT1A 188.2 178.7 Natural Gas Fired Combined Cycle ... (OP) Operating Delaware 39.807 -75.4216
14292 6693 FPL Energy Marcus Hook LP 55801 FPL Energy Marcus Hook LP IPP CHP PA CTIB 188.2 172 Natural Gas Fired Combined Cycle ... (OP) Operating Delaware 39.807 -75.4216
14293 6693 FPL Energy Marcus Hook LP 55801 FPL Energy Marcus Hook LP IPP CHP PA STG 271.5 263 Natural Gas Fired Combined Cycle ... (OP) Operating Delaware 39.807 -75.4216
14433 54885 NRG Wholesale Generation LP 55976 Hunterstown Power Plant IPP Non-CHP PA 101 179.0 153 Natural Gas Fired Combined Cycle ... (OP) Operating Adams 39.8725 -77.1672
14434 54885 NRG Wholesale Generation LP 55976 Hunterstown Power Plant IPP Non-CHP PA 201 179.0 153 Natural Gas Fired Combined Cycle ... (OP) Operating Adams 39.8725 -77.1672
14435 54885 NRG Wholesale Generation LP 55976 Hunterstown Power Plant IPP Non-CHP PA 301 179.0 153 Natural Gas Fired Combined Cycle ... (OP) Operating Adams 39.8725 -77.1672
14436 54885 NRG Wholesale Generation LP 55976 Hunterstown Power Plant IPP Non-CHP PA 401 361.0 299 Natural Gas Fired Combined Cycle ... (OP) Operating Adams 39.8725 -77.1672

61 rows × 26 columns

Repeat groupby and sum to get capacity of facilities


In [72]:
cols = ['Plant ID', 'Nameplate Capacity (MW)']
facility_cap = capacity.loc[:,cols].groupby('Plant ID').sum()
facility_cap


Out[72]:
Nameplate Capacity (MW)
Plant ID
2 53.9
3 2569.5
4 225.0
7 138.0
8 1166.7
9 80.5
10 1288.4
11 72.9
12 46.9
13 100.0
14 128.1
15 177.0
16 210.6
17 170.0
18 181.0
19 91.0
20 87.6
21 50.5
26 2034.0
30 11.8
34 12.1
38 101.1
46 3494.0
47 1026.0
48 115.2
49 404.0
50 575.0
51 720.7
53 3.0
54 1055.0
... ...
60262 73.4
60263 1.6
60265 4.4
60267 2.7
60275 2.0
60276 4.1
60277 1.5
60278 1.9
60299 7.0
60312 4.9
60313 4.9
60314 35.8
60320 1.2
60321 16.1
60327 1.8
60341 1.0
60344 2.0
60349 2.3
60353 2.0
60365 1.6
60367 5.2
60369 5.2
60384 5.0
60388 1.6
60390 31.5
60401 5.2
60403 5.2
60411 2.2
60419 25.0
60425 2.0

7561 rows × 1 columns

Load generation data


In [73]:
generation = pd.read_excel(fn3, header=5)

Something weird is going on here. I know there are lots of rows with numeric data that are missing from this describe table.


In [108]:
generation.describe()


Out[108]:
Plant Id Nuclear Unit Id Operator Id Reserved NAICS Code EIA Sector Number Reserved.1 Reserved.2 Total Fuel Consumption Quantity Electric Fuel Consumption Quantity Total Fuel Consumption MMBtu Elec Fuel Consumption MMBtu Net Generation (Megawatthours) YEAR
count 6846.000000 100.000000 6846.000000 0.0 6846.000000 6846.000000 0.0 0.0 6.846000e+03 6.846000e+03 6.846000e+03 6.846000e+03 6.846000e+03 6846.0
mean 47395.247590 1.590000 45494.412650 NaN 38365.911043 2.550394 NaN NaN 1.735993e+06 1.523509e+06 5.046923e+06 4.736994e+06 5.018362e+05 2016.0
std 39659.073362 0.697687 38449.729044 NaN 68095.379303 2.051354 NaN NaN 6.397009e+06 5.648343e+06 1.423608e+07 1.412352e+07 1.379263e+06 0.0
min 3.000000 1.000000 21.000000 NaN 22.000000 1.000000 NaN NaN 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 -7.028300e+05 2016.0
25% 6001.250000 1.000000 12492.000000 NaN 22.000000 1.000000 NaN NaN 0.000000e+00 0.000000e+00 6.762500e+02 4.747500e+02 1.809423e+02 2016.0
50% 55061.000000 1.000000 22129.000000 NaN 22.000000 2.000000 NaN NaN 2.378500e+03 1.470500e+03 3.173170e+05 2.088960e+05 3.127657e+04 2016.0
75% 99999.000000 2.000000 99999.000000 NaN 99999.000000 3.000000 NaN NaN 4.469722e+05 2.983672e+05 2.892082e+06 2.288356e+06 2.921652e+05 2016.0
max 99999.000000 4.000000 99999.000000 NaN 562213.000000 7.000000 NaN NaN 1.533434e+08 1.208176e+08 1.505038e+08 1.505038e+08 1.544404e+07 2016.0

In [74]:
generation.head()


Out[74]:
Plant Id Combined Heat And Power Plant Nuclear Unit Id Plant Name Operator Name Operator Id Plant State Census Region NERC Region Reserved ... Netgen September Netgen October Netgen November Netgen December Total Fuel Consumption Quantity Electric Fuel Consumption Quantity Total Fuel Consumption MMBtu Elec Fuel Consumption MMBtu Net Generation (Megawatthours) YEAR
0 3 N NaN Barry Alabama Power Co 195 AL ESC SERC NaN ... 251839 219263 . . 4782719 4782719 4877614 4877614 2386339.000 2016
1 3 N NaN Barry Alabama Power Co 195 AL ESC SERC NaN ... 463646 401506 . . 42413079 42413079 43208127 43208127 4486082.000 2016
2 3 N NaN Barry Alabama Power Co 195 AL ESC SERC NaN ... 473794 286736 . . 1773704 1773704 37426485 37426485 3706973.300 2016
3 3 N NaN Barry Alabama Power Co 195 AL ESC SERC NaN ... 11246.3 14387.7 . . 857741 857741 874585 874585 86059.704 2016
4 4 N NaN Walter Bouldin Dam Alabama Power Co 195 AL ESC SERC NaN ... 3380 884 . . 0 0 4239930 4239930 454977.000 2016

5 rows × 97 columns

Turns out that there are lots of dots (.) where it is no value. I'm going to replace these with zeros.


In [75]:
generation.tail()


Out[75]:
Plant Id Combined Heat And Power Plant Nuclear Unit Id Plant Name Operator Name Operator Id Plant State Census Region NERC Region Reserved ... Netgen September Netgen October Netgen November Netgen December Total Fuel Consumption Quantity Electric Fuel Consumption Quantity Total Fuel Consumption MMBtu Elec Fuel Consumption MMBtu Net Generation (Megawatthours) YEAR
6841 99999 Y NaN State-Fuel Level Increment State-Fuel Level Increment 99999 WY MTN NaN NaN ... 20.03 20.073 . . 2471 197 14391 1144 225.055 2016
6842 99999 Y NaN State-Fuel Level Increment State-Fuel Level Increment 99999 WY MTN NaN NaN ... 8371.86 6890.44 . . 5750996 817170 5662749 804426 84996.866 2016
6843 99999 Y NaN State-Fuel Level Increment State-Fuel Level Increment 99999 WY MTN NaN NaN ... 282.565 112.177 . . 7449697 152028 1362700 27810 3212.683 2016
6844 99999 Y NaN State-Fuel Level Increment State-Fuel Level Increment 99999 WY MTN NaN NaN ... 0 0 . . 0 0 0 0 0.000 2016
6845 99999 Y NaN State-Fuel Level Increment State-Fuel Level Increment 99999 WY MTN NaN NaN ... 10293 7668.97 . . 406340 92966 7110741 1626327 133198.240 2016

5 rows × 97 columns


In [110]:
generation.replace('.', 0, inplace=True)

There are line breaks in the middle of column names. I don't see any breaks or spaces at the beginning or end of names, but will still strip just to be safe.


In [76]:
generation.columns


Out[76]:
Index([u'Plant Id', u'Combined Heat And\nPower Plant', u'Nuclear Unit Id',
       u'Plant Name', u'Operator Name', u'Operator Id', u'Plant State',
       u'Census Region', u'NERC Region', u'Reserved', u'NAICS Code',
       u'EIA Sector Number', u'Sector Name', u'Reported\nPrime Mover',
       u'Reported\nFuel Type Code', u'AER\nFuel Type Code', u'Reserved.1',
       u'Reserved.2', u'Physical\nUnit Label', u'Quantity\nJanuary',
       u'Quantity\nFebruary', u'Quantity\nMarch', u'Quantity\nApril',
       u'Quantity\nMay', u'Quantity\nJune', u'Quantity\nJuly',
       u'Quantity\nAugust', u'Quantity\nSeptember', u'Quantity\nOctober',
       u'Quantity\nNovember', u'Quantity\nDecember', u'Elec_Quantity\nJanuary',
       u'Elec_Quantity\nFebruary', u'Elec_Quantity\nMarch',
       u'Elec_Quantity\nApril', u'Elec_Quantity\nMay', u'Elec_Quantity\nJune',
       u'Elec_Quantity\nJuly', u'Elec_Quantity\nAugust',
       u'Elec_Quantity\nSeptember', u'Elec_Quantity\nOctober',
       u'Elec_Quantity\nNovember', u'Elec_Quantity\nDecember',
       u'MMBtuPer_Unit\nJanuary', u'MMBtuPer_Unit\nFebruary',
       u'MMBtuPer_Unit\nMarch', u'MMBtuPer_Unit\nApril', u'MMBtuPer_Unit\nMay',
       u'MMBtuPer_Unit\nJune', u'MMBtuPer_Unit\nJuly',
       u'MMBtuPer_Unit\nAugust', u'MMBtuPer_Unit\nSeptember',
       u'MMBtuPer_Unit\nOctober', u'MMBtuPer_Unit\nNovember',
       u'MMBtuPer_Unit\nDecember', u'Tot_MMBtu\nJanuary',
       u'Tot_MMBtu\nFebruary', u'Tot_MMBtu\nMarch', u'Tot_MMBtu\nApril',
       u'Tot_MMBtu\nMay', u'Tot_MMBtu\nJune', u'Tot_MMBtu\nJuly',
       u'Tot_MMBtu\nAugust', u'Tot_MMBtu\nSeptember', u'Tot_MMBtu\nOctober',
       u'Tot_MMBtu\nNovember', u'Tot_MMBtu\nDecember', u'Elec_MMBtu\nJanuary',
       u'Elec_MMBtu\nFebruary', u'Elec_MMBtu\nMarch', u'Elec_MMBtu\nApril',
       u'Elec_MMBtu\nMay', u'Elec_MMBtu\nJune', u'Elec_MMBtu\nJuly',
       u'Elec_MMBtu\nAugust', u'Elec_MMBtu\nSeptember', u'Elec_MMBtu\nOctober',
       u'Elec_MMBtu\nNovember', u'Elec_MMBtu\nDecember', u'Netgen\nJanuary',
       u'Netgen\nFebruary', u'Netgen\nMarch', u'Netgen\nApril', u'Netgen\nMay',
       u'Netgen\nJune', u'Netgen\nJuly', u'Netgen\nAugust',
       u'Netgen\nSeptember', u'Netgen\nOctober', u'Netgen\nNovember',
       u'Netgen\nDecember', u'Total Fuel Consumption\nQuantity',
       u'Electric Fuel Consumption\nQuantity',
       u'Total Fuel Consumption\nMMBtu', u'Elec Fuel Consumption\nMMBtu',
       u'Net Generation\n(Megawatthours)', u'YEAR'],
      dtype='object')

In [79]:
generation.columns = [name.strip().replace('\n', ' ') for name in generation.columns]
generation.columns


Out[79]:
Index([u'Plant Id', u'Combined Heat And Power Plant', u'Nuclear Unit Id',
       u'Plant Name', u'Operator Name', u'Operator Id', u'Plant State',
       u'Census Region', u'NERC Region', u'Reserved', u'NAICS Code',
       u'EIA Sector Number', u'Sector Name', u'Reported Prime Mover',
       u'Reported Fuel Type Code', u'AER Fuel Type Code', u'Reserved.1',
       u'Reserved.2', u'Physical Unit Label', u'Quantity January',
       u'Quantity February', u'Quantity March', u'Quantity April',
       u'Quantity May', u'Quantity June', u'Quantity July', u'Quantity August',
       u'Quantity September', u'Quantity October', u'Quantity November',
       u'Quantity December', u'Elec_Quantity January',
       u'Elec_Quantity February', u'Elec_Quantity March',
       u'Elec_Quantity April', u'Elec_Quantity May', u'Elec_Quantity June',
       u'Elec_Quantity July', u'Elec_Quantity August',
       u'Elec_Quantity September', u'Elec_Quantity October',
       u'Elec_Quantity November', u'Elec_Quantity December',
       u'MMBtuPer_Unit January', u'MMBtuPer_Unit February',
       u'MMBtuPer_Unit March', u'MMBtuPer_Unit April', u'MMBtuPer_Unit May',
       u'MMBtuPer_Unit June', u'MMBtuPer_Unit July', u'MMBtuPer_Unit August',
       u'MMBtuPer_Unit September', u'MMBtuPer_Unit October',
       u'MMBtuPer_Unit November', u'MMBtuPer_Unit December',
       u'Tot_MMBtu January', u'Tot_MMBtu February', u'Tot_MMBtu March',
       u'Tot_MMBtu April', u'Tot_MMBtu May', u'Tot_MMBtu June',
       u'Tot_MMBtu July', u'Tot_MMBtu August', u'Tot_MMBtu September',
       u'Tot_MMBtu October', u'Tot_MMBtu November', u'Tot_MMBtu December',
       u'Elec_MMBtu January', u'Elec_MMBtu February', u'Elec_MMBtu March',
       u'Elec_MMBtu April', u'Elec_MMBtu May', u'Elec_MMBtu June',
       u'Elec_MMBtu July', u'Elec_MMBtu August', u'Elec_MMBtu September',
       u'Elec_MMBtu October', u'Elec_MMBtu November', u'Elec_MMBtu December',
       u'Netgen January', u'Netgen February', u'Netgen March', u'Netgen April',
       u'Netgen May', u'Netgen June', u'Netgen July', u'Netgen August',
       u'Netgen September', u'Netgen October', u'Netgen November',
       u'Netgen December', u'Total Fuel Consumption Quantity',
       u'Electric Fuel Consumption Quantity', u'Total Fuel Consumption MMBtu',
       u'Elec Fuel Consumption MMBtu', u'Net Generation (Megawatthours)',
       u'YEAR'],
      dtype='object')

Stack data by month rather than having multiple columns

Not sure if I'll have time for this section

I'm lazy and want to get a list of month names without typing them all


In [82]:
# could have done this as a list comprehension, but it would have been harder to read
months = []
for name in generation.columns:
    if 'Netgen' in name:
        month = name.split()[-1]
        months.append(month)
months


Out[82]:
[u'January',
 u'February',
 u'March',
 u'April',
 u'May',
 u'June',
 u'July',
 u'August',
 u'September',
 u'October',
 u'November',
 u'December']

In [90]:
id_cols = ['Plant Id', 'Plant State', 'NERC Region', 'AER Fuel Type Code']
monthly_cols = []
def find_col_names(cols):
    for col in cols:
        if 'January' in col:
            monthly_cols.append(col.split()[0])

find_col_names(generation.columns)
id_cols + monthly_cols


Out[90]:
['Plant Id',
 'Plant State',
 'NERC Region',
 'AER Fuel Type Code',
 u'Quantity',
 u'Elec_Quantity',
 u'MMBtuPer_Unit',
 u'Tot_MMBtu',
 u'Elec_MMBtu',
 u'Netgen']

In [91]:
pd.DataFrame(columns=id_cols + monthly_cols + ['Month'])


Out[91]:
Plant Id Plant State NERC Region AER Fuel Type Code Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen

In [113]:
gen_list = []
for month in months:
    gen_df = pd.DataFrame(columns=id_cols + monthly_cols)
    
    # Took me a few tries to figure out that I couldn't use .loc for gen_df
    gen_df[id_cols] = generation.loc[:,id_cols]
    gen_df['Month'] = month
    
    for col in monthly_cols:
        gen_df.loc[:,col] = generation.loc[:,col + ' ' + month]
    
    gen_list.append(gen_df)

In [120]:
gen_stack = pd.concat(gen_list)
gen_stack.describe()


Out[120]:
Plant Id Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen
count 82152.000000 8.215200e+04 8.215200e+04 82152.000000 8.215200e+04 8.215200e+04 8.215200e+04
mean 47395.247590 1.446661e+05 1.269590e+05 2.397057 4.205770e+05 3.947495e+05 4.181968e+04
std 39656.418102 6.144502e+05 5.480218e+05 5.409176 1.371208e+06 1.359613e+06 1.331186e+05
min 3.000000 0.000000e+00 0.000000e+00 0.000000 0.000000e+00 0.000000e+00 -1.164190e+05
25% 6001.000000 0.000000e+00 0.000000e+00 0.000000 0.000000e+00 0.000000e+00 0.000000e+00
50% 55061.000000 0.000000e+00 0.000000e+00 0.000000 2.546500e+03 1.822500e+03 3.217385e+02
75% 99999.000000 1.622225e+04 9.554750e+03 1.040000 1.742685e+05 1.271422e+05 1.720887e+04
max 99999.000000 1.650352e+07 1.356701e+07 34.180000 1.966522e+07 1.966522e+07 1.940054e+06

Tag lines as using a combustion fuel or not


In [115]:
gen_stack['AER Fuel Type Code'].unique()


Out[115]:
array([u'NG', u'COL', u'HYC', u'DFO', u'NUC', u'WOO', u'HPS', u'SUN',
       u'RFO', u'MLG', u'PC', u'ORW', u'GEO', u'OTH', u'OOG', u'WWW',
       u'WOC', nan, u'WND'], dtype=object)

In [116]:
non_combust = ['HYC', 'NUC', 'SUN', 'GEO', 'WND'] # might be incomplete

In [117]:
def tag_combust(row):
    if row['AER Fuel Type Code'] in non_combust:
        return 0
    else:
        return 1

In [121]:
gen_stack['Combust'] = gen_stack.apply(tag_combust, axis=1)

In [122]:
gen_stack.head()


Out[122]:
Plant Id Plant State NERC Region AER Fuel Type Code Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Month Combust
0 3 AL SERC NG 57253 57253 1.017 58226 58226 268797.000 January 1
1 3 AL SERC NG 5248798 5248798 1.017 5338028 5338028 511773.000 January 1
2 3 AL SERC COL 159951 159951 20.589 3293231 3293231 329513.400 January 1
3 3 AL SERC NG 129803 129803 1.016 131880 131880 13195.605 January 1
4 4 AL SERC HYC 0 0 0.000 1301314 1301314 139641.000 January 0

Now group and sum

Only keep data for May


In [149]:
test = gen_stack.loc[gen_stack['Month']=='May',:].groupby(['Plant Id', 'NERC Region']).sum()
test.head()


Out[149]:
Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust
Plant Id NERC Region
3 SERC 4662196 4662196 24.476 8393587 8393587 1028473.000 4
4 SERC 0 0 0.000 95361 95361 10233.000 0
8 SERC 188233 188233 30.364 4588547 4588547 471836.004 2
10 SERC 116191 116191 2.066 120025 120025 6734.000 7
14 SERC 0 0 0.000 132060 132060 14171.000 0

In [150]:
test.reset_index('NERC Region')


Out[150]:
NERC Region Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust
Plant Id
3 SERC 4662196 4662196 24.476 8393587 8393587 1028473.000 4
4 SERC 0 0 0.000 95361 95361 10233.000 0
8 SERC 188233 188233 30.364 4588547 4588547 471836.004 2
10 SERC 116191 116191 2.066 120025 120025 6734.000 7
14 SERC 0 0 0.000 132060 132060 14171.000 0
15 SERC 0 0 0.000 219444 219444 23548.000 0
16 SERC 0 0 0.000 145265 145265 15588.000 0
17 SERC 0 0 0.000 193742 193742 20790.000 0
18 SERC 0 0 0.000 54386 54386 5836.000 0
26 SERC 197510 197510 30.196 4577634 4577634 453833.000 4
46 SERC 0 0 0.000 25805554 25805554 2467542.000 0
47 SERC 58 58 5.800 336 336 62.000 7
48 SERC 0 0 0.000 177946 177946 19095.000 0
49 SERC 0 0 0.000 261286 261286 28038.000 0
50 SERC 0 0 0.000 0 0 0.000 2
51 SPP 241664 241664 14.902 2799148 2799148 238020.998 3
54 SERC 177832 177832 1.066 189569 189569 15037.000 2
56 SERC 49159 49159 28.560 1098557 1098557 97056.000 2
60 MRO 24713 24713 22.206 404792 404792 31448.000 2
64 ASCC 102 102 5.712 583 583 21.000 2
77 ASCC 0 0 0.000 84617 84617 9080.000 0
78 ASCC 0 0 0.000 157007 157007 16848.000 0
87 WECC 52499 52499 19.578 867759 867759 74925.000 2
96 ASCC 76562 76562 1.000 76562 76562 3455.000 1
104 WECC 106 106 0.000 0 0 44845.000 1
108 SPP 102821 102821 18.837 1607158 1607158 146461.005 2
113 WECC 68608 68608 24.756 1253737 1253737 103305.000 4
116 WECC 92596 92596 2.070 96601 96601 8323.000 4
117 WECC 1112689 1112689 3.117 1156084 1156084 130221.000 5
118 WECC 40761 40761 1.060 43207 43207 1618.000 3
... ... ... ... ... ... ... ... ...
59965 WECC 0 0 0.000 89956 89956 9653.000 0
59974 WECC 0 0 0.000 690044 690044 74047.000 0
59975 WECC 0 0 0.000 317582 317582 34079.000 0
60008 WECC 0 0 0.000 127866 127866 13721.000 0
60013 SPP 0 0 0.000 410055 410055 44002.000 0
60049 WECC 0 0 0.000 306101 306101 32847.000 0
60058 SERC 0 0 0.000 0 0 0.000 0
60059 TRE 0 0 0.000 295142 295142 31671.000 0
60064 SERC 0 0 0.000 0 0 0.000 0
60069 SPP 0 0 0.000 612939 612939 65773.000 0
60082 SERC 0 0 0.000 0 0 0.000 0
60087 TRE 0 0 0.000 0 0 0.000 0
60092 WECC 0 0 0.000 0 0 0.000 0
60093 WECC 0 0 0.000 263569 263569 28283.000 0
60104 TRE 0 0 0.000 796420 796420 85462.000 0
60127 RFC 0 0 0.000 0 0 0.000 0
60259 WECC 0 0 0.000 0 0 0.000 0
60262 SPP 0 0 0.000 240132 240132 25768.000 0
60285 WECC 0 0 0.000 0 0 0.000 0
60314 SPP 0 0 0.000 103935 103935 11153.000 0
60326 MRO 0 0 0.000 0 0 0.000 0
60339 TRE 0 0 0.000 0 0 0.000 0
60342 MRO 0 0 0.000 0 0 0.000 0
60351 WECC 0 0 0.000 0 0 0.000 0
60389 WECC 0 0 0.000 0 0 0.000 0
60405 SPP 0 0 0.000 0 0 0.000 0
60406 SPP 0 0 0.000 0 0 0.000 0
60419 WECC 0 0 0.000 142450 142450 15286.000 0
60445 WECC 0 0 0.000 0 0 0.000 0
60486 WECC 0 0 0.000 0 0 0.000 0

2260 rows × 8 columns


In [123]:
facility_gen = gen_stack.loc[gen_stack['Month']=='May',:].groupby('Plant Id').sum()

In [124]:
facility_gen.head()


Out[124]:
Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust
Plant Id
3 4662196 4662196 24.476 8393587 8393587 1028473.000 4
4 0 0 0.000 95361 95361 10233.000 0
8 188233 188233 30.364 4588547 4588547 471836.004 2
10 116191 116191 2.066 120025 120025 6734.000 7
14 0 0 0.000 132060 132060 14171.000 0

If I want to keep the NERC Region, I can do that in the groupby


In [153]:
facility_gen = gen_stack.loc[gen_stack['Month']=='May',:].groupby(['Plant Id', 'NERC Region']).sum()
facility_gen.head()


Out[153]:
Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust
Plant Id NERC Region
3 SERC 4662196 4662196 24.476 8393587 8393587 1028473.000 4
4 SERC 0 0 0.000 95361 95361 10233.000 0
8 SERC 188233 188233 30.364 4588547 4588547 471836.004 2
10 SERC 116191 116191 2.066 120025 120025 6734.000 7
14 SERC 0 0 0.000 132060 132060 14171.000 0

In [154]:
facility_gen.reset_index('NERC Region', inplace=True)
facility_gen.head()


Out[154]:
NERC Region Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust
Plant Id
3 SERC 4662196 4662196 24.476 8393587 8393587 1028473.000 4
4 SERC 0 0 0.000 95361 95361 10233.000 0
8 SERC 188233 188233 30.364 4588547 4588547 471836.004 2
10 SERC 116191 116191 2.066 120025 120025 6734.000 7
14 SERC 0 0 0.000 132060 132060 14171.000 0

Merge data from all three sources


In [155]:
merged = facility_gen.merge(facility_cap, how='inner', left_index=True, right_index=True)

In [156]:
merged.describe()


Out[156]:
Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust Nameplate Capacity (MW)
count 2.204000e+03 2.204000e+03 2204.000000 2.204000e+03 2.204000e+03 2.204000e+03 2204.000000 2204.000000
mean 4.341980e+05 3.940222e+05 6.088443 1.324327e+06 1.269274e+06 1.349217e+05 1.699183 484.658122
std 1.116450e+06 1.004303e+06 12.526411 2.633492e+06 2.631382e+06 2.663379e+05 1.944481 615.046700
min 0.000000e+00 0.000000e+00 0.000000 0.000000e+00 0.000000e+00 -6.372300e+04 0.000000 1.800000
25% 0.000000e+00 0.000000e+00 0.000000 1.038605e+05 9.849125e+04 1.000100e+04 0.000000 94.575000
50% 3.765000e+02 2.850000e+02 0.924500 3.455885e+05 3.064225e+05 3.220900e+04 1.000000 233.900000
75% 2.545605e+05 2.182568e+05 3.066000 1.264416e+06 1.035504e+06 1.138057e+05 3.000000 650.000000
max 1.622117e+07 1.124778e+07 99.754000 2.580555e+07 2.580555e+07 2.467542e+06 11.000000 6809.000000

Save the non-combustion units, because I'm going to join the merged dataframe with the emissions dataframe and want to add back in the non-combustion


In [157]:
non_combust = merged.loc[merged['Combust']==0,:]
non_combust.describe()


Out[157]:
Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust Nameplate Capacity (MW)
count 933.0 933.0 933.0 9.330000e+02 9.330000e+02 9.330000e+02 933.0 933.000000
mean 0.0 0.0 0.0 1.090682e+06 1.090682e+06 1.088466e+05 0.0 248.966131
std 0.0 0.0 0.0 3.094933e+06 3.094933e+06 2.976431e+05 0.0 463.451807
min 0.0 0.0 0.0 0.000000e+00 0.000000e+00 -1.500000e+01 0.0 1.800000
25% 0.0 0.0 0.0 1.239710e+05 1.239710e+05 1.330300e+04 0.0 61.200000
50% 0.0 0.0 0.0 2.510540e+05 2.510540e+05 2.694000e+04 0.0 110.000000
75% 0.0 0.0 0.0 4.913350e+05 4.913350e+05 5.272400e+04 0.0 200.000000
max 0.0 0.0 0.0 2.580555e+07 2.580555e+07 2.467542e+06 0.0 4209.600000

In [158]:
non_combust.head()


Out[158]:
NERC Region Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust Nameplate Capacity (MW)
4 SERC 0 0 0.0 95361 95361 10233.0 0 225.0
14 SERC 0 0 0.0 132060 132060 14171.0 0 128.1
15 SERC 0 0 0.0 219444 219444 23548.0 0 177.0
16 SERC 0 0 0.0 145265 145265 15588.0 0 210.6
17 SERC 0 0 0.0 193742 193742 20790.0 0 170.0

In [159]:
merged = merged.merge(facility_emiss, how='inner', left_index=True, right_index=True)
merged.describe()


Out[159]:
Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust Nameplate Capacity (MW) Operating Time Gross Load (MW-h) Steam Load (1000lb) SO2 (tons) Avg. NOx Rate (lb/MMBtu) NOx (tons) CO2 (short tons) Heat Input (MMBtu) EPA Region
count 9.020000e+02 9.020000e+02 902.000000 9.020000e+02 9.020000e+02 9.020000e+02 902.000000 902.000000 902.000000 7.870000e+02 2.800000e+01 804.000000 810.000000 810.000000 7.820000e+02 8.100000e+02 902.000000
mean 8.026699e+05 7.984092e+05 9.428446 1.737592e+06 1.732175e+06 1.880601e+05 2.871397 811.238581 825.084268 2.198615e+05 4.814780e+05 107.486081 0.493989 92.464405 1.687089e+05 1.937353e+06 5.252772
std 1.368488e+06 1.361816e+06 12.991945 2.442123e+06 2.440015e+06 2.605284e+05 1.620855 651.587349 892.628864 2.760750e+05 5.343461e+05 300.205896 1.316566 187.144645 2.448701e+05 2.477358e+06 2.177569
min 0.000000e+00 0.000000e+00 0.000000 0.000000e+00 0.000000e+00 -5.056000e+03 1.000000 27.000000 0.000000 0.000000e+00 5.103100e+02 0.000000 -7.815200 0.000000 0.000000e+00 3.400000e+00 1.000000
25% 3.070300e+04 3.037650e+04 1.031250 6.053450e+04 6.040225e+04 4.117250e+03 2.000000 349.050000 89.235000 1.383868e+04 7.159151e+04 0.086750 0.059175 3.705000 1.273775e+04 1.515684e+05 4.000000
50% 2.210980e+05 2.210980e+05 2.069500 7.232245e+05 7.149535e+05 7.754000e+04 2.000000 631.000000 579.315000 1.152687e+05 3.027295e+05 0.557500 0.179250 12.956000 8.169769e+04 1.027198e+06 5.000000
75% 9.083002e+05 9.083002e+05 18.052750 2.485223e+06 2.470881e+06 2.947978e+05 4.000000 1071.050000 1326.427500 3.320322e+05 6.532815e+05 44.930750 0.435050 73.797000 2.095907e+05 2.694102e+06 7.000000
max 1.124778e+07 1.124778e+07 75.589000 1.681146e+07 1.681146e+07 1.775310e+06 11.000000 4317.500000 5190.810000 1.652769e+06 1.783969e+06 3639.271000 17.099800 1722.928000 1.607836e+06 1.567091e+07 10.000000

In [160]:
merged.head()


Out[160]:
NERC Region Quantity Elec_Quantity MMBtuPer_Unit Tot_MMBtu Elec_MMBtu Netgen Combust Nameplate Capacity (MW) Operating Time Gross Load (MW-h) Steam Load (1000lb) SO2 (tons) Avg. NOx Rate (lb/MMBtu) NOx (tons) CO2 (short tons) Heat Input (MMBtu) EPA Region
3 SERC 4662196 4662196 24.476 8393587 8393587 1028473.000 4 2569.5 3844.00 1048450.25 NaN 595.911 0.5760 411.165 677078.976 8471790.125 4.0
8 SERC 188233 188233 30.364 4588547 4588547 471836.004 2 1166.7 1837.25 505777.25 NaN 125.662 0.5375 440.348 531418.603 5179513.525 4.0
10 SERC 116191 116191 2.066 120025 120025 6734.000 7 1288.4 188.25 8897.75 NaN 48.579 0.8474 10.162 11340.124 144258.150 4.0
26 SERC 197510 197510 30.196 4577634 4577634 453833.000 4 2034.0 744.00 495791.00 NaN 173.698 0.0646 165.370 524604.900 5113099.400 4.0
47 SERC 58 58 5.800 336 336 62.000 7 1026.0 20.00 177.00 NaN 0.333 4.4379 0.722 192.400 2765.600 4.0

Now concat the two dataframes


In [161]:
final = pd.concat([merged, non_combust])
final


Out[161]:
Avg. NOx Rate (lb/MMBtu) CO2 (short tons) Combust EPA Region Elec_MMBtu Elec_Quantity Gross Load (MW-h) Heat Input (MMBtu) MMBtuPer_Unit NERC Region NOx (tons) Nameplate Capacity (MW) Netgen Operating Time Quantity SO2 (tons) Steam Load (1000lb) Tot_MMBtu
3 0.5760 677078.976 4 4.0 8393587 4662196 1048450.25 8471790.125 24.476 SERC 411.165 2569.5 1028473.000 3844.00 4662196 595.911 NaN 8393587
8 0.5375 531418.603 2 4.0 4588547 188233 505777.25 5179513.525 30.364 SERC 440.348 1166.7 471836.004 1837.25 188233 125.662 NaN 4588547
10 0.8474 11340.124 7 4.0 120025 116191 8897.75 144258.150 2.066 SERC 10.162 1288.4 6734.000 188.25 116191 48.579 NaN 120025
26 0.0646 524604.900 4 4.0 4577634 197510 495791.00 5113099.400 30.196 SERC 165.370 2034.0 453833.000 744.00 197510 173.698 NaN 4577634
47 4.4379 192.400 7 4.0 336 58 177.00 2765.600 5.800 SERC 0.722 1026.0 62.000 20.00 58 0.333 NaN 336
50 NaN NaN 2 4.0 0 0 NaN NaN 0.000 SERC NaN 575.0 0.000 0.00 0 NaN NaN 0
51 0.2080 270128.775 3 6.0 2799148 241664 262392.63 2481213.560 14.902 SPP 241.863 720.7 238020.998 553.48 241664 1081.656 NaN 2799148
54 0.3753 11101.898 2 4.0 189569 177832 15652.65 186799.693 1.066 SERC 2.787 1055.0 15037.000 254.45 177832 0.057 NaN 189569
56 0.5125 129301.129 2 4.0 1098557 49159 112793.47 1260257.488 28.560 SERC 167.878 538.0 97056.000 759.23 49159 63.902 NaN 1098557
60 0.2169 44877.292 2 7.0 404792 24713 36111.00 427905.242 22.206 MRO 39.165 324.3 31448.000 753.95 24713 176.503 NaN 404792
87 0.3811 88806.200 2 6.0 867759 52499 82439.00 846735.000 19.578 WECC 161.168 257.0 74925.000 744.00 52499 34.069 NaN 867759
108 0.1813 173507.748 2 7.0 1607158 102821 160882.24 1654354.527 18.837 SPP 151.412 348.7 146461.005 660.18 102821 95.013 NaN 1607158
113 0.6462 135364.206 4 9.0 1253737 68608 120001.76 1319344.965 24.756 WECC 149.985 839.9 103305.000 992.93 68608 57.635 NaN 1253737
116 0.2216 6368.940 4 9.0 96601 92596 8653.84 107186.519 2.070 WECC 7.130 334.0 8323.000 247.40 92596 0.032 NaN 96601
117 0.0754 50958.633 5 9.0 1156084 1112689 109950.39 857449.725 3.117 WECC 7.739 1207.4 130221.000 623.34 1112689 0.257 NaN 1156084
118 0.0507 1333.907 3 9.0 43207 40761 1491.10 22445.386 1.060 WECC 0.447 184.5 1618.000 37.80 40761 0.007 NaN 43207
120 0.1839 23956.571 4 9.0 361310 350394 35334.66 403122.659 7.834 WECC 26.270 425.0 33532.000 748.53 350394 0.120 NaN 361310
126 0.5604 50133.139 7 9.0 791633 768575 76803.54 822072.677 2.060 WECC 69.574 558.5 65563.000 1101.56 768575 2.119 NaN 791633
127 0.2907 145337.271 2 6.0 1372507 80710 129794.52 1385756.804 23.091 TRE 203.293 720.0 118269.996 401.58 80710 73.983 NaN 1372507
130 0.2077 860955.931 3 4.0 7687923 310705 822856.12 8391402.264 30.740 SERC 289.871 2390.1 761041.000 1518.03 310705 370.759 NaN 7687923
136 0.1152 682395.436 2 4.0 6351164 258794 689645.88 6651035.344 30.690 FRCC 187.062 1471.8 639443.003 1451.60 258794 477.829 NaN 6351164
141 0.5300 2730.960 4 9.0 47526 46042 4296.61 45960.324 7.844 WECC 11.359 613.5 3116.000 67.49 46042 0.014 NaN 47526
147 0.0101 54824.884 4 9.0 916075 888531 130042.08 922519.859 3.093 WECC 3.965 573.7 126819.000 611.23 888531 0.277 NaN 916075
160 1.0331 185387.782 5 9.0 1900767 295817 185441.25 1905952.909 45.247 WECC 359.939 660.7 163034.000 1970.04 295817 20.620 NaN 1900767
165 0.2807 239423.987 3 6.0 2191811 149290 191538.88 2290169.297 17.700 SPP 170.876 1134.0 174269.002 603.34 149290 506.266 NaN 2191811
170 0.2584 83529.374 1 6.0 1420943 1382240 139956.89 1405547.366 1.028 SERC 239.594 552.5 133914.000 638.74 1382240 0.422 NaN 1420943
201 NaN NaN 4 6.0 0 0 NaN NaN 0.000 SPP NaN 185.0 0.000 0.00 0 NaN NaN 0
202 NaN NaN 2 6.0 0 0 NaN NaN 0.000 SERC NaN 120.0 0.000 0.00 0 NaN NaN 0
203 NaN NaN 2 6.0 0 0 NaN NaN 0.000 SERC NaN 136.0 0.000 0.00 0 NaN NaN 0
207 0.7591 310498.110 3 4.0 3025091 163643 294306.40 3031531.603 23.227 FRCC 649.866 1358.0 266681.002 887.75 163643 182.552 NaN 3025091
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
59621 NaN NaN 0 NaN 854636 0 NaN NaN 0.000 TRE NaN 299.7 91709.000 NaN 0 NaN NaN 854636
59634 NaN NaN 0 NaN 132022 0 NaN NaN 0.000 WECC NaN 45.0 14167.000 NaN 0 NaN NaN 132022
59637 NaN NaN 0 NaN 329035 0 NaN NaN 0.000 MRO NaN 151.4 35308.000 NaN 0 NaN NaN 329035
59654 NaN NaN 0 NaN 243748 0 NaN NaN 0.000 TRE NaN 78.0 26156.000 NaN 0 NaN NaN 243748
59655 NaN NaN 0 NaN 313100 0 NaN NaN 0.000 MRO NaN 97.8 33598.000 NaN 0 NaN NaN 313100
59702 NaN NaN 0 NaN 141779 0 NaN NaN 0.000 WECC NaN 50.0 15214.000 NaN 0 NaN NaN 141779
59723 NaN NaN 0 NaN 141024 0 NaN NaN 0.000 WECC NaN 30.0 15133.000 NaN 0 NaN NaN 141024
59732 NaN NaN 0 NaN 408918 0 NaN NaN 0.000 TRE NaN 150.0 43880.000 NaN 0 NaN NaN 408918
59733 NaN NaN 0 NaN 392479 0 NaN NaN 0.000 TRE NaN 150.0 42116.000 NaN 0 NaN NaN 392479
59734 NaN NaN 0 NaN 405451 0 NaN NaN 0.000 TRE NaN 150.0 43508.000 NaN 0 NaN NaN 405451
59778 NaN NaN 0 NaN 65186 0 NaN NaN 0.000 SERC NaN 47.7 6995.000 NaN 0 NaN NaN 65186
59837 NaN NaN 0 NaN 465577 0 NaN NaN 0.000 SPP NaN 150.0 49960.000 NaN 0 NaN NaN 465577
59838 NaN NaN 0 NaN 181338 0 NaN NaN 0.000 SPP NaN 50.0 19459.000 NaN 0 NaN NaN 181338
59862 NaN NaN 0 NaN 58831 0 NaN NaN 0.000 SERC NaN 30.0 6313.000 NaN 0 NaN NaN 58831
59894 NaN NaN 0 NaN 66202 0 NaN NaN 0.000 SERC NaN 30.0 7104.000 NaN 0 NaN NaN 66202
59943 NaN NaN 0 NaN 757169 0 NaN NaN 0.000 TRE NaN 207.2 81250.000 NaN 0 NaN NaN 757169
59944 NaN NaN 0 NaN 144920 0 NaN NaN 0.000 SERC NaN 80.0 15551.000 NaN 0 NaN NaN 144920
59965 NaN NaN 0 NaN 89956 0 NaN NaN 0.000 WECC NaN 62.1 9653.000 NaN 0 NaN NaN 89956
59974 NaN NaN 0 NaN 690044 0 NaN NaN 0.000 WECC NaN 250.0 74047.000 NaN 0 NaN NaN 690044
59975 NaN NaN 0 NaN 317582 0 NaN NaN 0.000 WECC NaN 150.0 34079.000 NaN 0 NaN NaN 317582
60008 NaN NaN 0 NaN 127866 0 NaN NaN 0.000 WECC NaN 52.0 13721.000 NaN 0 NaN NaN 127866
60013 NaN NaN 0 NaN 410055 0 NaN NaN 0.000 SPP NaN 151.8 44002.000 NaN 0 NaN NaN 410055
60049 NaN NaN 0 NaN 306101 0 NaN NaN 0.000 WECC NaN 80.0 32847.000 NaN 0 NaN NaN 306101
60059 NaN NaN 0 NaN 295142 0 NaN NaN 0.000 TRE NaN 110.0 31671.000 NaN 0 NaN NaN 295142
60069 NaN NaN 0 NaN 612939 0 NaN NaN 0.000 SPP NaN 198.6 65773.000 NaN 0 NaN NaN 612939
60093 NaN NaN 0 NaN 263569 0 NaN NaN 0.000 WECC NaN 109.8 28283.000 NaN 0 NaN NaN 263569
60104 NaN NaN 0 NaN 796420 0 NaN NaN 0.000 TRE NaN 249.7 85462.000 NaN 0 NaN NaN 796420
60262 NaN NaN 0 NaN 240132 0 NaN NaN 0.000 SPP NaN 73.4 25768.000 NaN 0 NaN NaN 240132
60314 NaN NaN 0 NaN 103935 0 NaN NaN 0.000 SPP NaN 35.8 11153.000 NaN 0 NaN NaN 103935
60419 NaN NaN 0 NaN 142450 0 NaN NaN 0.000 WECC NaN 25.0 15286.000 NaN 0 NaN NaN 142450

1835 rows × 18 columns


In [162]:
final.index.rename('Plant ID', inplace=True)

In [163]:
final.describe()


Out[163]:
Avg. NOx Rate (lb/MMBtu) CO2 (short tons) Combust EPA Region Elec_MMBtu Elec_Quantity Gross Load (MW-h) Heat Input (MMBtu) MMBtuPer_Unit NOx (tons) Nameplate Capacity (MW) Netgen Operating Time Quantity SO2 (tons) Steam Load (1000lb) Tot_MMBtu
count 810.000000 7.820000e+02 1835.000000 902.000000 1.835000e+03 1.835000e+03 7.870000e+02 8.100000e+02 1835.000000 810.000000 1835.000000 1.835000e+03 902.000000 1.835000e+03 804.000000 2.800000e+01 1.835000e+03
mean 0.493989 1.687089e+05 1.411444 5.252772 1.406010e+06 3.924605e+05 2.198615e+05 1.937353e+06 4.634582 92.464405 525.352916 1.477843e+05 825.084268 3.945549e+05 107.486081 4.814780e+05 1.408673e+06
std 1.316566 2.448701e+05 1.830965 2.177569 2.809884e+06 1.034649e+06 2.760750e+05 2.477358e+06 10.254386 187.144645 629.910855 2.827274e+05 892.628864 1.039785e+06 300.205896 5.343461e+05 2.811094e+06
min -7.815200 0.000000e+00 0.000000 1.000000 0.000000e+00 0.000000e+00 0.000000e+00 3.400000e+00 0.000000 0.000000 1.800000 -5.056000e+03 0.000000 0.000000e+00 0.000000 5.103100e+02 0.000000e+00
25% 0.059175 1.273775e+04 0.000000 4.000000 1.052255e+05 0.000000e+00 1.383868e+04 1.515684e+05 0.000000 3.705000 102.000000 1.072450e+04 89.235000 0.000000e+00 0.086750 7.159151e+04 1.056730e+05
50% 0.179250 8.169769e+04 0.000000 5.000000 3.320640e+05 0.000000e+00 1.152687e+05 1.027198e+06 0.000000 12.956000 257.000000 3.389400e+04 579.315000 0.000000e+00 0.557500 3.027295e+05 3.320640e+05
75% 0.435050 2.095907e+05 2.000000 7.000000 1.256538e+06 2.050515e+05 3.320322e+05 2.694102e+06 2.064000 73.797000 696.150000 1.284585e+05 1326.427500 2.050515e+05 44.930750 6.532815e+05 1.256538e+06
max 17.099800 1.607836e+06 11.000000 10.000000 2.580555e+07 1.124778e+07 1.652769e+06 1.567091e+07 75.589000 1722.928000 4317.500000 2.467542e+06 5190.810000 1.124778e+07 3639.271000 1.783969e+06 2.580555e+07

In [164]:
final['CO2 (short tons)'].sum() * 2000 * 2.2046 / final['Netgen'].sum()


Out[164]:
2145.06392182917