Extract facility generation and fuel use data

This notebook creates dataframes with monthly facility generation and fuel use data, merges them, and exports the results. The code assumes that you have already downloaded an ELEC.txt file from EIA's bulk download website.



In [1]:

    
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import io, time, json
import requests
from bs4 import BeautifulSoup
import pandas as pd
import urllib
import re
import os
import numpy as np
from scripts import facility_line_to_df
from joblib import Parallel, delayed

Read `ELEC.txt` file



In [2]:

    
path = os.path.join('Raw data', 'Electricity data', '2017-03-15 ELEC.txt')
with open(path, 'rb') as f:
    raw_txt = f.readlines()

Filter lines to only include facility generation

Include ELEC.PLANT in the series_id
Include "All" as the only allowable prime mover
- Some facilities have incorrect data at the individual prime mover level
Do not include "All" as a fuel code
Only monthly frequency



In [3]:

    
gen_rows = [row for row in raw_txt if 'ELEC.PLANT.GEN' in row and 'series_id' in row and 'ALL.M' in row and 'ALL-' not in row]
total_fuel_rows = [row for row in raw_txt if 'ELEC.PLANT.CONS_TOT_BTU' in row and 'series_id' in row and 'ALL.M' in row and 'ALL-' not in row]
eg_fuel_rows = [row for row in raw_txt if 'ELEC.PLANT.CONS_EG_BTU' in row and 'series_id' in row and 'ALL.M' in row and 'ALL-' not in row]

Combine generation into one large dataframe



In [4]:

    
if __name__ == '__main__':
    exception_list = []
    facility_gen = pd.concat(Parallel(n_jobs=-1)(delayed(facility_line_to_df)(json.loads(row)) for row in gen_rows))
    facility_gen.reset_index(drop=True, inplace=True)
    facility_gen.rename_axis({'value':'generation (MWh)'}, axis=1, inplace=True)



In [5]:

    
facility_gen.loc[:,'lat'] = facility_gen.loc[:,'lat'].astype(float)
facility_gen.loc[:,'lon'] = facility_gen.loc[:,'lon'].astype(float)
facility_gen.loc[:, 'plant id'] = facility_gen.loc[:, 'plant id'].astype(int)



In [6]:

    
#drop
facility_gen.tail()









    Out[6]:






  
    
      
      f
      fuel
      geography
      last_updated
      lat
      lon
      month
      plant id
      prime mover
      series_id
      units
      generation (MWh)
      year
    
  
  
    
      1620516
      M
      MSW
      USA-NY
      2016-07-07T17:18:42-04:00
      40.7389
      -73.5906
      5
      10642
      ALL
      ELEC.PLANT.GEN.10642-MSW-ALL.M
      megawatthours
      42745.0
      2001
    
    
      1620517
      M
      MSW
      USA-NY
      2016-07-07T17:18:42-04:00
      40.7389
      -73.5906
      4
      10642
      ALL
      ELEC.PLANT.GEN.10642-MSW-ALL.M
      megawatthours
      45311.0
      2001
    
    
      1620518
      M
      MSW
      USA-NY
      2016-07-07T17:18:42-04:00
      40.7389
      -73.5906
      3
      10642
      ALL
      ELEC.PLANT.GEN.10642-MSW-ALL.M
      megawatthours
      49284.0
      2001
    
    
      1620519
      M
      MSW
      USA-NY
      2016-07-07T17:18:42-04:00
      40.7389
      -73.5906
      2
      10642
      ALL
      ELEC.PLANT.GEN.10642-MSW-ALL.M
      megawatthours
      40350.0
      2001
    
    
      1620520
      M
      MSW
      USA-NY
      2016-07-07T17:18:42-04:00
      40.7389
      -73.5906
      1
      10642
      ALL
      ELEC.PLANT.GEN.10642-MSW-ALL.M
      megawatthours
      41493.0
      2001



In [7]:

    
#drop
facility_gen['fuel'].unique(), facility_gen['prime mover'].unique()









    Out[7]:





(array([u'DFO', u'WO', u'BIT', u'NG', u'SLW', u'WDS', u'BLQ', u'RFO',
        u'WAT', u'OG', u'GEO', u'OTH', u'OBG', u'SUB', u'WH', u'WND',
        u'MSB', u'MSN', u'OBS', u'KER', u'PC', u'BFG', u'PG', u'TDF',
        u'SGC', u'WC', u'JF', u'LFG', u'OBL', u'SUN', u'PUR', u'AB', u'WDL',
        u'NUC', u'SC', u'LIG', u'SGP', u'RC', u'MSW'], dtype=object),
 array([u'ALL'], dtype=object))



In [8]:

    
#drop
path = os.path.join('Clean data', 'Facility generation.csv')
facility_gen.to_csv(path, index=False)

Combine total fuel use into one large dataframe



In [9]:

    
if __name__ == '__main__':
    exception_list = []
    facility_all_fuel = pd.concat(Parallel(n_jobs=-1)(delayed(facility_line_to_df)(json.loads(row)) for row in total_fuel_rows))
    facility_all_fuel.reset_index(drop=True, inplace=True)
    facility_all_fuel.rename_axis({'value':'total fuel (mmbtu)'}, axis=1, inplace=True)



In [10]:

    
facility_all_fuel.loc[:,'lat'] = facility_all_fuel.loc[:,'lat'].astype(float)
facility_all_fuel.loc[:,'lon'] = facility_all_fuel.loc[:,'lon'].astype(float)
facility_all_fuel.loc[:,'plant id'] = facility_all_fuel.loc[:,'plant id'].astype(int)



In [11]:

    
#drop
facility_all_fuel.head()









    Out[11]:






  
    
      
      f
      fuel
      geography
      last_updated
      lat
      lon
      month
      plant id
      prime mover
      series_id
      units
      total fuel (mmbtu)
      year
    
  
  
    
      0
      M
      WAT
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      12
      10140
      ALL
      ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M
      MMBtu
      0.0
      2016
    
    
      1
      M
      WAT
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      11
      10140
      ALL
      ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M
      MMBtu
      0.0
      2016
    
    
      2
      M
      WAT
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      10
      10140
      ALL
      ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M
      MMBtu
      0.0
      2016
    
    
      3
      M
      WAT
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      9
      10140
      ALL
      ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M
      MMBtu
      0.0
      2016
    
    
      4
      M
      WAT
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      8
      10140
      ALL
      ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M
      MMBtu
      0.0
      2016



In [12]:

    
#drop
path = os.path.join('Clean data', 'Facility total fuel consumption.csv')
facility_all_fuel.to_csv(path, index=False)

Combine total fuel use for electricity into one large dataframe



In [13]:

    
if __name__ == '__main__':
    exception_list = []
    facility_eg_fuel = pd.concat(Parallel(n_jobs=-1)(delayed(facility_line_to_df)(json.loads(row)) for row in eg_fuel_rows))
    facility_eg_fuel.reset_index(drop=True, inplace=True)
    facility_eg_fuel.rename_axis({'value':'elec fuel (mmbtu)'}, axis=1, inplace=True)



In [14]:

    
facility_eg_fuel.loc[:,'lat'] = facility_eg_fuel.loc[:,'lat'].astype(float)
facility_eg_fuel.loc[:,'lon'] = facility_eg_fuel.loc[:,'lon'].astype(float)
facility_eg_fuel.loc[:,'plant id'] = facility_eg_fuel.loc[:,'plant id'].astype(int)



In [15]:

    
#drop
facility_eg_fuel.tail()









    Out[15]:






  
    
      
      f
      fuel
      geography
      last_updated
      lat
      lon
      month
      plant id
      prime mover
      series_id
      units
      elec fuel (mmbtu)
      year
    
  
  
    
      1617999
      M
      DFO
      USA-NJ
      2016-07-08T15:27:41-04:00
      NaN
      NaN
      3
      50313
      ALL
      ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M
      MMBtu
      0.0
      2002
    
    
      1618000
      M
      DFO
      USA-NJ
      2016-07-08T15:27:41-04:00
      NaN
      NaN
      2
      50313
      ALL
      ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M
      MMBtu
      0.0
      2002
    
    
      1618001
      M
      DFO
      USA-NJ
      2016-07-08T15:27:41-04:00
      NaN
      NaN
      1
      50313
      ALL
      ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M
      MMBtu
      0.0
      2002
    
    
      1618002
      M
      DFO
      USA-NJ
      2016-07-08T15:27:41-04:00
      NaN
      NaN
      12
      50313
      ALL
      ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M
      MMBtu
      225.0
      2001
    
    
      1618003
      M
      DFO
      USA-NJ
      2016-07-08T15:27:41-04:00
      NaN
      NaN
      1
      50313
      ALL
      ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M
      MMBtu
      9671.0
      2001



In [16]:

    
#drop
path = os.path.join('Clean data', 'Facility electric fuel consumption.csv')
facility_eg_fuel.to_csv(path, index=False)

Reload dataframes from files

Goal is to merge the fuel use and fuel use for electricity back into the generation dataframe



In [3]:

    
#drop
path1 = os.path.join('Clean data', 'Facility generation.csv')
path2 = os.path.join('Clean data', 'Facility total fuel consumption.csv')
path3 = os.path.join('Clean data', 'Facility electric fuel consumption.csv')



In [4]:

    
#drop
facility_gen = pd.read_csv(path1)



In [13]:

    
#drop
facility_gen.head()









    Out[13]:






  
    
      
      f
      fuel
      geography
      last_updated
      lat
      lon
      month
      plant id
      prime mover
      series_id
      units
      generation (MWh)
      year
    
  
  
    
      0
      M
      MSB
      USA-MD
      2017-01-26T09:12:59-05:00
      39.266041
      -76.629653
      11
      10629
      ST
      ELEC.PLANT.GEN.10629-MSB-ST.M
      megawatthours
      17893.210
      2016
    
    
      1
      M
      MSB
      USA-MD
      2017-01-26T09:12:59-05:00
      39.266041
      -76.629653
      10
      10629
      ST
      ELEC.PLANT.GEN.10629-MSB-ST.M
      megawatthours
      16329.693
      2016
    
    
      2
      M
      MSB
      USA-MD
      2017-01-26T09:12:59-05:00
      39.266041
      -76.629653
      9
      10629
      ST
      ELEC.PLANT.GEN.10629-MSB-ST.M
      megawatthours
      16918.934
      2016
    
    
      3
      M
      MSB
      USA-MD
      2017-01-26T09:12:59-05:00
      39.266041
      -76.629653
      8
      10629
      ST
      ELEC.PLANT.GEN.10629-MSB-ST.M
      megawatthours
      16424.989
      2016
    
    
      4
      M
      MSB
      USA-MD
      2017-01-26T09:12:59-05:00
      39.266041
      -76.629653
      7
      10629
      ST
      ELEC.PLANT.GEN.10629-MSB-ST.M
      megawatthours
      17069.401
      2016



In [5]:

    
#drop
facility_all_fuel = pd.read_csv(path2)



In [6]:

    
#drop
facility_eg_fuel = pd.read_csv(path3)

Merge dataframes

Need to be careful here because there are fuel/prime mover combinations that have generation but no fuel use (e.g. the steam cycle of a combined cycle system - but only in some cases).



In [17]:

    
keep_cols = ['fuel', 'generation (MWh)', 'month', 'plant id', 'prime mover', 'year',
             'geography', 'lat', 'lon', 'last_updated']
merge_cols = ['fuel', 'month', 'plant id', 'year']
gen_total_fuel = facility_all_fuel.merge(facility_gen.loc[:,keep_cols], 
                                    how='outer', on=merge_cols)



In [18]:

    
#drop
gen_total_fuel.loc[gen_total_fuel['geography_x'].isnull()].head()









    Out[18]:






  
    
      
      f
      fuel
      geography_x
      last_updated_x
      lat_x
      lon_x
      month
      plant id
      prime mover_x
      series_id
      units
      total fuel (mmbtu)
      year
      generation (MWh)
      prime mover_y
      geography_y
      lat_y
      lon_y
      last_updated_y
    
  
  
    
      1619174
      NaN
      BFG
      NaN
      NaN
      NaN
      NaN
      12
      10475
      NaN
      NaN
      NaN
      NaN
      2016
      0.0
      ALL
      USA-IN
      41.6836
      -87.4233
      2017-03-06T16:49:40-05:00
    
    
      1619175
      NaN
      BFG
      NaN
      NaN
      NaN
      NaN
      11
      10475
      NaN
      NaN
      NaN
      NaN
      2016
      0.0
      ALL
      USA-IN
      41.6836
      -87.4233
      2017-03-06T16:49:40-05:00
    
    
      1619176
      NaN
      BFG
      NaN
      NaN
      NaN
      NaN
      10
      10475
      NaN
      NaN
      NaN
      NaN
      2016
      0.0
      ALL
      USA-IN
      41.6836
      -87.4233
      2017-03-06T16:49:40-05:00
    
    
      1619177
      NaN
      BFG
      NaN
      NaN
      NaN
      NaN
      9
      10475
      NaN
      NaN
      NaN
      NaN
      2016
      0.0
      ALL
      USA-IN
      41.6836
      -87.4233
      2017-03-06T16:49:40-05:00
    
    
      1619178
      NaN
      BFG
      NaN
      NaN
      NaN
      NaN
      8
      10475
      NaN
      NaN
      NaN
      NaN
      2016
      0.0
      ALL
      USA-IN
      41.6836
      -87.4233
      2017-03-06T16:49:40-05:00

Fill in missing values from the first merge



In [19]:

    
def fill_missing(df):
    cols = [col[:-2] for col in df.columns if '_x' in col]
    
    # Create new column from the _x version, fill missing values from the _y version
    for col in cols:
        df[col] = df.loc[:, col + '_x']
        df.loc[df[col].isnull(), col] = df.loc[df[col].isnull(), col + '_y']
        
        df.drop([col+'_x', col+'_y'], axis=1, inplace=True)



In [20]:

    
fill_missing(gen_total_fuel)



In [21]:

    
#drop
gen_total_fuel.loc[gen_total_fuel['geography'].isnull()]









    Out[21]:






  
    
      
      f
      fuel
      month
      plant id
      series_id
      units
      total fuel (mmbtu)
      year
      generation (MWh)
      geography
      last_updated
      lat
      lon
      prime mover



In [22]:

    
keep_cols = ['fuel', 'elec fuel (mmbtu)', 'month', 'plant id', 'prime mover', 'year',
             'geography', 'lat', 'lon', 'last_updated']
all_facility_data = gen_total_fuel.merge(facility_eg_fuel.loc[:,keep_cols], 
                                         how='outer', on=merge_cols)

FIll in missing values from second merge and drop units/series_id columns



In [23]:

    
fill_missing(all_facility_data)



In [24]:

    
#drop
all_facility_data.loc[all_facility_data['geography'].isnull()]









    Out[24]:






  
    
      
      f
      fuel
      month
      plant id
      series_id
      units
      total fuel (mmbtu)
      year
      generation (MWh)
      elec fuel (mmbtu)
      geography
      last_updated
      lat
      lon
      prime mover



In [25]:

    
all_facility_data.drop(['units', 'series_id'], axis=1, inplace=True)
all_facility_data.head()









    Out[25]:






  
    
      
      f
      fuel
      month
      plant id
      total fuel (mmbtu)
      year
      generation (MWh)
      elec fuel (mmbtu)
      geography
      last_updated
      lat
      lon
      prime mover
    
  
  
    
      0
      M
      WAT
      12
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
    
    
      1
      M
      WAT
      11
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
    
    
      2
      M
      WAT
      10
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
    
    
      3
      M
      WAT
      9
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
    
    
      4
      M
      WAT
      8
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL



In [26]:

    
#drop
all_facility_data.dtypes









    Out[26]:





f                      object
fuel                   object
month                   int64
plant id                int64
total fuel (mmbtu)    float64
year                    int64
generation (MWh)      float64
elec fuel (mmbtu)     float64
geography              object
last_updated           object
lat                   float64
lon                   float64
prime mover            object
dtype: object

Add datetime and quarter columns



In [27]:

    
all_facility_data['datetime'] = pd.to_datetime(all_facility_data['year'].astype(str) + 
                                               '-' + all_facility_data['month'].astype(str), 
                                               format='%Y-%m')
all_facility_data['quarter'] = all_facility_data['datetime'].dt.quarter



In [28]:

    
#drop
all_facility_data.head()









    Out[28]:






  
    
      
      f
      fuel
      month
      plant id
      total fuel (mmbtu)
      year
      generation (MWh)
      elec fuel (mmbtu)
      geography
      last_updated
      lat
      lon
      prime mover
      datetime
      quarter
    
  
  
    
      0
      M
      WAT
      12
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
      2016-12-01
      4
    
    
      1
      M
      WAT
      11
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
      2016-11-01
      4
    
    
      2
      M
      WAT
      10
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
      2016-10-01
      4
    
    
      3
      M
      WAT
      9
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
      2016-09-01
      3
    
    
      4
      M
      WAT
      8
      10140
      0.0
      2016
      0.0
      0.0
      USA-ID
      2017-03-06T16:49:40-05:00
      44.027444
      -112.719439
      ALL
      2016-08-01
      3

Load emission factors

These are mostly EIA emission factors



In [29]:

    
path = os.path.join('Clean data', 'Final emission factors.csv')
ef = pd.read_csv(path, index_col=0)

Apply factors to facility generation



In [30]:

    
fossil_factors = dict(zip(ef.index, ef['Fossil Factor']))
total_factors = dict(zip(ef.index, ef['Total Factor']))
fossil_factors, total_factors









    Out[30]:





({'AB': 0.0,
  'BFG': 274.31999999999999,
  'BIT': 93.299999999999997,
  'BLQ': 0.0,
  'DFO': 73.159999999999997,
  'GEO': 7.71,
  'JF': 70.900000000000006,
  'KER': 72.299999999999997,
  'LFG': 0.0,
  'LIG': 97.700000000000003,
  'MSB': 0.0,
  'MSN': 90.700000000000003,
  'MSW': 41.689999999999998,
  'NG': 53.07,
  'NUC': 0.0,
  'OBG': 0.0,
  'OBL': 0.0,
  'OBS': 0.0,
  'OG': 59.0,
  'OTH': 0.0,
  'PC': 102.09999999999999,
  'PG': 63.07,
  'PUR': 0.0,
  'RC': 93.299999999999997,
  'RFO': 78.790000000000006,
  'SC': 93.299999999999997,
  'SGC': 93.299999999999997,
  'SGP': 73.159999999999997,
  'SLW': 0.0,
  'SUB': 97.200000000000003,
  'SUN': 0.0,
  'TDF': 85.969999999999999,
  'WAT': 0.0,
  'WC': 93.299999999999997,
  'WDL': 0.0,
  'WDS': 0.0,
  'WH': 0.0,
  'WND': 0.0,
  'WO': 95.25},
 {'AB': 118.17,
  'BFG': 274.31999999999999,
  'BIT': 93.299999999999997,
  'BLQ': 94.400000000000006,
  'DFO': 73.159999999999997,
  'GEO': 7.71,
  'JF': 70.900000000000006,
  'KER': 72.299999999999997,
  'LFG': 52.170000000000002,
  'LIG': 97.700000000000003,
  'MSB': 90.700000000000003,
  'MSN': 90.700000000000003,
  'MSW': 41.689999999999998,
  'NG': 53.07,
  'NUC': 0.0,
  'OBG': 52.170000000000002,
  'OBL': 83.980000000000004,
  'OBS': 105.51000000000001,
  'OG': 59.0,
  'OTH': 0.0,
  'PC': 102.09999999999999,
  'PG': 63.07,
  'PUR': 0.0,
  'RC': 93.299999999999997,
  'RFO': 78.790000000000006,
  'SC': 93.299999999999997,
  'SGC': 93.299999999999997,
  'SGP': 73.159999999999997,
  'SLW': 83.980000000000004,
  'SUB': 97.200000000000003,
  'SUN': 0.0,
  'TDF': 85.969999999999999,
  'WAT': 0.0,
  'WC': 93.299999999999997,
  'WDL': 83.980000000000004,
  'WDS': 93.799999999999997,
  'WH': 0.0,
  'WND': 0.0,
  'WO': 95.25})

Apply emission factors

Fuel emission factor is kg/mmbtu



In [31]:

    
# Start with 0 emissions in all rows
# For fuels where we have an emission factor, replace the 0 with the calculated value
all_facility_data['all fuel fossil CO2 (kg)'] = 0
all_facility_data['elec fuel fossil CO2 (kg)'] = 0
all_facility_data['all fuel total CO2 (kg)'] = 0
all_facility_data['elec fuel total CO2 (kg)'] = 0
for fuel in total_factors.keys():
    # All fuel CO2 emissions
    all_facility_data.loc[all_facility_data['fuel']==fuel,'all fuel fossil CO2 (kg)'] = \
        all_facility_data.loc[all_facility_data['fuel']==fuel,'total fuel (mmbtu)'] * fossil_factors[fuel]      
    all_facility_data.loc[all_facility_data['fuel']==fuel,'all fuel total CO2 (kg)'] = \
        all_facility_data.loc[all_facility_data['fuel']==fuel,'total fuel (mmbtu)'] * total_factors[fuel]
    
    # Electric fuel CO2 emissions
    all_facility_data.loc[all_facility_data['fuel']==fuel,'elec fuel fossil CO2 (kg)'] = \
        all_facility_data.loc[all_facility_data['fuel']==fuel,'elec fuel (mmbtu)'] * fossil_factors[fuel]
    all_facility_data.loc[all_facility_data['fuel']==fuel,'elec fuel total CO2 (kg)'] = \
        all_facility_data.loc[all_facility_data['fuel']==fuel,'elec fuel (mmbtu)'] * total_factors[fuel]

Set nan and negative emissions to 0

When no fuel was used for electricity production, or when negative fuel is somehow reported by EIA, set the emissions to 0. This is implemented by filtering out all values that are greater than or equal to 0.



In [32]:

    
# Fossil CO2
all_facility_data.loc[~(all_facility_data['all fuel fossil CO2 (kg)']>=0),
                      'all fuel fossil CO2 (kg)'] = 0
all_facility_data.loc[~(all_facility_data['elec fuel fossil CO2 (kg)']>=0),
                      'elec fuel fossil CO2 (kg)'] = 0
# Total CO2
all_facility_data.loc[~(all_facility_data['all fuel total CO2 (kg)']>=0),
                      'all fuel total CO2 (kg)'] = 0
all_facility_data.loc[~(all_facility_data['elec fuel total CO2 (kg)']>=0),
                      'elec fuel total CO2 (kg)'] = 0

Export



In [33]:

    
path = os.path.join('Clean data', 'Facility gen fuels and CO2.csv')
all_facility_data.to_csv(path, index=False)



In [ ]:

	f	fuel	geography	last_updated	lat	lon	month	plant id	prime mover	series_id	units	generation (MWh)	year
1620516	M	MSW	USA-NY	2016-07-07T17:18:42-04:00	40.7389	-73.5906	5	10642	ALL	ELEC.PLANT.GEN.10642-MSW-ALL.M	megawatthours	42745.0	2001
1620517	M	MSW	USA-NY	2016-07-07T17:18:42-04:00	40.7389	-73.5906	4	10642	ALL	ELEC.PLANT.GEN.10642-MSW-ALL.M	megawatthours	45311.0	2001
1620518	M	MSW	USA-NY	2016-07-07T17:18:42-04:00	40.7389	-73.5906	3	10642	ALL	ELEC.PLANT.GEN.10642-MSW-ALL.M	megawatthours	49284.0	2001
1620519	M	MSW	USA-NY	2016-07-07T17:18:42-04:00	40.7389	-73.5906	2	10642	ALL	ELEC.PLANT.GEN.10642-MSW-ALL.M	megawatthours	40350.0	2001
1620520	M	MSW	USA-NY	2016-07-07T17:18:42-04:00	40.7389	-73.5906	1	10642	ALL	ELEC.PLANT.GEN.10642-MSW-ALL.M	megawatthours	41493.0	2001

	f	fuel	geography	last_updated	lat	lon	month	plant id	prime mover	series_id	units	year
0	M	WAT	USA-ID	2017-03-06T16:49:40-05:00	44.027444	-112.719439	12	10140	ALL	ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M	MMBtu	2016
1	M	WAT	USA-ID	2017-03-06T16:49:40-05:00	44.027444	-112.719439	11	10140	ALL	ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M	MMBtu	2016
2	M	WAT	USA-ID	2017-03-06T16:49:40-05:00	44.027444	-112.719439	10	10140	ALL	ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M	MMBtu	2016
3	M	WAT	USA-ID	2017-03-06T16:49:40-05:00	44.027444	-112.719439	9	10140	ALL	ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M	MMBtu	2016
4	M	WAT	USA-ID	2017-03-06T16:49:40-05:00	44.027444	-112.719439	8	10140	ALL	ELEC.PLANT.CONS_TOT_BTU.10140-WAT-ALL.M	MMBtu	2016

	f	fuel	geography	last_updated	lat	lon	month	plant id	prime mover	series_id	units	elec fuel (mmbtu)	year
1617999	M	DFO	USA-NJ	2016-07-08T15:27:41-04:00	NaN	NaN	3	50313	ALL	ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M	MMBtu	0.0	2002
1618000	M	DFO	USA-NJ	2016-07-08T15:27:41-04:00	NaN	NaN	2	50313	ALL	ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M	MMBtu	0.0	2002
1618001	M	DFO	USA-NJ	2016-07-08T15:27:41-04:00	NaN	NaN	1	50313	ALL	ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M	MMBtu	0.0	2002
1618002	M	DFO	USA-NJ	2016-07-08T15:27:41-04:00	NaN	NaN	12	50313	ALL	ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M	MMBtu	225.0	2001
1618003	M	DFO	USA-NJ	2016-07-08T15:27:41-04:00	NaN	NaN	1	50313	ALL	ELEC.PLANT.CONS_EG_BTU.50313-DFO-ALL.M	MMBtu	9671.0	2001

	f	fuel	geography	last_updated	lat	lon	month	plant id	prime mover	series_id	units	generation (MWh)	year
0	M	MSB	USA-MD	2017-01-26T09:12:59-05:00	39.266041	-76.629653	11	10629	ST	ELEC.PLANT.GEN.10629-MSB-ST.M	megawatthours	17893.210	2016
1	M	MSB	USA-MD	2017-01-26T09:12:59-05:00	39.266041	-76.629653	10	10629	ST	ELEC.PLANT.GEN.10629-MSB-ST.M	megawatthours	16329.693	2016
2	M	MSB	USA-MD	2017-01-26T09:12:59-05:00	39.266041	-76.629653	9	10629	ST	ELEC.PLANT.GEN.10629-MSB-ST.M	megawatthours	16918.934	2016
3	M	MSB	USA-MD	2017-01-26T09:12:59-05:00	39.266041	-76.629653	8	10629	ST	ELEC.PLANT.GEN.10629-MSB-ST.M	megawatthours	16424.989	2016
4	M	MSB	USA-MD	2017-01-26T09:12:59-05:00	39.266041	-76.629653	7	10629	ST	ELEC.PLANT.GEN.10629-MSB-ST.M	megawatthours	17069.401	2016

	f	fuel	geography_x	last_updated_x	lat_x	lon_x	month	plant id	prime mover_x	series_id	units	total fuel (mmbtu)	year	prime mover_y	geography_y	lat_y	lon_y	last_updated_y
1619174	NaN	BFG	NaN	NaN	NaN	NaN	12	10475	NaN	NaN	NaN	NaN	2016	ALL	USA-IN	41.6836	-87.4233	2017-03-06T16:49:40-05:00
1619175	NaN	BFG	NaN	NaN	NaN	NaN	11	10475	NaN	NaN	NaN	NaN	2016	ALL	USA-IN	41.6836	-87.4233	2017-03-06T16:49:40-05:00
1619176	NaN	BFG	NaN	NaN	NaN	NaN	10	10475	NaN	NaN	NaN	NaN	2016	ALL	USA-IN	41.6836	-87.4233	2017-03-06T16:49:40-05:00
1619177	NaN	BFG	NaN	NaN	NaN	NaN	9	10475	NaN	NaN	NaN	NaN	2016	ALL	USA-IN	41.6836	-87.4233	2017-03-06T16:49:40-05:00
1619178	NaN	BFG	NaN	NaN	NaN	NaN	8	10475	NaN	NaN	NaN	NaN	2016	ALL	USA-IN	41.6836	-87.4233	2017-03-06T16:49:40-05:00