In [1]:
%load_ext autoreload
%autoreload 2
%load_ext line_profiler

In [2]:
import sys
import os
import pudl
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import dask.dataframe as dd
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline

In [3]:
plt.style.use('ggplot')
mpl.rcParams['figure.figsize'] = (10,4)
mpl.rcParams['figure.dpi'] = 150
pd.options.display.max_columns = 56

In [4]:
pudl_out = pudl.output.pudltabl.PudlTabl()

In [5]:
gens_eia860 = pudl_out.gens_eia860()
plants_eia860 = pudl_out.plants_eia860()


---------------------------------------------------------------------------
ProgrammingError                          Traceback (most recent call last)
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1192                         parameters,
-> 1193                         context)
   1194         except BaseException as e:

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    508     def do_execute(self, cursor, statement, parameters, context=None):
--> 509         cursor.execute(statement, parameters)
    510 

ProgrammingError: relation "generators_eia860" does not exist
LINE 2: FROM generators_eia860 
             ^


The above exception was the direct cause of the following exception:

ProgrammingError                          Traceback (most recent call last)
<ipython-input-5-bb173ae901f1> in <module>()
----> 1 gens_eia860 = pudl_out.gens_eia860()
      2 plants_eia860 = pudl_out.plants_eia860()

~/code/catalyst/pudl/pudl/output/pudltabl.py in gens_eia860(self, update)
    159                 start_date=self.start_date,
    160                 end_date=self.end_date,
--> 161                 testing=self.testing)
    162         return self._dfs['gens_eia860']
    163 

~/code/catalyst/pudl/pudl/output/eia860.py in generators_eia860(start_date, end_date, testing)
    225         )
    226 
--> 227     gens_eia860 = pd.read_sql(gens_eia860_select, pudl_engine)
    228     # Canonical sources for these fields are elsewhere. We will merge them in.
    229     gens_eia860 = gens_eia860.drop(['utility_id_eia',

~/anaconda3/envs/pudl/lib/python3.7/site-packages/pandas/io/sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
    395             sql, index_col=index_col, params=params,
    396             coerce_float=coerce_float, parse_dates=parse_dates,
--> 397             chunksize=chunksize)
    398 
    399 

~/anaconda3/envs/pudl/lib/python3.7/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
   1061         args = _convert_params(sql, params)
   1062 
-> 1063         result = self.execute(*args)
   1064         columns = result.keys()
   1065 

~/anaconda3/envs/pudl/lib/python3.7/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
    952     def execute(self, *args, **kwargs):
    953         """Simple passthrough to SQLAlchemy connectable"""
--> 954         return self.connectable.execute(*args, **kwargs)
    955 
    956     def read_table(self, table_name, index_col=None, coerce_float=True,

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in execute(self, statement, *multiparams, **params)
   2073 
   2074         connection = self.contextual_connect(close_with_result=True)
-> 2075         return connection.execute(statement, *multiparams, **params)
   2076 
   2077     def scalar(self, statement, *multiparams, **params):

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in execute(self, object, *multiparams, **params)
    946             raise exc.ObjectNotExecutableError(object)
    947         else:
--> 948             return meth(self, multiparams, params)
    949 
    950     def _execute_function(self, func, multiparams, params):

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params)
    267     def _execute_on_connection(self, connection, multiparams, params):
    268         if self.supports_execution:
--> 269             return connection._execute_clauseelement(self, multiparams, params)
    270         else:
    271             raise exc.ObjectNotExecutableError(self)

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params)
   1058             compiled_sql,
   1059             distilled_params,
-> 1060             compiled_sql, distilled_params
   1061         )
   1062         if self._has_events or self.engine._has_events:

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1198                 parameters,
   1199                 cursor,
-> 1200                 context)
   1201 
   1202         if self._has_events or self.engine._has_events:

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1411                 util.raise_from_cause(
   1412                     sqlalchemy_exception,
-> 1413                     exc_info
   1414                 )
   1415             else:

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/util/compat.py in raise_from_cause(exception, exc_info)
    263     exc_type, exc_value, exc_tb = exc_info
    264     cause = exc_value if exc_value is not exception else None
--> 265     reraise(type(exception), exception, tb=exc_tb, cause=cause)
    266 
    267 if py3k:

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause)
    246             value.__cause__ = cause
    247         if value.__traceback__ is not tb:
--> 248             raise value.with_traceback(tb)
    249         raise value
    250 

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1191                         statement,
   1192                         parameters,
-> 1193                         context)
   1194         except BaseException as e:
   1195             self._handle_dbapi_exception(

~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
    507 
    508     def do_execute(self, cursor, statement, parameters, context=None):
--> 509         cursor.execute(statement, parameters)
    510 
    511     def do_execute_no_params(self, cursor, statement, context=None):

ProgrammingError: (psycopg2.ProgrammingError) relation "generators_eia860" does not exist
LINE 2: FROM generators_eia860 
             ^
 [SQL: 'SELECT generators_eia860.id, generators_eia860.report_date, generators_eia860.utility_id_eia, generators_eia860.utility_name, generators_eia860.plant_id_eia, generators_eia860.plant_name, generators_eia860.state, generators_eia860.county, generators_eia860.generator_id, generators_eia860.prime_mover_code, generators_eia860.unit_id_eia, generators_eia860.operational_status_code, generators_eia860.ownership_code, generators_eia860.duct_burners, generators_eia860.capacity_mw, generators_eia860.summer_capacity_mw, generators_eia860.winter_capacity_mw, generators_eia860.operating_date, generators_eia860.energy_source_code_1, generators_eia860.energy_source_code_2, generators_eia860.energy_source_code_3, generators_eia860.energy_source_code_4, generators_eia860.energy_source_code_5, generators_eia860.energy_source_code_6, generators_eia860.fuel_type_code_pudl, generators_eia860.multiple_fuels, generators_eia860.deliver_power_transgrid, generators_eia860.syncronized_transmission_grid, generators_eia860.turbines_num, generators_eia860.sector_name, generators_eia860.sector_id, generators_eia860.topping_bottoming_code, generators_eia860.planned_modifications, generators_eia860.planned_net_summer_capacity_uprate_mw, generators_eia860.planned_net_winter_capacity_uprate_mw, generators_eia860.planned_uprate_date, generators_eia860.planned_net_summer_capacity_derate_mw, generators_eia860.planned_net_winter_capacity_derate_mw, generators_eia860.planned_derate_date, generators_eia860.planned_new_prime_mover_code, generators_eia860.planned_energy_source_code_1, generators_eia860.planned_repower_date, generators_eia860.other_planned_modifications, generators_eia860.other_modifications_date, generators_eia860.planned_retirement_date, generators_eia860.solid_fuel_gasification, generators_eia860.pulverized_coal_tech, generators_eia860.fluidized_bed_tech, generators_eia860.subcritical_tech, generators_eia860.supercritical_tech, generators_eia860.ultrasupercritical_tech, generators_eia860.carbon_capture, generators_eia860.startup_source_code_1, generators_eia860.startup_source_code_2, generators_eia860.startup_source_code_3, generators_eia860.startup_source_code_4, generators_eia860.technology_description, generators_eia860.turbines_inverters_hydrokinetics, generators_eia860.time_cold_shutdown_full_load_code, generators_eia860.stoker_tech, generators_eia860.other_combustion_tech, generators_eia860.planned_new_capacity_mw, generators_eia860.cofire_fuels, generators_eia860.switch_oil_gas, generators_eia860.heat_bypass_recovery, generators_eia860.rto_iso_lmp_node_id, generators_eia860.rto_iso_location_wholesale_reporting_id, generators_eia860.nameplate_power_factor, generators_eia860.minimum_load_mw, generators_eia860.uprate_derate_during_year, generators_eia860.uprate_derate_completed_date, generators_eia860.associated_combined_heat_power, generators_eia860.original_planned_operating_date, generators_eia860.current_planned_operating_date, generators_eia860.summer_estimated_capability_mw, generators_eia860.winter_estimated_capability_mw, generators_eia860.operating_switch, generators_eia860.previously_canceled, generators_eia860.retirement_date \nFROM generators_eia860 \nWHERE generators_eia860.report_date >= %(report_date_1)s AND generators_eia860.report_date <= %(report_date_2)s'] [parameters: {'report_date_1': Timestamp('2009-01-01 00:00:00'), 'report_date_2': Timestamp('2017-12-31 00:00:00')}] (Background on this error at: http://sqlalche.me/e/f405)

In [14]:
gen_eia923 = pudl_out.gen_eia923()

In [66]:
xcel_util_ids = [
    13781,
    15466,
    17718,
]
cols = [
    'report_date',
    'plant_id_eia',
    'plant_name',
    'utility_id_eia',
    'utility_name',
    'state',
    'fuel_type_code_pudl',
    'technology_description',
    'capacity_mw',
    'unit_id_eia',
    'operating_date',
    'retirement_date'
]

In [67]:
gens_eia860.filter(regex='.*date.*').columns


Out[67]:
Index(['report_date', 'current_planned_operating_date', 'operating_date',
       'original_planned_operating_date', 'other_modifications_date',
       'planned_derate_date', 'planned_repower_date',
       'planned_retirement_date', 'planned_uprate_date', 'retirement_date',
       'uprate_derate_completed_date'],
      dtype='object')

In [69]:
gens_eia860[(gens_eia860.utility_id_eia.isin(xcel_util_ids)) &
            (pd.to_datetime(gens_eia860.report_date).dt.year==2017) &
            (gens_eia860.fuel_type_code_pudl.isin(['coal','gas']))][cols].sort_values('operating_date', ascending=False)


Out[69]:
report_date plant_id_eia plant_name utility_id_eia utility_name state fuel_type_code_pudl technology_description capacity_mw unit_id_eia operating_date retirement_date
128610 2017-01-01 469 Cherokee 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 185.3 CHR0 2015-08-01 None
128611 2017-01-01 469 Cherokee 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 255.0 CHR0 2015-08-01 None
128609 2017-01-01 469 Cherokee 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 185.3 CHR0 2015-08-01 None
134232 2017-01-01 3482 Jones 17718 Southwestern Public Service Co TX gas Natural Gas Fired Combustion Turbine 182.7 None 2013-05-01 None
134231 2017-01-01 3482 Jones 17718 Southwestern Public Service Co TX gas Natural Gas Fired Combustion Turbine 182.7 None 2011-06-01 None
128617 2017-01-01 470 Comanche (CO) 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 856.8 None 2010-07-01 None
135485 2017-01-01 6112 Fort St Vrain 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combustion Turbine 140.3 None 2009-05-01 None
135484 2017-01-01 6112 Fort St Vrain 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combustion Turbine 140.3 None 2009-05-01 None
131514 2017-01-01 1927 Riverside (MN) 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 210.6 RIV0 2009-05-01 None
131513 2017-01-01 1927 Riverside (MN) 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 210.6 RIV0 2009-05-01 None
131496 2017-01-01 1912 High Bridge 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 250.0 HBR0 2008-05-01 None
131495 2017-01-01 1912 High Bridge 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 197.0 HBR0 2008-05-01 None
131494 2017-01-01 1912 High Bridge 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 197.0 HBR0 2008-05-01 None
137848 2017-01-01 8027 Blue Lake 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combustion Turbine 166.3 None 2005-06-01 None
136664 2017-01-01 7237 Angus Anson 13781 Northern States Power Co - Minnesota SD gas Natural Gas Fired Combustion Turbine 166.3 None 2005-06-01 None
137849 2017-01-01 8027 Blue Lake 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combustion Turbine 166.3 None 2005-06-01 None
143364 2017-01-01 55835 Rocky Mountain Energy Center 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 175.1 RKM0 2004-05-01 None
143363 2017-01-01 55835 Rocky Mountain Energy Center 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 175.1 RKM0 2004-05-01 None
143365 2017-01-01 55835 Rocky Mountain Energy Center 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 334.9 RKM0 2004-05-01 None
143156 2017-01-01 55645 Blue Spruce Energy Center 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combustion Turbine 198.9 None 2003-05-01 None
143157 2017-01-01 55645 Blue Spruce Energy Center 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combustion Turbine 198.9 None 2003-05-01 None
131477 2017-01-01 1904 Black Dog 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 187.9 BDS0 2002-06-01 None
135483 2017-01-01 6112 Fort St Vrain 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 175.1 FSV0 2001-06-01 None
135482 2017-01-01 6112 Fort St Vrain 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 175.1 FSV0 1999-01-01 None
135480 2017-01-01 6112 Fort St Vrain 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 342.6 FSV0 1998-07-01 None
132557 2017-01-01 2454 Cunningham 17718 Southwestern Public Service Co NM gas Natural Gas Fired Combustion Turbine 126.9 None 1998-05-01 None
132558 2017-01-01 2454 Cunningham 17718 Southwestern Public Service Co NM gas Natural Gas Fired Combustion Turbine 126.9 None 1998-05-01 None
135481 2017-01-01 6112 Fort St Vrain 15466 Public Service Co of Colorado CO gas Natural Gas Fired Combined Cycle 175.1 FSV0 1996-05-01 None
136663 2017-01-01 7237 Angus Anson 13781 Northern States Power Co - Minnesota SD gas Natural Gas Fired Combustion Turbine 119.7 None 1994-08-01 None
136662 2017-01-01 7237 Angus Anson 13781 Northern States Power Co - Minnesota SD gas Natural Gas Fired Combustion Turbine 119.7 None 1994-08-01 None
... ... ... ... ... ... ... ... ... ... ... ... ...
134233 2017-01-01 3484 Nichols 17718 Southwestern Public Service Co TX gas Natural Gas Steam Turbine 113.6 None 1960-01-01 None
128613 2017-01-01 469 Cherokee 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 125.0 None 1959-06-01 2011-10-01
131498 2017-01-01 1912 High Bridge 13781 Northern States Power Co - Minnesota MN coal Conventional Steam Coal 163.2 None 1959-04-01 2007-12-01
131507 2017-01-01 1915 Allen S King 13781 Northern States Power Co - Minnesota MN coal Conventional Steam Coal 598.4 None 1958-03-01 None
128612 2017-01-01 469 Cherokee 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 125.0 None 1957-06-01 2012-04-01
132555 2017-01-01 2454 Cunningham 17718 Southwestern Public Service Co NM gas Natural Gas Steam Turbine 75.0 None 1957-01-01 None
134876 2017-01-01 3982 Bay Front 13781 Northern States Power Co - Minnesota WI gas Natural Gas Steam Turbine 27.2 None 1957-01-01 None
131497 2017-01-01 1912 High Bridge 13781 Northern States Power Co - Minnesota MN coal Conventional Steam Coal 113.6 None 1956-08-01 2007-12-01
131480 2017-01-01 1904 Black Dog 13781 Northern States Power Co - Minnesota MN coal Conventional Steam Coal 113.6 None 1955-07-01 2015-04-01
165686 2017-01-01 465 Arapahoe 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 112.5 None 1955-06-01 2013-12-01
134238 2017-01-01 3485 Plant X 17718 Southwestern Public Service Co TX gas Natural Gas Steam Turbine 98.0 None 1955-01-01 None
131476 2017-01-01 1904 Black Dog 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 136.9 BDS0 1954-10-01 None
165690 2017-01-01 478 Zuni 15466 Public Service Co of Colorado CO gas Natural Gas Steam Turbine 75.0 None 1954-06-01 2015-12-01
166362 2017-01-01 3483 Moore County 17718 Southwestern Public Service Co TX gas Natural Gas Steam Turbine 49.0 None 1954-01-01 2013-09-01
166089 2017-01-01 1918 Minnesota Valley 13781 Northern States Power Co - Minnesota MN gas Natural Gas Steam Turbine 46.0 None 1953-09-01 2006-12-01
134237 2017-01-01 3485 Plant X 17718 Southwestern Public Service Co TX gas Natural Gas Steam Turbine 98.0 None 1953-01-01 None
131479 2017-01-01 1904 Black Dog 13781 Northern States Power Co - Minnesota MN coal Conventional Steam Coal 81.0 None 1952-08-01 2002-06-01
134236 2017-01-01 3485 Plant X 17718 Southwestern Public Service Co TX gas Natural Gas Steam Turbine 48.0 None 1952-01-01 None
165685 2017-01-01 465 Arapahoe 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 40.0 None 1951-06-01 2013-12-01
165683 2017-01-01 465 Arapahoe 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 44.0 None None 2002-12-01
165684 2017-01-01 465 Arapahoe 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 44.0 None None 2002-12-01
165687 2017-01-01 468 Cameo 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 25.0 None None 2010-12-01
165688 2017-01-01 468 Cameo 15466 Public Service Co of Colorado CO coal Conventional Steam Coal 50.0 None None 2010-12-01
165689 2017-01-01 478 Zuni 15466 Public Service Co of Colorado CO gas Natural Gas Steam Turbine 40.2 None None 2010-01-01
131482 2017-01-01 1904 Black Dog 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 360.0 None None None
131478 2017-01-01 1904 Black Dog 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combustion Turbine 238.0 None None None
131483 2017-01-01 1904 Black Dog 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 200.0 None None None
131484 2017-01-01 1904 Black Dog 13781 Northern States Power Co - Minnesota MN gas Natural Gas Fired Combined Cycle 200.0 None None None
166337 2017-01-01 3334 Pathfinder 13781 Northern States Power Co - Minnesota SD gas Natural Gas Steam Turbine 75.0 None None 2001-06-01
156546 2017-01-01 60697 Gaines County 17718 Southwestern Public Service Co TX gas Natural Gas Fired Combustion Turbine 225.0 None None None

118 rows × 12 columns


In [21]:
ops_cols = [
    'co2_mass_tons',
    'facility_id',
    'gross_load_mw',
    'heat_content_mmbtu',
    'operating_time_hours',
    'plant_id_eia',
    'state',
    'unit_id_epa',
    'unitid',
    'operating_datetime'
]
epacems_datadir = os.path.join(pudl.settings.PUDL_DIR,'results','parquet','epacems')
cems_dd = dd.read_parquet(epacems_datadir + '/*/*.parquet', columns=ops_cols)


CPU times: user 1 s, sys: 14.4 ms, total: 1.02 s
Wall time: 1.02 s

In [22]:
cems_dd.info()


<class 'dask.dataframe.core.DataFrame'>
Columns: 10 entries, co2_mass_tons to operating_datetime
dtypes: category(4), datetime64[ns](1), float32(4), uint16(1)

In [12]:
%time co_df = cems_dd[cems_dd.state=='CO'].compute()


CPU times: user 3min 48s, sys: 1min 38s, total: 5min 27s
Wall time: 1min 28s

In [14]:
co_df.sample(10)


Out[14]:
co2_mass_tons facility_id gross_load_mw heat_content_mmbtu operating_time_hours plant_id_eia state unit_id_epa unitid operating_datetime
61255 NaN nan NaN NaN 0.0 6248 CO nan 1 2005-02-24 07:00:00
490394 258.799988 80 247.0 2556.699951 1.0 470 CO 300 2 2012-10-03 02:00:00
268501 NaN nan NaN NaN 0.0 6761 CO nan C 2004-07-06 13:00:00
8759 40.299999 83 33.0 384.600006 1.0 492 CO 313 5 2013-01-24 23:00:00
238066 NaN 1333 NaN NaN 0.0 55200 CO 4125 CT6 2011-05-01 10:00:00
326165 NaN nan NaN NaN 0.0 55504 CO nan L1 2006-08-29 05:00:00
478978 NaN 79 NaN NaN 0.0 469 CO 298 4 2017-10-29 10:00:00
406207 52.700001 nan 46.0 513.900024 1.0 468 CO nan 2 2007-10-19 07:00:00
85015 NaN 8291 NaN NaN 0.0 50707 CO 90508 S005 2012-02-04 07:00:00
577350 13.100000 82 NaN 220.600006 1.0 478 CO 310 1 2010-12-02 06:00:00

In [45]:
epacems_old_datadir = os.path.join(pudl.settings.PUDL_DIR,'results','parquet','epacems-old')
test_datadirs = epacems_old_datadir + '/year*[2000,2001]/*.parquet'
test_dd = dd.read_parquet(test_datadirs, columns=ops_cols)

In [46]:
test_dd


Out[46]:
Dask DataFrame Structure:
co2_mass_tons facility_id gross_load_mw heat_content_mmbtu operating_time_hours plant_id_eia state unit_id_epa unitid operating_datetime
npartitions=294
float32 category[unknown] float32 float32 float32 uint16 category[unknown] category[unknown] category[unknown] datetime64[ns]
... ... ... ... ... ... ... ... ... ...
... ... ... ... ... ... ... ... ... ... ...
... ... ... ... ... ... ... ... ... ...
... ... ... ... ... ... ... ... ... ...
Dask Name: read-parquet, 294 tasks

In [33]:
wy_df.sample(20)


Out[33]:
co2_mass_tons facility_id gross_load_mw heat_content_mmbtu operating_time_hours plant_id_eia state unit_id_epa unitid operating_datetime
132324 324.899994 765 300.0 3097.800049 1.0 4162 WY 2639 3 2016-08-03 12:00:00
73809 153.699997 764 150.0 1465.099976 1.0 4158 WY 2635 BW43 2017-05-14 09:00:00
132963 430.100006 819 383.0 4101.200195 1.0 6101 WY 2777 BW91 2017-08-24 03:00:00
140192 NaN 1508 NaN NaN 0.0 55477 WY 4849 CT2 2017-08-15 08:00:00
66212 331.100006 1069 305.0 3156.699951 1.0 8066 WY 3458 BW74 2017-04-29 20:00:00
15722 447.299988 8296 434.0 4265.100098 1.0 56609 WY 90531 01 2017-01-05 02:00:00
79540 NaN 847 NaN NaN 0.0 6204 WY 2845 2 2016-05-11 04:00:00
118936 574.299988 1069 545.0 5475.500000 1.0 8066 WY 3455 BW71 2017-07-28 16:00:00
124233 457.500000 8296 448.0 4361.899902 1.0 56609 WY 90531 01 2017-07-01 09:00:00
169491 322.000000 765 299.0 3070.500000 1.0 4162 WY 2639 3 2016-10-27 03:00:00
200585 64.099998 764 57.0 611.200012 1.0 4158 WY 2633 BW41 2017-12-08 17:00:00
90167 NaN 8326 NaN NaN 0.0 57703 WY 90709 CT03 2017-05-13 23:00:00
134324 161.600006 847 0.0 1541.199951 1.0 6204 WY 2844 1 2016-08-24 20:00:00
22862 310.399994 765 298.0 2960.000000 1.0 4162 WY 2639 3 2016-02-04 14:00:00
35526 87.900002 764 80.0 838.099976 1.0 4158 WY 2633 BW41 2017-03-06 06:00:00
212286 574.700012 1069 560.0 5480.000000 1.0 8066 WY 3457 BW73 2016-12-06 06:00:00
80897 110.699997 967 90.0 1055.800049 1.0 7504 WY 3120 001 2017-05-30 17:00:00
117561 110.800003 967 91.0 1056.500000 1.0 7504 WY 3120 001 2016-07-08 09:00:00
104232 NaN 1510 NaN NaN 0.0 55479 WY 4852 001 2016-06-04 00:00:00
165614 181.199997 764 167.0 1727.599976 1.0 4158 WY 2635 BW43 2017-10-14 14:00:00

In [ ]: