In [1]:
%load_ext autoreload
%autoreload 2
%load_ext line_profiler
In [2]:
import sys
import os
import pudl
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import dask.dataframe as dd
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
In [3]:
plt.style.use('ggplot')
mpl.rcParams['figure.figsize'] = (10,4)
mpl.rcParams['figure.dpi'] = 150
pd.options.display.max_columns = 56
In [4]:
pudl_out = pudl.output.pudltabl.PudlTabl()
In [5]:
gens_eia860 = pudl_out.gens_eia860()
plants_eia860 = pudl_out.plants_eia860()
---------------------------------------------------------------------------
ProgrammingError Traceback (most recent call last)
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1192 parameters,
-> 1193 context)
1194 except BaseException as e:
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
508 def do_execute(self, cursor, statement, parameters, context=None):
--> 509 cursor.execute(statement, parameters)
510
ProgrammingError: relation "generators_eia860" does not exist
LINE 2: FROM generators_eia860
^
The above exception was the direct cause of the following exception:
ProgrammingError Traceback (most recent call last)
<ipython-input-5-bb173ae901f1> in <module>()
----> 1 gens_eia860 = pudl_out.gens_eia860()
2 plants_eia860 = pudl_out.plants_eia860()
~/code/catalyst/pudl/pudl/output/pudltabl.py in gens_eia860(self, update)
159 start_date=self.start_date,
160 end_date=self.end_date,
--> 161 testing=self.testing)
162 return self._dfs['gens_eia860']
163
~/code/catalyst/pudl/pudl/output/eia860.py in generators_eia860(start_date, end_date, testing)
225 )
226
--> 227 gens_eia860 = pd.read_sql(gens_eia860_select, pudl_engine)
228 # Canonical sources for these fields are elsewhere. We will merge them in.
229 gens_eia860 = gens_eia860.drop(['utility_id_eia',
~/anaconda3/envs/pudl/lib/python3.7/site-packages/pandas/io/sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)
395 sql, index_col=index_col, params=params,
396 coerce_float=coerce_float, parse_dates=parse_dates,
--> 397 chunksize=chunksize)
398
399
~/anaconda3/envs/pudl/lib/python3.7/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize)
1061 args = _convert_params(sql, params)
1062
-> 1063 result = self.execute(*args)
1064 columns = result.keys()
1065
~/anaconda3/envs/pudl/lib/python3.7/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs)
952 def execute(self, *args, **kwargs):
953 """Simple passthrough to SQLAlchemy connectable"""
--> 954 return self.connectable.execute(*args, **kwargs)
955
956 def read_table(self, table_name, index_col=None, coerce_float=True,
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in execute(self, statement, *multiparams, **params)
2073
2074 connection = self.contextual_connect(close_with_result=True)
-> 2075 return connection.execute(statement, *multiparams, **params)
2076
2077 def scalar(self, statement, *multiparams, **params):
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in execute(self, object, *multiparams, **params)
946 raise exc.ObjectNotExecutableError(object)
947 else:
--> 948 return meth(self, multiparams, params)
949
950 def _execute_function(self, func, multiparams, params):
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params)
267 def _execute_on_connection(self, connection, multiparams, params):
268 if self.supports_execution:
--> 269 return connection._execute_clauseelement(self, multiparams, params)
270 else:
271 raise exc.ObjectNotExecutableError(self)
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params)
1058 compiled_sql,
1059 distilled_params,
-> 1060 compiled_sql, distilled_params
1061 )
1062 if self._has_events or self.engine._has_events:
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1198 parameters,
1199 cursor,
-> 1200 context)
1201
1202 if self._has_events or self.engine._has_events:
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1411 util.raise_from_cause(
1412 sqlalchemy_exception,
-> 1413 exc_info
1414 )
1415 else:
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/util/compat.py in raise_from_cause(exception, exc_info)
263 exc_type, exc_value, exc_tb = exc_info
264 cause = exc_value if exc_value is not exception else None
--> 265 reraise(type(exception), exception, tb=exc_tb, cause=cause)
266
267 if py3k:
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause)
246 value.__cause__ = cause
247 if value.__traceback__ is not tb:
--> 248 raise value.with_traceback(tb)
249 raise value
250
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1191 statement,
1192 parameters,
-> 1193 context)
1194 except BaseException as e:
1195 self._handle_dbapi_exception(
~/anaconda3/envs/pudl/lib/python3.7/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context)
507
508 def do_execute(self, cursor, statement, parameters, context=None):
--> 509 cursor.execute(statement, parameters)
510
511 def do_execute_no_params(self, cursor, statement, context=None):
ProgrammingError: (psycopg2.ProgrammingError) relation "generators_eia860" does not exist
LINE 2: FROM generators_eia860
^
[SQL: 'SELECT generators_eia860.id, generators_eia860.report_date, generators_eia860.utility_id_eia, generators_eia860.utility_name, generators_eia860.plant_id_eia, generators_eia860.plant_name, generators_eia860.state, generators_eia860.county, generators_eia860.generator_id, generators_eia860.prime_mover_code, generators_eia860.unit_id_eia, generators_eia860.operational_status_code, generators_eia860.ownership_code, generators_eia860.duct_burners, generators_eia860.capacity_mw, generators_eia860.summer_capacity_mw, generators_eia860.winter_capacity_mw, generators_eia860.operating_date, generators_eia860.energy_source_code_1, generators_eia860.energy_source_code_2, generators_eia860.energy_source_code_3, generators_eia860.energy_source_code_4, generators_eia860.energy_source_code_5, generators_eia860.energy_source_code_6, generators_eia860.fuel_type_code_pudl, generators_eia860.multiple_fuels, generators_eia860.deliver_power_transgrid, generators_eia860.syncronized_transmission_grid, generators_eia860.turbines_num, generators_eia860.sector_name, generators_eia860.sector_id, generators_eia860.topping_bottoming_code, generators_eia860.planned_modifications, generators_eia860.planned_net_summer_capacity_uprate_mw, generators_eia860.planned_net_winter_capacity_uprate_mw, generators_eia860.planned_uprate_date, generators_eia860.planned_net_summer_capacity_derate_mw, generators_eia860.planned_net_winter_capacity_derate_mw, generators_eia860.planned_derate_date, generators_eia860.planned_new_prime_mover_code, generators_eia860.planned_energy_source_code_1, generators_eia860.planned_repower_date, generators_eia860.other_planned_modifications, generators_eia860.other_modifications_date, generators_eia860.planned_retirement_date, generators_eia860.solid_fuel_gasification, generators_eia860.pulverized_coal_tech, generators_eia860.fluidized_bed_tech, generators_eia860.subcritical_tech, generators_eia860.supercritical_tech, generators_eia860.ultrasupercritical_tech, generators_eia860.carbon_capture, generators_eia860.startup_source_code_1, generators_eia860.startup_source_code_2, generators_eia860.startup_source_code_3, generators_eia860.startup_source_code_4, generators_eia860.technology_description, generators_eia860.turbines_inverters_hydrokinetics, generators_eia860.time_cold_shutdown_full_load_code, generators_eia860.stoker_tech, generators_eia860.other_combustion_tech, generators_eia860.planned_new_capacity_mw, generators_eia860.cofire_fuels, generators_eia860.switch_oil_gas, generators_eia860.heat_bypass_recovery, generators_eia860.rto_iso_lmp_node_id, generators_eia860.rto_iso_location_wholesale_reporting_id, generators_eia860.nameplate_power_factor, generators_eia860.minimum_load_mw, generators_eia860.uprate_derate_during_year, generators_eia860.uprate_derate_completed_date, generators_eia860.associated_combined_heat_power, generators_eia860.original_planned_operating_date, generators_eia860.current_planned_operating_date, generators_eia860.summer_estimated_capability_mw, generators_eia860.winter_estimated_capability_mw, generators_eia860.operating_switch, generators_eia860.previously_canceled, generators_eia860.retirement_date \nFROM generators_eia860 \nWHERE generators_eia860.report_date >= %(report_date_1)s AND generators_eia860.report_date <= %(report_date_2)s'] [parameters: {'report_date_1': Timestamp('2009-01-01 00:00:00'), 'report_date_2': Timestamp('2017-12-31 00:00:00')}] (Background on this error at: http://sqlalche.me/e/f405)
In [14]:
gen_eia923 = pudl_out.gen_eia923()
In [66]:
xcel_util_ids = [
13781,
15466,
17718,
]
cols = [
'report_date',
'plant_id_eia',
'plant_name',
'utility_id_eia',
'utility_name',
'state',
'fuel_type_code_pudl',
'technology_description',
'capacity_mw',
'unit_id_eia',
'operating_date',
'retirement_date'
]
In [67]:
gens_eia860.filter(regex='.*date.*').columns
Out[67]:
Index(['report_date', 'current_planned_operating_date', 'operating_date',
'original_planned_operating_date', 'other_modifications_date',
'planned_derate_date', 'planned_repower_date',
'planned_retirement_date', 'planned_uprate_date', 'retirement_date',
'uprate_derate_completed_date'],
dtype='object')
In [69]:
gens_eia860[(gens_eia860.utility_id_eia.isin(xcel_util_ids)) &
(pd.to_datetime(gens_eia860.report_date).dt.year==2017) &
(gens_eia860.fuel_type_code_pudl.isin(['coal','gas']))][cols].sort_values('operating_date', ascending=False)
Out[69]:
report_date
plant_id_eia
plant_name
utility_id_eia
utility_name
state
fuel_type_code_pudl
technology_description
capacity_mw
unit_id_eia
operating_date
retirement_date
128610
2017-01-01
469
Cherokee
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
185.3
CHR0
2015-08-01
None
128611
2017-01-01
469
Cherokee
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
255.0
CHR0
2015-08-01
None
128609
2017-01-01
469
Cherokee
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
185.3
CHR0
2015-08-01
None
134232
2017-01-01
3482
Jones
17718
Southwestern Public Service Co
TX
gas
Natural Gas Fired Combustion Turbine
182.7
None
2013-05-01
None
134231
2017-01-01
3482
Jones
17718
Southwestern Public Service Co
TX
gas
Natural Gas Fired Combustion Turbine
182.7
None
2011-06-01
None
128617
2017-01-01
470
Comanche (CO)
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
856.8
None
2010-07-01
None
135485
2017-01-01
6112
Fort St Vrain
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combustion Turbine
140.3
None
2009-05-01
None
135484
2017-01-01
6112
Fort St Vrain
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combustion Turbine
140.3
None
2009-05-01
None
131514
2017-01-01
1927
Riverside (MN)
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
210.6
RIV0
2009-05-01
None
131513
2017-01-01
1927
Riverside (MN)
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
210.6
RIV0
2009-05-01
None
131496
2017-01-01
1912
High Bridge
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
250.0
HBR0
2008-05-01
None
131495
2017-01-01
1912
High Bridge
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
197.0
HBR0
2008-05-01
None
131494
2017-01-01
1912
High Bridge
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
197.0
HBR0
2008-05-01
None
137848
2017-01-01
8027
Blue Lake
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combustion Turbine
166.3
None
2005-06-01
None
136664
2017-01-01
7237
Angus Anson
13781
Northern States Power Co - Minnesota
SD
gas
Natural Gas Fired Combustion Turbine
166.3
None
2005-06-01
None
137849
2017-01-01
8027
Blue Lake
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combustion Turbine
166.3
None
2005-06-01
None
143364
2017-01-01
55835
Rocky Mountain Energy Center
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
175.1
RKM0
2004-05-01
None
143363
2017-01-01
55835
Rocky Mountain Energy Center
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
175.1
RKM0
2004-05-01
None
143365
2017-01-01
55835
Rocky Mountain Energy Center
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
334.9
RKM0
2004-05-01
None
143156
2017-01-01
55645
Blue Spruce Energy Center
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combustion Turbine
198.9
None
2003-05-01
None
143157
2017-01-01
55645
Blue Spruce Energy Center
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combustion Turbine
198.9
None
2003-05-01
None
131477
2017-01-01
1904
Black Dog
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
187.9
BDS0
2002-06-01
None
135483
2017-01-01
6112
Fort St Vrain
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
175.1
FSV0
2001-06-01
None
135482
2017-01-01
6112
Fort St Vrain
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
175.1
FSV0
1999-01-01
None
135480
2017-01-01
6112
Fort St Vrain
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
342.6
FSV0
1998-07-01
None
132557
2017-01-01
2454
Cunningham
17718
Southwestern Public Service Co
NM
gas
Natural Gas Fired Combustion Turbine
126.9
None
1998-05-01
None
132558
2017-01-01
2454
Cunningham
17718
Southwestern Public Service Co
NM
gas
Natural Gas Fired Combustion Turbine
126.9
None
1998-05-01
None
135481
2017-01-01
6112
Fort St Vrain
15466
Public Service Co of Colorado
CO
gas
Natural Gas Fired Combined Cycle
175.1
FSV0
1996-05-01
None
136663
2017-01-01
7237
Angus Anson
13781
Northern States Power Co - Minnesota
SD
gas
Natural Gas Fired Combustion Turbine
119.7
None
1994-08-01
None
136662
2017-01-01
7237
Angus Anson
13781
Northern States Power Co - Minnesota
SD
gas
Natural Gas Fired Combustion Turbine
119.7
None
1994-08-01
None
...
...
...
...
...
...
...
...
...
...
...
...
...
134233
2017-01-01
3484
Nichols
17718
Southwestern Public Service Co
TX
gas
Natural Gas Steam Turbine
113.6
None
1960-01-01
None
128613
2017-01-01
469
Cherokee
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
125.0
None
1959-06-01
2011-10-01
131498
2017-01-01
1912
High Bridge
13781
Northern States Power Co - Minnesota
MN
coal
Conventional Steam Coal
163.2
None
1959-04-01
2007-12-01
131507
2017-01-01
1915
Allen S King
13781
Northern States Power Co - Minnesota
MN
coal
Conventional Steam Coal
598.4
None
1958-03-01
None
128612
2017-01-01
469
Cherokee
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
125.0
None
1957-06-01
2012-04-01
132555
2017-01-01
2454
Cunningham
17718
Southwestern Public Service Co
NM
gas
Natural Gas Steam Turbine
75.0
None
1957-01-01
None
134876
2017-01-01
3982
Bay Front
13781
Northern States Power Co - Minnesota
WI
gas
Natural Gas Steam Turbine
27.2
None
1957-01-01
None
131497
2017-01-01
1912
High Bridge
13781
Northern States Power Co - Minnesota
MN
coal
Conventional Steam Coal
113.6
None
1956-08-01
2007-12-01
131480
2017-01-01
1904
Black Dog
13781
Northern States Power Co - Minnesota
MN
coal
Conventional Steam Coal
113.6
None
1955-07-01
2015-04-01
165686
2017-01-01
465
Arapahoe
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
112.5
None
1955-06-01
2013-12-01
134238
2017-01-01
3485
Plant X
17718
Southwestern Public Service Co
TX
gas
Natural Gas Steam Turbine
98.0
None
1955-01-01
None
131476
2017-01-01
1904
Black Dog
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
136.9
BDS0
1954-10-01
None
165690
2017-01-01
478
Zuni
15466
Public Service Co of Colorado
CO
gas
Natural Gas Steam Turbine
75.0
None
1954-06-01
2015-12-01
166362
2017-01-01
3483
Moore County
17718
Southwestern Public Service Co
TX
gas
Natural Gas Steam Turbine
49.0
None
1954-01-01
2013-09-01
166089
2017-01-01
1918
Minnesota Valley
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Steam Turbine
46.0
None
1953-09-01
2006-12-01
134237
2017-01-01
3485
Plant X
17718
Southwestern Public Service Co
TX
gas
Natural Gas Steam Turbine
98.0
None
1953-01-01
None
131479
2017-01-01
1904
Black Dog
13781
Northern States Power Co - Minnesota
MN
coal
Conventional Steam Coal
81.0
None
1952-08-01
2002-06-01
134236
2017-01-01
3485
Plant X
17718
Southwestern Public Service Co
TX
gas
Natural Gas Steam Turbine
48.0
None
1952-01-01
None
165685
2017-01-01
465
Arapahoe
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
40.0
None
1951-06-01
2013-12-01
165683
2017-01-01
465
Arapahoe
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
44.0
None
None
2002-12-01
165684
2017-01-01
465
Arapahoe
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
44.0
None
None
2002-12-01
165687
2017-01-01
468
Cameo
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
25.0
None
None
2010-12-01
165688
2017-01-01
468
Cameo
15466
Public Service Co of Colorado
CO
coal
Conventional Steam Coal
50.0
None
None
2010-12-01
165689
2017-01-01
478
Zuni
15466
Public Service Co of Colorado
CO
gas
Natural Gas Steam Turbine
40.2
None
None
2010-01-01
131482
2017-01-01
1904
Black Dog
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
360.0
None
None
None
131478
2017-01-01
1904
Black Dog
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combustion Turbine
238.0
None
None
None
131483
2017-01-01
1904
Black Dog
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
200.0
None
None
None
131484
2017-01-01
1904
Black Dog
13781
Northern States Power Co - Minnesota
MN
gas
Natural Gas Fired Combined Cycle
200.0
None
None
None
166337
2017-01-01
3334
Pathfinder
13781
Northern States Power Co - Minnesota
SD
gas
Natural Gas Steam Turbine
75.0
None
None
2001-06-01
156546
2017-01-01
60697
Gaines County
17718
Southwestern Public Service Co
TX
gas
Natural Gas Fired Combustion Turbine
225.0
None
None
None
118 rows × 12 columns
In [21]:
ops_cols = [
'co2_mass_tons',
'facility_id',
'gross_load_mw',
'heat_content_mmbtu',
'operating_time_hours',
'plant_id_eia',
'state',
'unit_id_epa',
'unitid',
'operating_datetime'
]
epacems_datadir = os.path.join(pudl.settings.PUDL_DIR,'results','parquet','epacems')
cems_dd = dd.read_parquet(epacems_datadir + '/*/*.parquet', columns=ops_cols)
CPU times: user 1 s, sys: 14.4 ms, total: 1.02 s
Wall time: 1.02 s
In [22]:
cems_dd.info()
<class 'dask.dataframe.core.DataFrame'>
Columns: 10 entries, co2_mass_tons to operating_datetime
dtypes: category(4), datetime64[ns](1), float32(4), uint16(1)
In [12]:
%time co_df = cems_dd[cems_dd.state=='CO'].compute()
CPU times: user 3min 48s, sys: 1min 38s, total: 5min 27s
Wall time: 1min 28s
In [14]:
co_df.sample(10)
Out[14]:
co2_mass_tons
facility_id
gross_load_mw
heat_content_mmbtu
operating_time_hours
plant_id_eia
state
unit_id_epa
unitid
operating_datetime
61255
NaN
nan
NaN
NaN
0.0
6248
CO
nan
1
2005-02-24 07:00:00
490394
258.799988
80
247.0
2556.699951
1.0
470
CO
300
2
2012-10-03 02:00:00
268501
NaN
nan
NaN
NaN
0.0
6761
CO
nan
C
2004-07-06 13:00:00
8759
40.299999
83
33.0
384.600006
1.0
492
CO
313
5
2013-01-24 23:00:00
238066
NaN
1333
NaN
NaN
0.0
55200
CO
4125
CT6
2011-05-01 10:00:00
326165
NaN
nan
NaN
NaN
0.0
55504
CO
nan
L1
2006-08-29 05:00:00
478978
NaN
79
NaN
NaN
0.0
469
CO
298
4
2017-10-29 10:00:00
406207
52.700001
nan
46.0
513.900024
1.0
468
CO
nan
2
2007-10-19 07:00:00
85015
NaN
8291
NaN
NaN
0.0
50707
CO
90508
S005
2012-02-04 07:00:00
577350
13.100000
82
NaN
220.600006
1.0
478
CO
310
1
2010-12-02 06:00:00
In [45]:
epacems_old_datadir = os.path.join(pudl.settings.PUDL_DIR,'results','parquet','epacems-old')
test_datadirs = epacems_old_datadir + '/year*[2000,2001]/*.parquet'
test_dd = dd.read_parquet(test_datadirs, columns=ops_cols)
In [46]:
test_dd
Out[46]:
Dask DataFrame Structure:
co2_mass_tons
facility_id
gross_load_mw
heat_content_mmbtu
operating_time_hours
plant_id_eia
state
unit_id_epa
unitid
operating_datetime
npartitions=294
float32
category[unknown]
float32
float32
float32
uint16
category[unknown]
category[unknown]
category[unknown]
datetime64[ns]
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
Dask Name: read-parquet, 294 tasks
In [33]:
wy_df.sample(20)
Out[33]:
co2_mass_tons
facility_id
gross_load_mw
heat_content_mmbtu
operating_time_hours
plant_id_eia
state
unit_id_epa
unitid
operating_datetime
132324
324.899994
765
300.0
3097.800049
1.0
4162
WY
2639
3
2016-08-03 12:00:00
73809
153.699997
764
150.0
1465.099976
1.0
4158
WY
2635
BW43
2017-05-14 09:00:00
132963
430.100006
819
383.0
4101.200195
1.0
6101
WY
2777
BW91
2017-08-24 03:00:00
140192
NaN
1508
NaN
NaN
0.0
55477
WY
4849
CT2
2017-08-15 08:00:00
66212
331.100006
1069
305.0
3156.699951
1.0
8066
WY
3458
BW74
2017-04-29 20:00:00
15722
447.299988
8296
434.0
4265.100098
1.0
56609
WY
90531
01
2017-01-05 02:00:00
79540
NaN
847
NaN
NaN
0.0
6204
WY
2845
2
2016-05-11 04:00:00
118936
574.299988
1069
545.0
5475.500000
1.0
8066
WY
3455
BW71
2017-07-28 16:00:00
124233
457.500000
8296
448.0
4361.899902
1.0
56609
WY
90531
01
2017-07-01 09:00:00
169491
322.000000
765
299.0
3070.500000
1.0
4162
WY
2639
3
2016-10-27 03:00:00
200585
64.099998
764
57.0
611.200012
1.0
4158
WY
2633
BW41
2017-12-08 17:00:00
90167
NaN
8326
NaN
NaN
0.0
57703
WY
90709
CT03
2017-05-13 23:00:00
134324
161.600006
847
0.0
1541.199951
1.0
6204
WY
2844
1
2016-08-24 20:00:00
22862
310.399994
765
298.0
2960.000000
1.0
4162
WY
2639
3
2016-02-04 14:00:00
35526
87.900002
764
80.0
838.099976
1.0
4158
WY
2633
BW41
2017-03-06 06:00:00
212286
574.700012
1069
560.0
5480.000000
1.0
8066
WY
3457
BW73
2016-12-06 06:00:00
80897
110.699997
967
90.0
1055.800049
1.0
7504
WY
3120
001
2017-05-30 17:00:00
117561
110.800003
967
91.0
1056.500000
1.0
7504
WY
3120
001
2016-07-08 09:00:00
104232
NaN
1510
NaN
NaN
0.0
55479
WY
4852
001
2016-06-04 00:00:00
165614
181.199997
764
167.0
1727.599976
1.0
4158
WY
2635
BW43
2017-10-14 14:00:00
In [ ]:
Content source: catalyst-cooperative/pudl
Similar notebooks: