In [ ]:
%load_ext autoreload
%autoreload 2
import pudl
from pudl import constants as pc
import pathlib
import yaml
import sqlalchemy as sa
from pudl.etl import *
import logging
import sys
import pathlib
import copy
In [ ]:
# Configure the root logger so that records at INFO and above are written to
# stdout as the bare message text (no timestamp / level prefix).
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)
# Assign the handler list outright instead of appending to it, so that
# re-running this cell does not stack up duplicate handlers.
logger.handlers = [handler]
In [ ]:
settings_file_name = 'etl_example.yml'
# NOTE(review): clobber is not referenced in the cells visible here; it is
# kept because later cells may rely on it.
clobber = True
pudl_settings = pudl.workspace.setup.get_defaults()

# Read the ETL settings file out of the configured settings directory.
settings_path = pathlib.Path(pudl_settings['settings_dir'], settings_file_name)
with settings_path.open("r") as f:
    settings_file = yaml.safe_load(f)
datapkg_bundle_settings = settings_file['datapkg_bundle_settings']

# Validate the settings from the settings file.
validated_bundle_settings = validate_params(
    datapkg_bundle_settings, pudl_settings)
In [ ]:
# NOTE: the indices below assume that the data package carrying the EIA
# settings is the THIRD entry of the bundle, and that 'eia' is the first
# dataset listed inside that package.
# If you would rather not deal with the settings file, you can simply edit
# the resulting eia_inputs by hand instead.
datapkg_settings = validated_bundle_settings[2]
eia_dataset = datapkg_settings['datasets'][0]
etl_params = eia_dataset['eia']
eia_inputs = pudl.etl._validate_params_eia(etl_params)

# Unpack the validated table / year selections for each form.
eia923_tables, eia923_years = (
    eia_inputs['eia923_tables'], eia_inputs['eia923_years'])
eia860_tables, eia860_years = (
    eia_inputs['eia860_tables'], eia_inputs['eia860_years'])
In [ ]:
# Both extract calls are pointed at the same data directory.
eia_data_dir = pudl_settings["data_dir"]

# Extract EIA forms 923 and 860 for the selected years.
eia923_raw_dfs = pudl.extract.eia923.extract(
    eia923_years=eia923_years,
    data_dir=eia_data_dir,
)
eia860_raw_dfs = pudl.extract.eia860.extract(
    eia860_years=eia860_years,
    data_dir=eia_data_dir,
)

# Transform EIA forms 923 and 860, restricted to the selected tables.
eia923_transformed_dfs = pudl.transform.eia923.transform(
    eia923_raw_dfs,
    eia923_tables=eia923_tables,
)
eia860_transformed_dfs = pudl.transform.eia860.transform(
    eia860_raw_dfs,
    eia860_tables=eia860_tables,
)
In [ ]:
# Build one combined dictionary of transformed EIA outputs. Deep copies are
# used so the per-form dictionaries created above are left untouched; on a
# key collision the EIA 923 entry replaces the EIA 860 one.
eia_transformed_dfs = copy.deepcopy(eia860_transformed_dfs)
eia923_copies = copy.deepcopy(eia923_transformed_dfs)
eia_transformed_dfs.update(eia923_copies)

# Convert the column types of every entry for the 'eia' data source.
eia_transformed_dfs = pudl.helpers.convert_dfs_dict_dtypes(
    eia_transformed_dfs,
    'eia',
)
In [ ]:
# In this case we want to investigate how the plants entity is harvested.
entity = 'plants'

# Start from an empty entities dictionary for the harvesting call to fill.
entities_dfs = {}

# With debug=True the call hands back a third value (col_dfs) in addition to
# the entities and transformed dictionaries.
harvest_result = pudl.transform.eia._harvesting(
    entity, eia_transformed_dfs, entities_dfs, debug=True)
entities_dfs, eia_transformed_dfs, col_dfs = harvest_result
In [ ]:
# Pull the 'balancing_authority_code' entry out of col_dfs for a closer look
# (presumably the per-column harvesting detail for that column -- confirm
# against pudl.transform.eia._harvesting).
bac = col_dfs['balancing_authority_code']
In [ ]:
# Bare expression: shows bac as the notebook cell's output.
bac