In [1]:
import pickle
import pandas as pd
import numpy as np
import benchmark # the main benchmarking script file
import bench_util # some utility functions related to benchmarking
# Just in case you want to plot something:
import matplotlib.pyplot as plt
# Show Plots in the Notebook
%matplotlib inline
# Increase the size of plots and their fonts
plt.rcParams['figure.figsize']= (10, 8) # set Chart Size
plt.rcParams['font.size'] = 14 # set Font size in Chart
# Use a plotting style'style' the plot using 'bmh' style
plt.style.use('bmh')
In [2]:
# Read the raw Utility Bill data into a DataFrame
df_raw = pd.read_pickle('df_raw.pkl')
# Read the pre-processed data
dfp = pd.read_pickle('df_processed.pkl')
# Read in the bench_util.Util object, which provides some other data that may
# be useful.
util_obj = pickle.load(open('util_obj.pkl', 'rb'))
In [3]:
df_raw.head(3)
Out[3]:
In [4]:
dfp.head(3)
Out[4]:
In [5]:
# Get some other building info about the Big Dipper building
# A dictionary is returned.
util_obj.building_info('DIPMP1')
Out[5]:
In [6]:
# This calls the preprocessing function in the main benchmark script file.
# Values in the 'settings.py' file will determine which files are read.
# NOTE: This will take 4 minutes to run, and the Jupyter Notebook does not
# always show the messages that printed along the way, so it will look like
# your computer is frozen. Be patient.
# Uncomment the line below to run
# df_raw, dfp, util_obj = benchmark.preprocess_data()
In [7]:
# You can't use the Pandas "query" method for columns that have spaces in
# their names. So use the more general, but cumbersome syntax to filter
# the "Site ID" column.
# NOTE: a quick rename of the columns to eliminate spaces is another approach:
# df_raw.columns = [c.replace(' ', '_') for c in df_raw.columns]
df_dip = df_raw[df_raw['Site ID']=='DIPMP1']
df_dip.query("From < '2017-06-01' and From > '2017-01-01' and Units=='kW'")
Out[7]:
In [8]:
# Look at the processed data to compare Actual Demand vs. Demand Charge
df_dip = dfp.query("site_id=='DIPMP1' and units=='kW'")
df_pivot = pd.pivot_table(df_dip, index=['cal_year', 'cal_mo'], values='usage', columns='item_desc', aggfunc=np.sum)
# Do the following to show all rows of the DataFrame
with pd.option_context('display.max_rows', None):
print(df_pivot)