In [181]:
import pickle
from collections import namedtuple
import pandas as pd
import numpy as np
from importlib import reload
import sys
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.dates as mdates
import datetime
# bench_util module is in the parent directory, because it will be part of
# the production code. Add the parent directory to the Path so Python can
# import from there
sys.path.insert(0, '../')
import bench_util as bu
import graph_util as gu
In [182]:
# Unpickle the pre-processed DataFrame
df = pickle.load(open('dfu3.pkl', 'rb'))
# Unpickle the raw utility bill DataFrame, which is needed below to make
# the utility function object.
df_raw = pickle.load(open('df_raw.pkl', 'rb'))
df.head() # the processed data
Out[182]:
In [183]:
# this is only needed to update any code changes I may have made
# since last importing the module above.
reload(bu)
reload(gu)
# Make an object that has the various utility functions.
# The object needs access to the raw utility bill DataFrame and the spreadsheet
# containing other application data.
ut = bu.Util(df_raw, '../data/Other_Building_Data.xlsx')
In [184]:
# Testing site. Final code will loop through all sites
site = 'ANSBG1'
In [185]:
template_data = {}
In [186]:
# From the main DataFrame, get only the rows for this site, and only get
# the needed columns for this analysis
usage_df1 = df.query('site_id == @site')[['service_type', 'fiscal_year', 'fiscal_mo', 'mmbtu']]
usage_df1.head()
Out[186]:
In [187]:
usage_df2 = pd.pivot_table(
usage_df1,
values='mmbtu',
index=['fiscal_year'],
columns=['service_type'],
aggfunc=np.sum
)
usage_df2 = usage_df2.drop(labels=['Sewer', 'Water'], axis=1)
usage_df2
Out[187]:
In [188]:
# Add in columns for the missing services
missing_services = bu.missing_energy_services(usage_df2.columns)
bu.add_columns(usage_df2, missing_services)
usage_df2
Out[188]:
In [189]:
# Add a Total column that sums the other columns
usage_df2['total_energy'] = usage_df2.sum(axis=1)
cols = ['{}_mmbtu'.format(bu.change_name(col)) for col in usage_df2.columns]
usage_df2.columns = cols
usage_df2
Out[189]:
In [190]:
# Create a list of columns to loop through and calculate percent total energy
usage_cols = list(usage_df2.columns.values)
print (usage_cols)
usage_cols.remove('total_energy_mmbtu')
for col in usage_cols:
col_name = col.split('_mmbtu')[0] + "_pct"
usage_df2[col_name] = usage_df2[col] / usage_df2.total_energy_mmbtu
usage_df2
Out[190]:
In [191]:
# Add in degree days
months_present = bu.months_present(usage_df1)
deg_days = ut.degree_days_yearly(months_present, site)
usage_df2['hdd'] = deg_days
usage_df2
Out[191]:
In [192]:
# Add in a column to show the numbers of months present for each year
# This will help to identify partial years.
mo_count = bu.month_count(months_present)
usage_df2['month_count'] = mo_count
usage_df2
Out[192]:
In [193]:
# Calculate total heat energy and normalized heating usage
usage_df2['total_heat_mmbtu'] = usage_df2.natural_gas_mmbtu + usage_df2.district_heat_mmbtu + usage_df2.fuel_oil_mmbtu
usage_df2['total_specific_heat'] = usage_df2.total_heat_mmbtu * 1000 / usage_df2.hdd
usage_df2 = usage_df2.query("month_count == 12")
usage_df2
Out[193]:
In [194]:
# Reverse the DataFrame
usage_df2.sort_index(ascending=False, inplace=True)
usage_df2 = usage_df2.drop('month_count', axis=1)
usage_df2
Out[194]:
In [195]:
# Reset the index so the fiscal year column can be passed to the graphing function
reset_usage_df2 = usage_df2.reset_index()
p4g2_filename, p4g2_url = gu.graph_filename_url(site, 'annual_energy_usage_distribution')
# Create the area graph
gu.area_use_distribution(reset_usage_df2, 'fiscal_year', usage_cols, p4g2_filename)
In [196]:
p4g1_filename, p4g1_url = gu.graph_filename_url(site, "annual_energy_usage")
gu.energy_use_stacked_bar(reset_usage_df2, 'fiscal_year', usage_cols, p4g1_filename)
In [197]:
# Convert df to dictionary
energy_use_overview_rows = bu.df_to_dictionaries(usage_df2)
# Add data and graphs to main dictionary
template_data['energy_usage_overview'] = dict(
graphs=[p4g1_url, p4g2_url],
table={'rows': energy_use_overview_rows},
)
In [202]:
p5g1_filename, p5g1_url = gu.graph_filename_url(site, "energy_usage")
gu.usage_pie_charts(usage_df2, usage_cols, 1, p5g1_filename, site)
In [ ]:
# Add pie charts to template dictionary
template_data['energy_cost_usage'] = dict(
graphs=[p5g1_url])
In [203]:
site_df = df.query("site_id == @site")
site_df.head()
Out[203]:
In [204]:
# only look at elecricity records
electric_df = site_df.query("service_type == 'Electricity'")
In [205]:
# Make sure I'm not potentially missing anything with funky unit names
check_df = electric_df.query("usage > 0")
check_df.units.unique()
Out[205]:
In [206]:
electric_df = electric_df.query("units == 'kWh' or units == 'kW'")
electric_df.head()
Out[206]:
In [207]:
electric_df.query("units == 'kWh'")['item_desc'].unique()
Out[207]:
In [208]:
electric_df.item_desc.unique()
Out[208]:
In [209]:
electric_pivot_monthly = pd.pivot_table(electric_df,
index=['fiscal_year', 'fiscal_mo'],
columns=['units'],
values='usage',
aggfunc=np.sum)
electric_pivot_monthly.head()
Out[209]:
In [210]:
# Do a month count for the elecricity bills
elec_months_present = bu.months_present(electric_pivot_monthly.reset_index())
elec_mo_count = bu.month_count(elec_months_present)
elec_mo_count_df = pd.DataFrame(elec_mo_count)
elec_mo_count_df
Out[210]:
In [211]:
electric_pivot_annual = pd.pivot_table(electric_df,
index=['fiscal_year'],
columns=['units'],
values='usage',
aggfunc=np.sum
)
electric_use_annual = electric_pivot_annual[['kWh']]
electric_use_annual = electric_use_annual.rename(columns={'kWh':'ann_electric_usage_kWh'})
electric_use_annual
Out[211]:
In [212]:
# Get average annual demand usage
electric_demand_avg = electric_pivot_monthly.groupby(['fiscal_year']).mean()
electric_demand_avg = electric_demand_avg[['kW']]
electric_demand_avg = electric_demand_avg.rename(columns={'kW': 'avg_demand_kW'})
electric_demand_avg
Out[212]:
In [213]:
# Find annual maximum demand usage
electric_demand_max = electric_pivot_monthly.groupby(['fiscal_year']).max()
electric_demand_max = electric_demand_max[['kW']]
electric_demand_max = electric_demand_max.rename(columns={'kW': 'max_demand_kW'})
electric_demand_max
Out[213]:
In [214]:
# Combine dataframes
electric_demand_join = pd.merge(electric_demand_max, electric_demand_avg, how='outer', left_index=True, right_index=True)
annual_electric_data = pd.merge(electric_demand_join, electric_use_annual, how='outer', left_index=True, right_index=True)
annual_electric_data
Out[214]:
In [215]:
# Add percent change columns
annual_electric_data['usage_pct_change'] = annual_electric_data.ann_electric_usage_kWh.pct_change()
annual_electric_data['avg_demand_pct_change'] = annual_electric_data.avg_demand_kW.pct_change()
annual_electric_data['max_demand_pct_change'] = annual_electric_data.max_demand_kW.pct_change()
annual_electric_data = annual_electric_data.rename(columns={'avg_demand_kW': 'Average kW',
'ann_electric_usage_kWh': 'Total kWh'})
annual_electric_data = pd.merge(annual_electric_data, elec_mo_count_df, left_index=True, right_index=True, how='left')
annual_electric_data = annual_electric_data.query("month == 12")
annual_electric_data = annual_electric_data.sort_index(ascending=False)
annual_electric_data = annual_electric_data.rename(columns={'max_demand_kW':'kw_max',
'Average kW':'kw_avg',
'Total kWh':'kwh',
'usage_pct_change':'kwh_pct_change',
'avg_demand_pct_change':'kw_avg_pct_change',
'max_demand_pct_change':'kw_max_pct_change'})
annual_electric_data = annual_electric_data.drop('month', axis=1)
annual_electric_data
Out[215]:
In [216]:
# Axes labels
ylabel1 = 'Electricity Usage [kWh]'
ylabel2 = 'Electricity Demand [kW]'
In [217]:
p6g1_filename, p6g1_url = gu.graph_filename_url(site, "electricity_usage")
gu.stacked_bar_with_line(annual_electric_data.reset_index(), 'fiscal_year', ['kwh'], 'kw_avg',
ylabel1, ylabel2, "Test Title", p6g1_filename)
In [218]:
p6g2_filename, p6g2_url = gu.graph_filename_url(site, "monthly_electricity_usage_profile")
gu.create_monthly_profile(electric_pivot_monthly, 'kWh', 'Monthly Electricity Usage Profile [kWh]', 'blue',
"Test Title", p6g2_filename)
In [219]:
# Convert df to dictionary
electric_use_rows = bu.df_to_dictionaries(annual_electric_data)
# Add data and graphs to main dictionary
template_data['electrical_usage_analysis'] = dict(
graphs=[p6g1_url, p6g2_url],
table={'rows': electric_use_rows},
)
In [220]:
# only look at elecricity records
electric_cost_df = site_df.query("service_type == 'Electricity'")
In [221]:
electric_cost_df.item_desc.unique()
Out[221]:
In [222]:
# Costs don't always have units, so split the data into demand charges and usage charges (which includes other charges)
electric_cost_df['cost_categories'] = np.where(electric_cost_df.item_desc.isin(['KW Charge', 'On peak demand', 'Demand Charge']),
'demand_cost', 'usage_cost')
In [223]:
# Sum costs by demand and usage
electric_annual_cost = pd.pivot_table(electric_cost_df,
index=['fiscal_year'],
columns=['cost_categories'],
values='cost',
aggfunc=np.sum
)
# Create a total column
electric_annual_cost['Total Cost'] = electric_annual_cost.demand_cost + electric_annual_cost.usage_cost
electric_annual_cost
Out[223]:
In [224]:
# Add percent change columns
electric_annual_cost['usage_cost_pct_change'] = electric_annual_cost.usage_cost.pct_change()
electric_annual_cost['demand_cost_pct_change'] = electric_annual_cost.demand_cost.pct_change()
electric_annual_cost['total_cost_pct_change'] = electric_annual_cost['Total Cost'].pct_change()
electric_annual_cost
Out[224]:
In [225]:
# Left join the cost data to the annual electric data, which only shows complete years
electric_use_and_cost = pd.merge(annual_electric_data, electric_annual_cost, left_index=True, right_index=True, how='left')
electric_use_and_cost = electric_use_and_cost.sort_index(ascending=False)
electric_use_and_cost = electric_use_and_cost.drop(['kw_max', 'kw_max_pct_change'], axis=1)
electric_use_and_cost = electric_use_and_cost.rename(columns={'demand_cost':'kw_avg_cost',
'usage_cost':'kwh_cost',
'Total Cost':'total_cost',
'usage_cost_pct_change':'kwh_cost_pct_change',
'demand_cost_pct_change':'kw_avg_cost_pct_change'
})
electric_use_and_cost
Out[225]:
In [226]:
p7g1_filename, p7g1_url = gu.graph_filename_url(site, "electricity_cost")
renamed_use_and_cost = electric_use_and_cost.rename(columns={'kwh_cost':'Electricity Usage Cost [$]',
'kw_avg_cost':'Electricity Demand Cost [$]'})
gu.create_stacked_bar(renamed_use_and_cost.reset_index(), 'fiscal_year', ['Electricity Usage Cost [$]',
'Electricity Demand Cost [$]'],
'Electricity Cost [$]', "test title", p7g1_filename)
In [227]:
# Create Monthly Profile of Electricity Demand
p7g2_filename, p7g2_url = gu.graph_filename_url(site, "monthly_electricity_demand_profile")
gu.create_monthly_profile(electric_pivot_monthly, 'kW', 'Monthly Electricity Demand Profile [kW]', 'blue',
"test title", p7g2_filename)
In [228]:
# Convert df to dictionary
electric_cost_rows = bu.df_to_dictionaries(electric_use_and_cost)
# Add data and graphs to main dictionary
template_data['electrical_cost_analysis'] = dict(
graphs=[p7g1_url, p7g2_url],
table={'rows': electric_cost_rows},
)
In [229]:
usage_df2.head()
Out[229]:
In [230]:
# Take only needed columns from earlier usage df
heating_usage = usage_df2[['natural_gas_mmbtu', 'fuel_oil_mmbtu', 'district_heat_mmbtu', 'hdd', 'total_heat_mmbtu']]
heating_usage
Out[230]:
In [231]:
# Add in percent change columns
heating_usage['fuel_oil_pct_change'] = heating_usage.fuel_oil_mmbtu.pct_change()
heating_usage['natural_gas_pct_change'] = heating_usage.natural_gas_mmbtu.pct_change()
heating_usage['district_heat_pct_change'] = heating_usage.district_heat_mmbtu.pct_change()
heating_usage['total_heat_pct_change'] = heating_usage.total_heat_mmbtu.pct_change()
heating_usage
Out[231]:
In [232]:
# Get the number of gallons, ccf, and 1,000 pounds of district heat by converting MMBTUs using the supplied conversions
heating_usage['fuel_oil_usage'] = heating_usage.fuel_oil_mmbtu * 1000000 / ut.fuel_btus_per_unit('Oil #1', 'gallons')
heating_usage['natural_gas_usage'] = heating_usage.natural_gas_mmbtu * 1000000 / ut.fuel_btus_per_unit('Natural Gas', 'ccf')
heating_usage
Out[232]:
In [233]:
p8g1_filename, p8g1_url = gu.graph_filename_url(site, "heating_degree_days")
gu.stacked_bar_with_line(heating_usage.reset_index(), 'fiscal_year', ['natural_gas_mmbtu', 'fuel_oil_mmbtu',
'district_heat_mmbtu'], 'hdd',
'Heating Fuel Usage [MMBTU/yr]', 'Heating Degree Days [Base 65F]', "test title", p8g1_filename)
In [234]:
monthly_heating = pd.pivot_table(usage_df1,
values='mmbtu',
index=['fiscal_year', 'fiscal_mo'],
columns=['service_type'],
aggfunc=np.sum
)
monthly_heating.head()
Out[234]:
In [235]:
monthly_heating
Out[235]:
In [236]:
# Add in columns for the missing energy services
missing_services = bu.missing_energy_services(monthly_heating.columns)
bu.add_columns(monthly_heating, missing_services)
# Drop the non-heating services
monthly_heating = monthly_heating.drop(labels=['Electricity', 'Sewer', 'Water'], axis=1)
# Create a total heating column
monthly_heating['total_heating_energy'] = monthly_heating.sum(axis=1)
monthly_heating.head()
Out[236]:
In [237]:
p8g2_filename, p8g2_url = gu.graph_filename_url(site, "monthly_heating_energy_profile")
gu.create_monthly_profile(monthly_heating, 'total_heating_energy', "Monthly Heating Energy Profile [MMBTU]", 'red',
"test title", p8g2_filename)
In [238]:
# Convert df to dictionary
heating_use_rows = bu.df_to_dictionaries(heating_usage)
# Add data and graphs to main dictionary
template_data['heating_usage_analysis'] = dict(
graphs=[p8g1_url, p8g2_url],
table={'rows': heating_use_rows},
)
In [239]:
# Import df that I exported from "alan_report_pages". This can be removed once code is combined
df2 = pd.read_csv(r"C:\Users\dustin\Google Drive\FNSB Data Analysis\data_from_alan_report_pages_df2.csv")
# Use only necessary columns
heating_cost = df2[['fiscal_year', 'Natural Gas', 'Oil #1', 'Steam', 'Total', 'pct_change']]
# Change column names so they aren't the same as the heating usage dataframe
heating_cost = heating_cost.rename(columns={'Natural Gas':'natural_gas_cost',
'Oil #1': 'fuel_oil_cost',
'Steam': 'district_heat_cost',
'Total': 'total_heat_cost',
'pct_change': 'total_heat_cost_pct_change'})
heating_cost
Out[239]:
In [240]:
heating_usage
Out[240]:
In [241]:
# Combine the heating cost and heating use dataframes
heating_cost_and_use = pd.merge(heating_cost, heating_usage, left_on='fiscal_year', right_index=True, how='right')
heating_cost_and_use
Out[241]:
In [242]:
# Create percent change columns
heating_cost_and_use['fuel_oil_pct_change'] = heating_cost_and_use.fuel_oil_cost.pct_change()
heating_cost_and_use['natural_gas_pct_change'] = heating_cost_and_use.natural_gas_cost.pct_change()
heating_cost_and_use['district_heat_pct_change'] = heating_cost_and_use.district_heat_cost.pct_change()
In [243]:
# Create unit cost columns
heating_cost_and_use['fuel_oil_unit_cost'] = heating_cost_and_use.fuel_oil_cost / heating_cost_and_use.fuel_oil_mmbtu
heating_cost_and_use['natural_gas_unit_cost'] = heating_cost_and_use.natural_gas_cost / heating_cost_and_use.natural_gas_mmbtu
heating_cost_and_use['district_heat_unit_cost'] = heating_cost_and_use.district_heat_cost / heating_cost_and_use.district_heat_mmbtu
heating_cost_and_use['building_heat_unit_cost'] = heating_cost_and_use.total_heat_cost / heating_cost_and_use.total_heat_mmbtu
heating_cost_and_use
Out[243]:
In [244]:
# Remove all columns not needed for the Heating Cost Analysis Table
heating_cost_and_use = heating_cost_and_use[['fiscal_year',
'fuel_oil_cost',
'fuel_oil_pct_change',
'natural_gas_cost',
'natural_gas_pct_change',
'district_heat_cost',
'district_heat_pct_change',
'fuel_oil_unit_cost',
'natural_gas_unit_cost',
'district_heat_unit_cost',
'building_heat_unit_cost',
'total_heat_cost',
'total_heat_cost_pct_change']]
heating_cost_and_use
Out[244]:
In [245]:
print (df.shape[0])
# Filter out natural gas customer charges as the unit cost goes to infinity if there is a charge but no use
df_no_gas_cust_charges = df.drop(df[(df['service_type'] == 'Natural Gas') & (df['units'] != 'CCF')].index)
# Filter out records with zero usage, which correspond to things like customer charges, etc.
nonzero_usage = df_no_gas_cust_charges.query("usage > 0")
print (nonzero_usage.shape[0])
nonzero_usage.head()
Out[245]:
In [246]:
# Check to make sure it is okay to drop records w/ zero mmbtu
zero_mmbtu = nonzero_usage.query("mmbtu == 0")
zero_mmbtu.service_type.unique()
Out[246]:
In [247]:
nonzero_usage = nonzero_usage.query("mmbtu > 0")
print (nonzero_usage.shape[0])
nonzero_usage.head()
Out[247]:
In [248]:
nonzero_usage.cost.min()
Out[248]:
In [249]:
# Further analysis showed that these zero and less than zero costs were for waste oil; presumably less than zero costs
# was because they were able to avoid disposal fees or something
nonzero_usage.query("cost <= 0")
Out[249]:
In [250]:
# Looks like waste oil accounts for the negative costs
BALHHW = df_raw[(df_raw['Site ID'] == 'BALHHW')].sort_values(by='Cost')
BALHHW[BALHHW['Service Name'] == 'Oil #1']
Out[250]:
In [251]:
# No idea why these costs are negative but it still seems like it should be filtered out
df_raw[(df_raw['Site ID'] == 'BENBG1')].sort_values(by='Cost')
Out[251]:
In [252]:
# Filter out zero cost or less records (see analysis above)
nonzero_usage = nonzero_usage.query("cost > 0")
In [253]:
# Get the total fuel cost and usage for all buildings by year and month
grouped_nonzero_usage = nonzero_usage.groupby(['service_type', 'fiscal_year', 'fiscal_mo']).sum()
# Divide the total cost for all building by the total usage for all buildings so that the average is weighted correctly
grouped_nonzero_usage['avg_price_per_mmbtu'] = grouped_nonzero_usage.cost / grouped_nonzero_usage.mmbtu
# Get only the desired outcome, price per million BTU for each fuel type, and the number of calendar months it is based on
# i.e. the number of months of bills for each fuel for all buildings for that particular month.
grouped_nonzero_usage = grouped_nonzero_usage[['avg_price_per_mmbtu', 'cal_mo']]
In [254]:
# Drop electricity from the dataframe.
grouped_nonzero_usage = grouped_nonzero_usage.reset_index()
grouped_nonzero_heatfuel_use = grouped_nonzero_usage.query("service_type != 'Electricity'")
# Create a column for each service type
grouped_nonzero_heatfuel_use = pd.pivot_table(grouped_nonzero_heatfuel_use,
values='avg_price_per_mmbtu',
index=['fiscal_year', 'fiscal_mo'],
columns='service_type'
)
grouped_nonzero_heatfuel_use = grouped_nonzero_heatfuel_use.reset_index()
grouped_nonzero_heatfuel_use.head()
Out[254]:
In [255]:
raw_oil = df_raw[(df_raw['Service Name'] == 'Oil #1') & (df_raw['Item Description'] != 'Fuel Oil #1 (Gallons)')]
In [256]:
raw_oil['Item Description'].unique()
Out[256]:
In [257]:
raw_oil.query("Units != 'Gallons'")['Cost'].sum()
Out[257]:
In [258]:
raw_gas_analysis = df_raw[(df_raw['Service Name'] == 'Natural Gas') & (df_raw['Item Description'] != 'Natural gas (CCF)')]
In [259]:
raw_gas_analysis['Item Description'].unique()
Out[259]:
In [260]:
raw_gas_analysis[raw_gas_analysis['Item Description'] == 'Misc. credit']
Out[260]:
In [261]:
raw_gas_analysis[raw_gas_analysis['Item Description'] == 'Cost adjustments']
Out[261]:
In [262]:
raw_gas_analysis[raw_gas_analysis['Item Description'] == 'Previous balance adj.']
Out[262]:
In [263]:
# Heating energy use, in MMBTUs
monthly_heating.head()
Out[263]:
In [264]:
# Query the dataframe for natural gas charges with CCF only?
df.query("service_type == 'Natural Gas'").head()
Out[264]:
In [265]:
# Exclude demand charges from the natural gas costs. This is because the unit costs for natural gas go to infinity
# when there is zero usage but a customer charge
cost_df1 = df.drop(df[(df['service_type'] == 'Natural Gas') & (df['units'] != 'CCF')].index)
cost_df1.query("service_type == 'Natural Gas'").head()
Out[265]:
In [266]:
# Create cost dataframe for given site from processed data
cost_df1 = cost_df1.query('site_id == @site')[['service_type', 'fiscal_year', 'fiscal_mo', 'cost']]
cost_df1.head()
Out[266]:
In [267]:
# Split out by service type
monthly_heating_cost = pd.pivot_table(cost_df1,
values='cost',
index=['fiscal_year', 'fiscal_mo'],
columns=['service_type'],
aggfunc=np.sum
)
monthly_heating_cost.head()
Out[267]:
In [268]:
# Add in columns for the missing energy services
missing_services = bu.missing_energy_services(monthly_heating_cost.columns)
bu.add_columns(monthly_heating_cost, missing_services)
# Drop the non-heating services
monthly_heating_cost = monthly_heating_cost.drop(labels=['Electricity', 'Sewer', 'Water'], axis=1)
# Create a total heating column
monthly_heating_cost['total_heating_cost'] = monthly_heating_cost.sum(axis=1)
monthly_heating_cost.head()
Out[268]:
In [269]:
monthly_heating_cost = monthly_heating_cost.rename(columns={'Natural Gas':'Natural Gas Cost',
'Oil #1':'Oil #1 Cost',
'Steam': 'Steam Cost'})
monthly_heating_cost.head()
Out[269]:
In [270]:
monthly_heat_energy_and_use = pd.merge(monthly_heating_cost, monthly_heating, left_index=True, right_index=True, how='outer')
monthly_heat_energy_and_use.head()
Out[270]:
In [271]:
# Create unit cost columns in $ / MMBTU for each fuel type
monthly_heat_energy_and_use['fuel_oil_unit_cost'] = monthly_heat_energy_and_use['Oil #1 Cost'] / monthly_heat_energy_and_use['Oil #1']
monthly_heat_energy_and_use['natural_gas_unit_cost'] = monthly_heat_energy_and_use['Natural Gas Cost'] / monthly_heat_energy_and_use['Natural Gas']
monthly_heat_energy_and_use['district_heat_unit_cost'] = monthly_heat_energy_and_use['Steam Cost'] / monthly_heat_energy_and_use['Steam']
monthly_heat_energy_and_use['building_unit_cost'] = monthly_heat_energy_and_use.total_heating_cost / monthly_heat_energy_and_use.total_heating_energy
monthly_heat_energy_and_use.head()
Out[271]:
In [272]:
# Reset the index for easier processing
monthly_heat_energy_and_use = monthly_heat_energy_and_use.reset_index()
In [273]:
# Add in unit costs for fuels that are currently blank
unit_cost_cols = ['fuel_oil_unit_cost', 'natural_gas_unit_cost', 'district_heat_unit_cost']
service_types = ['Oil #1_avg_unit_cost', 'Natural Gas_avg_unit_cost', 'Steam_avg_unit_cost']
unit_cost_dict = dict(zip(unit_cost_cols,service_types))
In [274]:
# Add in average unit costs calculated from all sites for each month
monthly_heat_energy_and_use = pd.merge(monthly_heat_energy_and_use, grouped_nonzero_heatfuel_use,
left_on=['fiscal_year', 'fiscal_mo'], right_on=['fiscal_year', 'fiscal_mo'],
how='left', suffixes=('', '_avg_unit_cost'))
monthly_heat_energy_and_use.head()
Out[274]:
In [275]:
# Check each column to see if it is NaN (identified when the value does not equal itself) and if it is, fill with the average
# price per MMBTU taken from all sites
for col, service in unit_cost_dict.items():
monthly_heat_energy_and_use[col] = np.where(monthly_heat_energy_and_use[col] != monthly_heat_energy_and_use[col],
monthly_heat_energy_and_use[service],
monthly_heat_energy_and_use[col])
In [276]:
def fiscal_to_calendar(fiscal_year, fiscal_mo):
"""Converts a fiscal year and month into a calendar year and month for graphing purposes.
Returns (calendar_year, calendar_month) tuple."""
if fiscal_mo > 6:
calendar_month = fiscal_mo - 6
calendar_year = fiscal_year
else:
calendar_month = fiscal_mo + 6
calendar_year = fiscal_year - 1
return (calendar_year, calendar_month)
In [277]:
# Add calendar year and month columns
cal_year = []
cal_mo = []
for fiscal_year, fiscal_mo in zip(monthly_heat_energy_and_use.fiscal_year, monthly_heat_energy_and_use.fiscal_mo):
CalYear, CalMo = fiscal_to_calendar(fiscal_year, fiscal_mo)
cal_year.append(CalYear)
cal_mo.append(CalMo)
monthly_heat_energy_and_use['calendar_year'] = cal_year
monthly_heat_energy_and_use['calendar_mo'] = cal_mo
monthly_heat_energy_and_use.head()
Out[277]:
In [278]:
# Create a date column using the calendar year and month to pass to the graphing function
def get_date(row):
return datetime.date(year=row['calendar_year'], month=row['calendar_mo'], day=1)
monthly_heat_energy_and_use['date'] = monthly_heat_energy_and_use[['calendar_year','calendar_mo']].apply(get_date, axis=1)
monthly_heat_energy_and_use.head()
Out[278]:
In [289]:
%matplotlib inline
In [290]:
p9g1_filename, p9g1_url = gu.graph_filename_url(site, "energy_cost")
gu.fuel_price_comparison_graph(monthly_heat_energy_and_use, 'date', unit_cost_cols, 'building_unit_cost', p9g1_filename)
In [280]:
monthly_heat_energy_and_use.head()
Out[280]:
In [281]:
old_usage_cols = ['Natural Gas', 'Oil #1', 'Steam']
In [282]:
# Create an indicator for whether a given heating fuel is available for the facility. This is done by checking the use for all
# months- if it is zero, then that building doesn't have the option to use that type of fuel.
for col in old_usage_cols:
new_col_name = col + "_available"
monthly_heat_energy_and_use[new_col_name] = np.where(monthly_heat_energy_and_use[col].sum() == 0, 0, 1)
monthly_heat_energy_and_use.head()
Out[282]:
In [283]:
# Calculate what it would have cost if the building used only one fuel type
available_cols = ['Oil #1_available','Natural Gas_available','Steam_available']
available_dict = dict(zip(unit_cost_cols, available_cols))
print (available_dict)
hypothetical_cost_cols = []
for unit_cost, avail_col in available_dict.items():
new_col_name = unit_cost + "_hypothetical"
hypothetical_cost_cols.append(new_col_name)
monthly_heat_energy_and_use[new_col_name] = monthly_heat_energy_and_use[unit_cost] * \
monthly_heat_energy_and_use.total_heating_energy * monthly_heat_energy_and_use[avail_col]
monthly_heat_energy_and_use.head()
Out[283]:
In [284]:
# Calculate the monthly savings to the building by not using the most expensive available fuel entirely
monthly_heat_energy_and_use['fuel_switching_savings'] = monthly_heat_energy_and_use[hypothetical_cost_cols].max(axis=1) \
- monthly_heat_energy_and_use.total_heating_cost
In [285]:
# Sort dataframe to calculate cumulative value
monthly_heat_energy_and_use = monthly_heat_energy_and_use.sort_values(by='date', ascending=True)
# Calculate cumulative value
monthly_heat_energy_and_use['cumulative_fuel_switching_savings'] = np.cumsum(monthly_heat_energy_and_use.fuel_switching_savings)
monthly_heat_energy_and_use.head()
Out[285]:
In [286]:
p9g2_filename, p9g2_url = gu.graph_filename_url(site, "cumulative_fuel_switching_savings")
gu.create_monthly_line_graph(monthly_heat_energy_and_use, 'date', 'cumulative_fuel_switching_savings',
'Cumulative Fuel Switching Savings Realized [$]', p9g2_filename)
In [287]:
# Convert df to dictionary
heating_cost_rows = bu.df_to_dictionaries(heating_cost_and_use)
# Add data and graphs to main dictionary
template_data['heating_cost_analysis'] = dict(
graphs=[p9g1_url, p9g2_url],
table={'rows': heating_cost_rows},
)
In [ ]:
water_use = df.query('site_id == @site')[['service_type', 'fiscal_year', 'fiscal_mo','cost', 'usage', 'units']]
water_use.head()
In [ ]:
# Create month count field for all months that have water and sewer bills
water_use_only = water_use.query("service_type == 'Water'")
water_months_present = bu.months_present(water_use_only)
water_mo_count = bu.month_count(water_months_present)
water_mo_count
In [ ]:
# Create annual water gallon usage dataframe
water_gal_df = pd.pivot_table(water_use,
values='usage',
index=['fiscal_year',],
columns=['service_type'],
aggfunc=np.sum
)
water_gal_df.head()
In [ ]:
# Use only required columns
water_gal_df = water_gal_df[['Water']]
# Calculate percent change column
water_gal_df['water_use_pct_change'] = water_gal_df.Water.pct_change()
In [ ]:
In [ ]:
# Create annual water and sewer cost dataframe
water_cost_df = pd.pivot_table(water_use,
values='cost',
index=['fiscal_year',],
columns=['service_type'],
aggfunc=np.sum
)
water_cost_df.head()
In [ ]:
# Calculate totals, percent change
water_cost_df = water_cost_df[water_cost_df.columns.difference(['Electricity', 'Natural Gas', 'Oil #1', 'Steam', 'Refuse'])]
# Rename columns only if they exist in the water cost dataframe
rename_dict = {'Sewer': 'Sewer Cost',
'Water': 'Water Cost'}
water_cost_df = water_cost_df.rename(columns={k: v for k, v in rename_dict.items() if k in water_cost_df})
# First check to make sure sewer data is included; if so, calculate total cost
water_cost_df['total_water_sewer_cost'] = np.where('Sewer Cost' in list(water_cost_df.columns.values),
water_cost_df['Sewer Cost'] + water_cost_df['Water Cost'],
water_cost_df['Water Cost'])
In [ ]:
water_cost_df['water_cost_pct_change'] = water_cost_df['Water Cost'].pct_change()
# First check to make sure sewer data is included; if so, calculate percent change
water_cost_df['sewer_cost_pct_change'] = np.where('Sewer Cost' in list(water_cost_df.columns.values),
water_cost_df['Sewer Cost'].pct_change(),
np.nan)
water_cost_df['total_water_sewer_cost_pct_change'] = water_cost_df.total_water_sewer_cost.pct_change()
In [ ]:
In [ ]:
# Merge use and cost dataframes
water_use_and_cost = pd.merge(water_cost_df, water_gal_df, left_index=True, right_index=True, how='outer')
water_use_and_cost.head()
In [ ]:
water_use_and_cost['water_unit_cost'] = water_use_and_cost.total_water_sewer_cost / water_use_and_cost.Water
water_use_and_cost['water_unit_cost_pct_change'] = water_use_and_cost.water_unit_cost.pct_change()
# Use only complete years
water_use_and_cost['month_count'] = water_mo_count
water_use_and_cost = water_use_and_cost.query("month_count == 12")
water_use_and_cost = water_use_and_cost.drop('month_count', axis=1)
water_use_and_cost = water_use_and_cost.sort_index(ascending=False)
water_use_and_cost = water_use_and_cost.rename(columns={'Sewer Cost':'sewer_cost',
'Water Cost':'water_cost',
'total_water_sewer_cost':'total_cost',
'total_water_sewer_cost_pct_change':'total_cost_pct_change',
'Water':'total_usage',
'water_usage_pct_change':'total_usage_pct_change',
'water_unit_cost':'total_unit_cost',
'water_unit_cost_pct_change':'total_unit_cost_pct_change'
})
water_use_and_cost
In [ ]:
p10g1_filename, p10g1_url = gu.graph_filename_url(site, "utility_cost")
gu.create_stacked_bar(water_use_and_cost.reset_index(), 'fiscal_year', ['sewer_cost', 'water_cost'],
'Utility Cost [$]', "test title", p10g1_filename)
In [ ]:
# Create monthly water gallon dataframe
water_gal_df_monthly = pd.pivot_table(water_use,
values='usage',
index=['fiscal_year', 'fiscal_mo'],
columns=['service_type'],
aggfunc=np.sum
)
water_gal_df_monthly.head()
In [ ]:
p10g2_filename, p10g2_url = gu.graph_filename_url(site, "monthly_water_usage_profile")
gu.create_monthly_profile(water_gal_df_monthly, 'Water', 'Monthly Water Usage Profile [gallons]', 'green', p10g2_filename)
In [ ]:
# Convert df to dictionary
water_rows = bu.df_to_dictionaries(water_use_and_cost)
# Add data and graphs to main dictionary
template_data['water_analysis'] = dict(
graphs=[p10g1_url, p10g2_url],
table={'rows': water_rows},
)
In [ ]:
template_data