In [9]:
import time
import pickle
import glob
import os
import pprint
import datetime
import warnings
import pandas as pd
import numpy as np
import bench_util as bu
import graph_util as gu
import template_util
import shutil
import settings # the file holding settings for this script
from importlib import reload
In [22]:
# import matplotlib pyplot commands
from matplotlib.pyplot import *
# Show Plots in the Notebook
%matplotlib inline
# 'style' the plot like fivethirtyeight.com website
style.use('bmh')
rcParams['figure.figsize']= (10, 8) # set Chart Size
rcParams['font.size'] = 14 # set Font size in Chart
# or if imported as plt
import matplotlib.pyplot as plt
plt.style.use('bmh')
In [10]:
dfaris = pd.read_pickle('data/aris_records.pkl')
dfaris.head()
Out[10]:
In [18]:
dfaris['Thru_year'] = [x.year for x in dfaris.Thru]
dfaris.head()
Out[18]:
In [19]:
site_yr = list(set(zip(dfaris['Site ID'], dfaris.Thru_year)))
len(site_yr)
Out[19]:
In [21]:
dfsu = pd.DataFrame(site_yr, columns=['site_id', 'year'])
dfsu.head()
Out[21]:
In [23]:
dfsu.year.hist()
Out[23]:
In [26]:
df_yr_ct = dfsu.groupby('site_id').count()
df_yr_ct.year.hist()
xlabel('Number of Years of data')
ylabel('Number of Sites')
Out[26]:
In [27]:
df_yr_ct.query('year > 8')
Out[27]:
In [28]:
len(df_yr_ct)
Out[28]:
In [2]:
df = pickle.load(open('df_processed.pkl', 'rb'))
ut = pickle.load(open('util_obj.pkl', 'rb'))
df.head()
Out[2]:
In [7]:
last_complete_year = 2017
df1 = df.query('fiscal_year == @last_complete_year')
# Get Total Utility cost by building. This includes non-energy utilities as well.
df2 = df1.pivot_table(index='site_id', values=['cost'], aggfunc=np.sum)
df2['fiscal_year'] = last_complete_year
df2.reset_index(inplace=True)
df2.set_index(['site_id', 'fiscal_year'], inplace=True)
df2 = bu.add_month_count_column_by_site(df2, df1)
df2.head()
Out[7]:
In [8]:
df2.query('month_count==12').head()
Out[8]:
In [60]:
df.sum()
Out[60]:
In [61]:
df.service_type.unique()
Out[61]:
In [33]:
reload(bu)
# Filter down to only services that are energy services.
energy_services = bu.missing_energy_services([])
df4 = df.query('service_type==@energy_services').copy()
# Sum Energy Costs and Usage
df5 = pd.pivot_table(df4, index=['site_id', 'fiscal_year'], values=['cost', 'mmbtu'], aggfunc=np.sum)
df5.head()
Out[33]:
In [34]:
# Add a column showing number of months present in each fiscal year.
df5 = bu.add_month_count_column_by_site(df5, df4)
df5.head()
Out[34]:
In [35]:
dfe = df4.query("service_type=='Electricity'").groupby(['site_id', 'fiscal_year']).sum()[['mmbtu']]
dfe.rename(columns={'mmbtu': 'elec_mmbtu'}, inplace = True)
df5 = df5.merge(dfe, how='left', left_index=True, right_index=True)
df5['elec_mmbtu'] = df5['elec_mmbtu'].fillna(0.0)
df5['heat_mmbtu'] = df5.mmbtu - df5.elec_mmbtu
df5.head()
Out[35]:
In [36]:
# Create a DataFrame with site, year, month and degree-days, but only one row
# for each site/year/month combo.
dfd = df4[['site_id', 'fiscal_year', 'fiscal_mo']].copy()
dfd.drop_duplicates(inplace=True)
ut.add_degree_days_col(dfd)
# Use the agg function below so that a NaN will be returned for the year
# if any monthly values are NaN
dfd = dfd.groupby(['site_id', 'fiscal_year']).agg({'degree_days': lambda x: np.sum(x.values)})[['degree_days']]
dfd.head()
Out[36]:
In [37]:
df5 = df5.merge(dfd, how='left', left_index=True, right_index=True)
df5.head()
Out[37]:
In [38]:
# Add in some needed building like square footage, primary function
# and building category.
df_bldg = ut.building_info_df()
df_bldg.head()
Out[38]:
In [39]:
# Shrink to just the needed fields and remove index
df_info = df_bldg[['sq_ft', 'site_category', 'primary_func']].copy().reset_index()
# Remove the index from df5 so that merging is easier.
df5.reset_index(inplace=True)
# merge in building info
df5 = df5.merge(df_info, how='left')
df5.head()
Out[39]:
In [40]:
df5.tail()
Out[40]:
In [41]:
# Look at one that is missing from Building Info to see if
# Left join worked.
df5.query('site_id == "TWOCOM"')
Out[41]:
In [42]:
df5['eui'] = df5.mmbtu * 1e3 / df5.sq_ft
df5['eci'] = df5.cost / df5.sq_ft
df5['specific_eui'] = df5.heat_mmbtu * 1e6 / df5.degree_days / df5.sq_ft
# Restrict to full years
df5 = df5.query("month_count == 12").copy()
df5.head()
Out[42]:
In [43]:
df5 = df5[['site_id', 'fiscal_year', 'eui', 'eci', 'specific_eui', 'site_category', 'primary_func']].copy()
df5.head()
Out[43]:
In [44]:
df5.to_pickle('df5.pkl')
In [45]:
pd.read_pickle('df5.pkl').head()
Out[45]:
In [67]:
site_id = '03'
df = pd.read_pickle('df_processed.pkl', compression='bz2')
df_utility_cost = pd.read_pickle('df_utility_cost.pkl')
df_usage = pd.read_pickle('df_usage.pkl')
util_obj = pickle.load(open('util_obj.pkl', 'rb'))
In [68]:
df_utility_cost.head()
Out[68]:
In [71]:
df_usage.head()
Out[71]: