In [17]:
import pandas as pd
import urllib2,urllib
import json
import datetime
import numpy as np
from copy import deepcopy
import pylab
import matplotlib.pyplot as plt
%matplotlib inline
In [18]:
cal_temps_text = pd.read_csv('../../data/cal_complete_temps.csv')
cal_temps_text.columns
index_df_by_date(cal_temps_text)
# sort_index() is NOT in-place; the original discarded its result, leaving the
# frame unsorted. Assign it back so date-slicing below behaves as intended.
cal_temps_text = cal_temps_text.sort_index()
In [19]:
cal_gas_text = pd.read_csv('../../data/calt_gas_data.csv')
cal_gas_text.columns
index_df_by_date(cal_gas_text)
# sort_index() returns a new frame; the original discarded the result.
cal_gas_text = cal_gas_text.sort_index()
In [20]:
cal_elec_text = pd.read_csv('../../data/cal_elec_data.csv')
cal_elec_text.columns
index_df_by_date(cal_elec_text)
# sort_index() returns a new frame; the original discarded the result.
cal_elec_text = cal_elec_text.sort_index()
In [21]:
cal_prism_text = pd.read_csv('../../data/caltest_prism_run_elec.csv')
print cal_prism_text.columns
index_df_by_date(cal_prism_text)
cal_prism_text.sort_index()
print
In [22]:
# List every account number available for testing.
cal_elec_text['account_number'].unique()
Out[22]:
In [30]:
# Pipeline: take an account number, look up its electricity usage in the
# cal_elec and cal_prism dataframes; use cal_prism to find the account's zip
# code; use Google's geocoding API to turn the zip code into lat/lng; use
# NREL's API to find the corresponding weather station; use that station_id to
# find the temperatures at that location.
# account number -> zip code (cal_prism) -> lat/lng (Google) ->
# station_id (NREL) -> temperature (cal_temps)
account_number = 'OVYM2X'
cal_elec = deepcopy(cal_elec_text.loc[cal_elec_text['account_number'] == account_number])
cal_gas = deepcopy(cal_gas_text.loc[cal_gas_text['account_number'] == account_number])
cal_prism = deepcopy(cal_prism_text.loc[cal_prism_text['account_id'] == account_number])
# Input parameters (eventually will come from the front-end website).
# .iloc[0] takes the first matching row positionally. The original's [0] was a
# label lookup on a date-indexed, filtered frame, which only worked via a
# since-removed pandas integer fallback.
zip_code = str(cal_prism['zipcode'].iloc[0])
pre_date = pd.to_datetime('1/10/2010')
post_date = pd.to_datetime('1/10/2010')
cal_hdd_temp_range = range(50, 65)
cal_cdd_temp_range = range(50, 65)
model_type = ['both']
In [31]:
# Map the zip code to its nearest NREL weather station, then restrict the
# temperature table to that station's readings ('usaf' is the station id).
station_id=get_station_id_from_zip_code(zip_code)
cal_temps=deepcopy(cal_temps_text.loc[cal_temps_text['usaf'] == station_id])
In [32]:
#BOTH
a=[]
c=[]
for type_of_dd in model_type:
best_r2_adj=0
for cdd_temp in cal_cdd_temp_range:
cal_temps=get_cdd(cdd_temp,cal_temps)
for hdd_temp in cal_hdd_temp_range:
cal_temps=get_hdd(hdd_temp,cal_temps)
cal_elec['use_per_day']=cal_elec['kwh']/cal_elec['datenum'].diff()
cal_elec['num_days']=cal_elec['datenum'].diff()
cal_all=pd.merge(cal_elec,cal_temps,left_index=True,right_index=True)
if(type_of_dd is 'hdd' or 'both'):
cal_all['hdd_per_day']=cal_all['hdd_cum'].diff()/cal_all['datenum'].diff()
if(type_of_dd is 'cdd' or 'both'):
cal_all['cdd_per_day']=cal_all['cdd_cum'].diff()/cal_all['datenum'].diff()
cal_all_pre=cal_all.sort_index()[:pre_date]
cal_all_post=cal_all.sort_index()[post_date:]
results=degree_day_regression(cal_all_pre,type_of_dd)
r2_adj=np.array(results['R2_adj'])[0]
if(r2_adj>best_r2_adj):
best_cdd_temp=cdd_temp
best_hdd_temp=hdd_temp
best_r2_adj=r2_adj
slope_hdd=np.array(results['HDD'])[0]
slope_cdd=np.array(results['CDD'])[0]
intercept=np.array(results['intercept'])[0]
a.append(r2_adj)
c.append(a)
a=[]
#print str(cdd_temp)+"," +str(hdd_temp)+" "+ str(r2_adj)
print "Best CDD set point:" + str(best_cdd_temp)
print "BEST HDD set point:" + str(best_hdd_temp)
print "BEST r2_adj:" + str(best_r2_adj)
print "Best HDD slope:"+str(slope_hdd)
print "Best CDD slope:"+str(slope_cdd)
print "Best intercept:"+str(intercept)
In [45]:
# Scatter of daily use vs. temperature, overlaid with the fitted CDD (black)
# and HDD (blue) regression lines around their best set points.
plt.plot(cal_all['temps'], cal_all['use_per_day'], '.')
step_cool = (36) / float(len(cal_temps['temps']))
step_heat = (22) / float(len(cal_temps['temps']))
temps_cool = np.arange(60, 100, step_cool)
temps_heat = np.arange(30, 60, step_heat)
fit_cool = (temps_cool - best_cdd_temp) * slope_cdd + intercept
fit_heat = (temps_heat - best_hdd_temp) * slope_hdd + intercept
plt.plot(temps_cool, fit_cool, 'k')
plt.plot(temps_heat, fit_heat, 'b')
Out[45]:
In [ ]:
daily_savings=[]
num_post_days_sum=0
for i,val in enumerate(cal_all_post['cdd_per_day']):
cdd_post_day=cal_all_post['cdd_per_day'][i]
hdd_post_day=cal_all_post['hdd_per_day'][i]
use_post_day=cal_all_post['use_per_day'][i]
num_post_days=cal_all_post['num_days'][i]
num_post_days_sum=num_post_days_sum+num_post_days
kwh_per_day=(hdd_post_day*slope_hdd+cdd_post_day*slope_cdd+intercept)
daily_savings.append((kwh_per_day-use_post_day)*0.15*num_post_days)
print 'You have an estimated savings of $'+str(sum(daily_savings)/num_post_days_sum*365)+" per year!"
In [ ]:
daily_savings=[]
num_pre_days_sum=0
for i,val in enumerate(cal_all_pre['cdd_per_day']):
cdd_pre_day=cal_all_pre['cdd_per_day'][i]
hdd_pre_day=cal_all_pre['hdd_per_day'][i]
use_pre_day=cal_all_pre['use_per_day'][i]
num_pre_days=cal_all_pre['num_days'][i]
if(num_pre_days != num_pre_days):
num_pre_days=0
if(use_pre_day != use_pre_day):
use_pre_day=0
num_pre_days_sum=num_pre_days_sum+num_pre_days
kwh_per_day=(hdd_pre_day*slope_hdd+cdd_pre_day*slope_cdd+intercept)
if(kwh_per_day != kwh_per_day):
kwh_per_day=0
daily_savings.append((kwh_per_day-use_pre_day)*0.15*num_pre_days)
print 'You have an estimated savings of $'+str(sum(daily_savings)/num_pre_days_sum*365)+" per year!"
In [20]:
#This plots the CDD and HDD so we can better visualize where the best set point temperatures are.
plt.clf()
pylab.rcParams['figure.figsize'] = 12, 9
# Bounds of the set-point grid searched above; `c` holds one r2_adj per
# (cdd_temp, hdd_temp) cell, one row per cdd_temp.
cal_hdd_min=cal_hdd_temp_range[0]
cal_hdd_max=cal_hdd_temp_range[len(cal_hdd_temp_range)-1]
cal_cdd_min=cal_cdd_temp_range[0]
cal_cdd_max=cal_cdd_temp_range[len(cal_cdd_temp_range)-1]
# imshow extent is [x_min, x_max, y_min, y_max].
# NOTE(review): this puts the HDD range on the x-axis while xlabel says CDD
# (rows of `c` vary with cdd_temp, i.e. the y-axis). The mismatch is invisible
# here because both ranges are identical (50-65) -- confirm the orientation
# before making the two ranges differ.
extent = [cal_hdd_min,cal_hdd_max,cal_cdd_min,cal_cdd_max]
plt.imshow(c,extent=extent,cmap='Blues',interpolation='none')
plt.xlabel('CDD Set Point Temperature')
plt.ylabel('HDD Set Point Temperature')
plt.show()
In [3]:
def index_df_by_date(df):
    """Parse df['date'] to datetimes and make it the index, in place.

    Mutates ``df``: the 'date' column is consumed and becomes a
    DatetimeIndex. Returns None.
    """
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)
    # The original called df.index.snap() here and discarded the result;
    # DatetimeIndex.snap() returns a NEW index, so that line was a no-op
    # and has been removed.
In [4]:
def get_station_id_from_zip_code(zip_code):
    """Resolve a zip code to an NREL weather station id.

    Chains the two lookups: zip code -> (lat, lng) -> station id.
    """
    lat, lng = get_lat_lng_from_zip_code(zip_code)
    return get_station_id_from_lat_lng(lat, lng)
In [5]:
def get_station_id_from_lat_lng(lat, lng):
    """Query NREL's solar data API for the TMY3 station nearest (lat, lng).

    Returns the numeric station id (the digits after '-' in the TMY3 id,
    e.g. '1-723890' -> 723890).
    """
    url = 'http://developer.nrel.gov/api/solar/data_query/v1.json?api_key=API_KEY&lat=' + str(lat) + '&lon=' + str(lng)
    response = urllib2.urlopen(url)
    parsed_json = json.loads(response.read())
    tmy3_id = parsed_json['outputs']['tmy3']['id']
    return int(str(tmy3_id).split('-')[1])
In [6]:
def get_lat_lng_from_zip_code(zip_code):
    """Geocode a zip code via the Google Maps Geocoding API.

    Returns [lat, lng] of the first geocoding result.
    Raises IndexError/KeyError if the API returns no results.
    NOTE(review): the API key is a hardcoded placeholder -- supply a real key
    from configuration/environment, never commit one here.
    """
    # urllib.quote_plus percent-encodes ALL reserved characters (space -> '+',
    # ',' -> '%2C', etc.); the original hand-replaced only space and comma,
    # producing broken URLs for any other special character.
    address = urllib.quote_plus(zip_code)
    f = urllib2.urlopen('https://maps.googleapis.com/maps/api/geocode/json?address='+address+'&key=API_KEY')
    json_string = f.read()
    parsed_json_lat_lng = json.loads(json_string)
    location = parsed_json_lat_lng['results'][0]['geometry']['location']
    return [location['lat'], location['lng']]
In [7]:
def get_hdd(ref_temp,df):
    """Add heating-degree-day columns to df (mutates df and returns it).

    hdd: degrees of df.temps below ref_temp, floored at 0.
    hdd_cum: running total of hdd.
    """
    df['hdd'] = ref_temp - df.temps
    # Assign through df.loc directly; the original's df['hdd'].loc[...] = 0 is
    # chained assignment, which pandas does not guarantee to write back
    # (SettingWithCopyWarning, silently ignored under copy-on-write).
    df.loc[df.hdd < 0, 'hdd'] = 0
    df['hdd_cum'] = df.hdd.cumsum()
    return df
def get_cdd(ref_temp,df):
    """Add cooling-degree-day columns to df (mutates df and returns it).

    cdd: degrees of df.temps above ref_temp, floored at 0.
    cdd_cum: running total of cdd.
    """
    df['cdd'] = df.temps - ref_temp
    # Direct .loc assignment instead of the original's chained
    # df['cdd'].loc[...] = 0, which pandas may not write back.
    df.loc[df.cdd < 0, 'cdd'] = 0
    df['cdd_cum'] = df.cdd.cumsum()
    return df
In [8]:
def _ols_stats(y, X):
    """OLS fit of y on design matrix X (intercept column last).

    Returns (beta, std_err, r2, r2_adj, nobs) via the standard formulas,
    matching what pandas' removed pd.ols reported.
    """
    n, k = X.shape
    beta, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
    resid = y - X.dot(beta)
    ssr = float((resid ** 2).sum())
    sst = float(((y - y.mean()) ** 2).sum())
    sigma2 = ssr / (n - k)  # residual variance, n - k degrees of freedom
    cov_beta = sigma2 * np.linalg.inv(X.T.dot(X))
    std_err = np.sqrt(np.diag(cov_beta))
    r2 = 1.0 - ssr / sst
    r2_adj = 1.0 - (1.0 - r2) * (n - 1.0) / (n - k)
    return beta, std_err, r2, r2_adj, n

def degree_day_regression(df, x_opt='both'):
    '''Weather-normalization regression of energy use on degree days.

    Reimplemented with an explicit numpy OLS fit because pd.ols was
    removed from pandas.
    --------------
    df: dataframe that includes
        use_per_day
        hdd_per_day (required for 'hdd'/'both')
        cdd_per_day (required for 'cdd'/'both')
    ---------------
    x_opt: 'hdd', 'cdd', or 'both' (default) -- which degree-day
        covariates to regress on.
    Returns a one-row DataFrame with the intercept, the slope(s), their
    standard errors, R2, adjusted R2 and the number of observations used.
    Rows with NaN in any used column are dropped, as pd.ols did.
    '''
    if x_opt == 'hdd':
        names = ['HDD']
        cols = [df.hdd_per_day]
    elif x_opt == 'cdd':
        names = ['CDD']
        cols = [df.cdd_per_day]
    elif x_opt == 'both':
        names = ['CDD', 'HDD']
        cols = [df.cdd_per_day, df.hdd_per_day]
    else:
        raise ValueError("x_opt must be 'hdd', 'cdd' or 'both'")

    y = np.asarray(df.use_per_day, dtype=float)
    X = np.column_stack([np.asarray(c, dtype=float) for c in cols] + [np.ones(len(y))])
    # pd.ols silently dropped incomplete observations; do the same (the first
    # diff() row of the caller's frame is always NaN).
    keep = np.isfinite(y) & np.all(np.isfinite(X), axis=1)
    beta, std_err, r2, r2_adj, nobs = _ols_stats(y[keep], X[keep])

    row = [beta[-1], std_err[-1]]  # intercept is the last design column
    columns = ['intercept', 'intercept_std_err']
    for i, name in enumerate(names):
        row += [beta[i], std_err[i]]
        columns += [name, name + '_std_err']
    # The original 'hdd' branch mislabeled R2 with a stray leading space
    # (' R2'); all branches now use the same column names.
    row += [r2, r2_adj, nobs]
    columns += ['R2', 'R2_adj', 'N_reads']
    return pd.DataFrame([row], columns=columns)
In [ ]: