In [39]:
#air=0,2
#fridge=0,.12
#install sqlalchemy,pymongo,scikit-learn,update pandas
import sys
sys.path.append('../../') # or non-Unix equivalent (add wikienergy/ to path)
import numpy as np
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
from disaggregator import PecanStreetDatasetAdapter as psda
from disaggregator import utils
from disaggregator import fhmm
from disaggregator import evaluation_metrics as metric
from disaggregator import appliance as app
from hmmlearn import hmm
from copy import deepcopy
import pymc
import pylab
import pandas as pd
In [15]:
#Load Datasets
# Connect to the shared Pecan Street Postgres database, find the data ids that
# have real 'air1' readings, then pull raw traces for a slice of houses.
devices_types_unsampled={}
ids_for_devices={}
# NOTE(review): placeholder credentials embedded in the URL -- supply real
# credentials via an environment variable or config file; never commit them.
db_url='postgresql://USERNAME:PASSWORD@db.wiki-energy.org:5432/postgres'
psda.set_url(db_url)
schema = 'shared'
tables= psda.get_table_names(schema)
print tables
In [16]:
# Magic index 3 picks one table out of the list printed above -- verify it is
# the intended table before re-running on a fresh kernel.
table=tables[3]
print table
ids_device_name='air1'
ids_for_devices[ids_device_name]=psda.get_dataids_with_real_values(schema,table,ids_device_name)
print ids_for_devices[ids_device_name]
In [17]:
# Load 10 houses, skipping the first 10 ids.  The same id slice is used for the
# whole-home 'use' channel so the two appliance types line up house-by-house.
num_houses=10
device_name='air1'
devices_types_unsampled[device_name]=psda.generate_type_for_appliance_by_dataids(schema,table,device_name,ids_for_devices[ids_device_name][10:10+num_houses])
device_name='use'
devices_types_unsampled[device_name]=psda.generate_type_for_appliance_by_dataids(schema,table,device_name,ids_for_devices[ids_device_name][10:10+num_houses])
In [18]:
devices_types=resample_and_split(devices_types_unsampled,'30T','D',True,True)
In [19]:
# Build a synthetic 'not_air' appliance type: for every (house, day) trace,
# not_air = whole-home 'use' minus the 'air1' trace.
new_insts = []
for i, instance in enumerate(devices_types['air1'].instances):
    # Reset the trace list for every house.  The original initialized
    # new_traces once before the loop, so instance i received the traces of
    # houses 0..i (the analogous loop in cell In [321] resets per house).
    new_traces = []
    for j, trace in enumerate(instance.traces):
        new_series = devices_types['use'].instances[i].traces[j].series - trace.series
        meta_trace = deepcopy(trace.metadata)
        meta_trace['device_name'] = 'not_air'
        new_traces.append(app.ApplianceTrace(new_series, meta_trace))
    meta_inst = deepcopy(instance.metadata)
    meta_inst['device_name'] = 'not_air'
    new_insts.append(app.ApplianceInstance(new_traces, meta_inst))
meta_type = deepcopy(devices_types['air1'].metadata)
meta_type['device_name'] = 'not_air'
not_air_type = app.ApplianceType(new_insts, meta_type)
devices_types['not_air'] = not_air_type
In [20]:
#devices_types is the test data
In [40]:
# Grid-search the 'on'-state mean/covariance of the air1 HMM over the training
# houses.  NOTE(review): iterate_models (defined below) reads the globals
# state1_means / state1_covs / errors_mean_cov built here, not only its own
# arguments -- keep the two in sync.
errors_mean_cov=[]
state1_min_mean=1
state1_max_mean=2
state1_min_cov=0.0005
state1_max_cov=3
num_iter_mean=10
num_iter_cov=4
device_type_name='air1'
state1_means = np.linspace(state1_min_mean,state1_max_mean,num_iter_mean)
state1_covs = np.linspace(state1_min_cov,state1_max_cov,num_iter_cov)
error_dict=iterate_models(devices_types,device_type_name,state1_min_mean,state1_max_mean,state1_min_cov,state1_max_cov,num_iter_mean,num_iter_cov)
In [41]:
# Heat map of the error grid; also prints the best (mean, cov) pair.
plot_error_iter_models(error_dict)
In [21]:
# Held-out test set: the next 15 house ids after the training slice.
devices_types_unsampled_test={}
num_houses_test=15
device_name='air1'
devices_types_unsampled_test[device_name]=psda.generate_type_for_appliance_by_dataids(schema,table,device_name,ids_for_devices[ids_device_name][num_houses:num_houses+num_houses_test])
device_name='use'
devices_types_unsampled_test[device_name]=psda.generate_type_for_appliance_by_dataids(schema,table,device_name,ids_for_devices[ids_device_name][num_houses:num_houses+num_houses_test])
In [22]:
# Same 30-minute resample / daily split as the training set.
devices_types_test=resample_and_split(devices_types_unsampled_test,'30T','D',True,True)
In [23]:
#Make up the not air for the other pickling
# Build the synthetic 'not_air' type (use minus air1) for the low-usage split.
# NOTE(review): devices_types_test_low is only created in cell In [312] far
# below, so this cell fails on a fresh top-to-bottom run -- it was presumably
# executed after that cell (execution count 23 vs 312 confirms out-of-order use).
# NOTE(review): new_traces is never reset inside the house loop, so each
# ApplianceInstance receives the traces of all previous houses as well as its
# own -- confirm this is intended (the analogous loop in In [321] resets it
# per house).
new_traces=[]
new_insts=[]
for i,instance in enumerate(devices_types_test_low['air1'].instances):
for j,trace in enumerate(instance.traces):
# Residual load = whole-home use minus the air conditioning channel.
new_series=devices_types_test_low['use'].instances[i].traces[j].series-trace.series
meta_trace=deepcopy(trace.metadata)
meta_trace['device_name']='not_air'
new_traces.append(app.ApplianceTrace(new_series,meta_trace))
meta_inst=deepcopy(instance.metadata)
meta_inst['device_name']='not_air'
new_insts.append(app.ApplianceInstance(new_traces,meta_inst))
meta_type=deepcopy(devices_types_test_low['air1'].metadata)
meta_type['device_name']='not_air'
not_air_type=app.ApplianceType(new_insts,meta_type)
devices_types_test_low['not_air']=not_air_type
In [24]:
#Develop an air and a not air model
# Hand-tuned 2-state Gaussian HMM priors.  State 0 is 'off' (mean ~0, tiny
# covariance); state 1 is 'on'.  NOTE(review): the positional
# GaussianHMM(n_components, covariance_type, startprob, transmat) signature is
# the old sklearn/hmmlearn API -- confirm against the installed hmm version.
best_models={}
#Not Air
pi_prior=np.array([0.9,0.1])
# Near-certain state flip each step for the residual load.
a_prior=np.array([[0.005,0.995],[0.995,0.005]])
mean_prior=np.array([[0],[1.120]])
cov_prior=np.array([[[0.0001]],[[0.01]]])
best_models['not_air1'] = hmm.GaussianHMM(pi_prior.size,'full',pi_prior,a_prior)
best_models['not_air1'].means_ = mean_prior
best_models['not_air1'].covars_ = cov_prior
#Air
pi_prior=np.array([0.9,0.1])
# Sticky states: AC tends to stay on/off across 30-minute samples.
a_prior=np.array([[0.95,0.05],[0.05,0.95]])
mean_prior=np.array([[0],[1.8]])
cov_prior=np.array([[[0.001]],[[.2]]])
best_models['air1'] = hmm.GaussianHMM(pi_prior.size,'full',pi_prior,a_prior)
best_models['air1'].means_ = mean_prior
best_models['air1'].covars_ = cov_prior
In [375]:
#Develop an air and a not air model
# Variant priors tuned for the LOW-usage houses (smaller 'on' mean for air).
best_models_low={}
#Not Air
pi_prior=np.array([0.9,0.1])
a_prior=np.array([[0.005,0.995],[0.995,0.005]])
mean_prior=np.array([[0],[1.550]])
cov_prior=np.array([[[0.0001]],[[0.005]]])
best_models_low['not_air1'] = hmm.GaussianHMM(pi_prior.size,'full',pi_prior,a_prior)
best_models_low['not_air1'].means_ = mean_prior
best_models_low['not_air1'].covars_ = cov_prior
#Air
pi_prior=np.array([0.9,0.1])
a_prior=np.array([[0.95,0.05],[0.05,0.95]])
mean_prior=np.array([[0],[1]])
cov_prior=np.array([[[0.001]],[[.01]]])
best_models_low['air1'] = hmm.GaussianHMM(pi_prior.size,'full',pi_prior,a_prior)
best_models_low['air1'].means_ = mean_prior
best_models_low['air1'].covars_ = cov_prior
In [343]:
#Develop an air and a not air model
# Variant priors tuned for the HIGH-usage houses (larger 'on' mean for air).
best_models_high={}
#Not Air
pi_prior=np.array([0.9,0.1])
a_prior=np.array([[0.005,0.995],[0.995,0.005]])
mean_prior=np.array([[0],[1.120]])
cov_prior=np.array([[[0.0001]],[[0.01]]])
best_models_high['not_air1'] = hmm.GaussianHMM(pi_prior.size,'full',pi_prior,a_prior)
best_models_high['not_air1'].means_ = mean_prior
best_models_high['not_air1'].covars_ = cov_prior
#Air
pi_prior=np.array([0.9,0.1])
a_prior=np.array([[0.95,0.05],[0.05,0.95]])
mean_prior=np.array([[0],[3.33]])
cov_prior=np.array([[[0.001]],[[.2]]])
best_models_high['air1'] = hmm.GaussianHMM(pi_prior.size,'full',pi_prior,a_prior)
best_models_high['air1'].means_ = mean_prior
best_models_high['air1'].covars_ = cov_prior
In [25]:
# Evaluate the hand-tuned FHMM on the full test set.
device_type_name = 'air1'
thresh = 20      # acceptable monthly-energy error, percent
min_val = 760    # monthly-kWh cutoff separating high/low usage houses
test_data = {}
# The original line was truncated mid-call ("...best_models,devic"); restored
# to match the evaluate_FHMM_single_type invocations elsewhere in this
# notebook, passing the min_val configured just above.
error_dict = evaluate_FHMM_single_type(devices_types_test, best_models, device_type_name,
                                       thresh, min_val, plot_sum=True, plot_ind=False)
In [79]:
# Pick one test house and plot its per-trace disaggregation performance.
house_num=4
house_id=devices_types_test['air1'].instances[house_num].metadata['dataid']
view_performance_one_house(devices_types_test,house_num,best_models)
In [88]:
#Slope-intercept
# Load the Austin temperature series for the degree-day regression.  (The
# 'austion' spelling is the actual file name on disk -- do not "fix" it here.)
with open('../weather/temp_austion_jan_may.pkl','rb') as f:
pecan_temps=pickle.load(f)
In [37]:
# Daily forward-filled temperatures.  NOTE(review): resample(fill_method=...)
# is removed in modern pandas; the equivalent is .resample('D').ffill().
pecan_temps_resampled=pecan_temps.resample('D', fill_method='ffill')
pecan_temps_resampled['temps']=pecan_temps_resampled['temp']
In [50]:
# Build per-house daily energy DataFrames: total kWh and air1 kWh per day,
# indexed by the first timestamp of each daily trace.
# NOTE(review): trace.series[0]*24 converts only the FIRST sample of each
# daily trace to kWh; this is correct only if the first value represents the
# day's average kW -- confirm against the resampling performed above.
house_energy_daily={}
for i,instance in enumerate(devices_types['use'].instances):
daily_energy_total={}
daily_energy_air={}
for j,trace in enumerate(instance.traces):
daily_energy_total[trace.series.index[0]]=trace.series[0]*24
air1_trace=devices_types['air1'].instances[i].traces[j]
daily_energy_air[air1_trace.series.index[0]]=air1_trace.series[0]*24
df=pd.DataFrame.from_dict(daily_energy_total,orient='index')
df_air=pd.DataFrame.from_dict(daily_energy_air,orient='index')
df=df.sort_index()
df_air=df_air.sort_index()
# Rename the default 0 column to kwh / air_kwh, then drop the original.
df['kwh']=df[0]
df['air_kwh']=df_air[0]
df.drop(0, axis=1, inplace=True)
house_energy_daily[instance.metadata['dataid']]=df
In [51]:
# Inspect which house ids were collected.
house_energy_daily.keys()
Out[51]:
In [68]:
# Quick visual check of the chosen house's daily kWh (total and air columns).
plt.plot(house_energy_daily[house_id])
Out[68]:
In [69]:
# Calibration window and candidate balance-point temperatures for the
# degree-day regression.  Only a cooling ('cdd') model is fit below.
date_start='2014-01-01'
date_end='2014-05-30'
cal_temps=pecan_temps_resampled[date_start:date_end]
cal_elec=house_energy_daily[house_id][date_start:date_end]
cal_hdd_temp_range=range(55,65)
cal_cdd_temp_range=range(55,65)
model_type=['cdd']
In [70]:
#BOTH
# Grid-search CDD (and HDD) balance-point temperatures for the degree-day
# regression and keep the fit with the best adjusted R^2.
a = []
c = []
for type_of_dd in model_type:
    best_r2_adj = 0
    for cdd_temp in cal_cdd_temp_range:
        cal_temps = get_cdd(cdd_temp, cal_temps)
        for hdd_temp in cal_hdd_temp_range:
            cal_temps = get_hdd(hdd_temp, cal_temps)
            cal_elec['use_per_day'] = cal_elec['kwh']
            cal_all = pd.merge(cal_elec, cal_temps, left_index=True, right_index=True)
            # Bug fix: the original wrote `type_of_dd is 'hdd' or 'both'`,
            # which is always truthy (the bare string 'both' is non-empty) and
            # also compares strings by identity.  Use membership tests so each
            # model only receives its own per-day columns.
            if type_of_dd in ('hdd', 'both'):
                cal_all['hdd_per_day'] = cal_all['hdd']
            if type_of_dd in ('cdd', 'both'):
                cal_all['cdd_per_day'] = cal_all['cdd']
            results = degree_day_regression(cal_all, type_of_dd)
            r2_adj = np.array(results['R2_adj'])[0]
            if r2_adj > best_r2_adj:
                # Remember the winning balance points and fit parameters.
                best_cdd_temp = cdd_temp
                best_hdd_temp = hdd_temp
                best_r2_adj = r2_adj
                slope_cdd = np.array(results['CDD'])[0]
                intercept = np.array(results['intercept'])[0]
                cal_all_best = cal_all
            a.append(r2_adj)
        # One row of adjusted R^2 values per CDD balance point.
        c.append(a)
        a = []
# Parenthesized single-argument print runs identically on Python 2 and 3.
print("Best CDD set point:" + str(best_cdd_temp))
print("BEST r2_adj:" + str(best_r2_adj))
print("Best CDD slope:" + str(slope_cdd))
print("Best intercept:" + str(intercept))
In [84]:
#May Data
# Predict daily AC energy from the fitted degree-day model and compare against
# the submetered truth, day by day.
# NOTE(review): slope/intercept are hard-coded copies of an earlier regression
# result; re-running the grid search will not update them automatically.
slope_cdd=1.02399
intercept=16.76
cal_all_test=cal_all_best
error=[]
error_perc=[]
pred_air=[]
daily_proportion_act=[]
daily_proportion_pred=[]
daily_proportion_error=[]
total_air_pred=[]
time=[]
for i,val in enumerate(cal_all_test['cdd']):
real_air_kwh_per_day=float(cal_all_test['air_kwh'][i])
use_kwh_per_day=float(cal_all_test['kwh'][i])
# NOTE(review): slope*24 assumes the regression slope is in kW per degree-day;
# confirm the unit conversion against degree_day_regression's inputs.
pred_air_kwh_per_day=val*slope_cdd*24
# Clamp negative predictions (days below the balance point) to zero.
if(pred_air_kwh_per_day<0):
pred_air_kwh_per_day=0
pred_air.append(pred_air_kwh_per_day)
error.append((real_air_kwh_per_day-pred_air_kwh_per_day))
# NOTE(review): divides by real_air_kwh_per_day -- a day with zero actual AC
# use raises ZeroDivisionError here.
error_perc.append(100*(real_air_kwh_per_day-pred_air_kwh_per_day)/real_air_kwh_per_day)
daily_proportion_error.append(100*((real_air_kwh_per_day/use_kwh_per_day)-(pred_air_kwh_per_day/use_kwh_per_day))/(real_air_kwh_per_day/use_kwh_per_day))
daily_proportion_act.append(100*real_air_kwh_per_day/use_kwh_per_day)
daily_proportion_pred.append(100*pred_air_kwh_per_day/use_kwh_per_day)
time.append(cal_all_test['kwh'].index[i])
In [87]:
# Re-plot the single-house comparison, this time overlaying the degree-day
# regression predictions (green line) built in the cell above.
house_num=4
house_id=devices_types_test['air1'].instances[house_num].metadata['dataid']
view_performance_one_house(devices_types_test,house_num,best_models,pred_air)
In [ ]:
#Complicated Method for getting good and bad ids
In [118]:
# Group trace indices by house index for the houses the FHMM handled well
# ('good') and poorly ('bad').  Each entry in model_eval_dict is
# [dataid, house_num, trace_num]; the result maps house_num -> [trace_num, ...].
good_ids_tuple = {}
for entry in model_eval_dict['good_house_ids']:
    good_ids_tuple.setdefault(entry[1], []).append(entry[2])
bad_ids_tuple = {}
for entry in model_eval_dict['bad_house_ids']:
    bad_ids_tuple.setdefault(entry[1], []).append(entry[2])
In [321]:
# Rebuild two reduced test sets ('good' and 'bad') containing only the
# (house, trace) pairs identified by the tuples above, for both the air1 and
# whole-home 'use' channels.  Note: metadata dicts are shared by reference
# here (no deepcopy, unlike the not_air construction cells).
devices_types_test_good={}
devices_types_test_bad={}
new_good_traces=[]
new_good_instances=[]
new_good_traces_use=[]
new_good_instances_use=[]
for house_num in good_ids_tuple:
# Reset the per-house trace lists (dict iteration order determines house order).
new_good_traces_use=[]
new_good_traces=[]
for trace_num in good_ids_tuple[house_num]:
new_series=devices_types_test['air1'].instances[house_num].traces[trace_num].series
new_meta_trace=devices_types_test['air1'].instances[house_num].traces[trace_num].metadata
new_good_traces.append(app.ApplianceTrace(new_series,new_meta_trace))
new_series_use=devices_types_test['use'].instances[house_num].traces[trace_num].series
new_meta_trace_use=devices_types_test['use'].instances[house_num].traces[trace_num].metadata
new_good_traces_use.append(app.ApplianceTrace(new_series_use,new_meta_trace_use))
new_meta_inst=devices_types_test['air1'].instances[house_num].metadata
new_good_instances.append(app.ApplianceInstance(new_good_traces,new_meta_inst))
new_meta_inst_use=devices_types_test['use'].instances[house_num].metadata
new_good_instances_use.append(app.ApplianceInstance(new_good_traces_use,new_meta_inst_use))
meta_type=devices_types_test['air1'].metadata
devices_types_test_good['air1']=app.ApplianceType(new_good_instances,meta_type)
meta_type_use=devices_types_test['use'].metadata
devices_types_test_good['use']=app.ApplianceType(new_good_instances_use,meta_type_use)
# Same construction for the poorly-predicted houses.
new_bad_traces=[]
new_bad_instances=[]
new_bad_traces_use=[]
new_bad_instances_use=[]
for house_num in bad_ids_tuple:
new_bad_traces_use=[]
new_bad_traces=[]
for trace_num in bad_ids_tuple[house_num]:
new_series=devices_types_test['air1'].instances[house_num].traces[trace_num].series
new_meta_trace=devices_types_test['air1'].instances[house_num].traces[trace_num].metadata
new_bad_traces.append(app.ApplianceTrace(new_series,new_meta_trace))
new_series_use=devices_types_test['use'].instances[house_num].traces[trace_num].series
new_meta_trace_use=devices_types_test['use'].instances[house_num].traces[trace_num].metadata
new_bad_traces_use.append(app.ApplianceTrace(new_series_use,new_meta_trace_use))
new_meta_inst=devices_types_test['air1'].instances[house_num].metadata
new_bad_instances.append(app.ApplianceInstance(new_bad_traces,new_meta_inst))
new_meta_inst_use=devices_types_test['use'].instances[house_num].metadata
new_bad_instances_use.append(app.ApplianceInstance(new_bad_traces_use,new_meta_inst_use))
meta_type=devices_types_test['air1'].metadata
devices_types_test_bad['air1']=app.ApplianceType(new_bad_instances,meta_type)
meta_type_use=devices_types_test['use'].metadata
devices_types_test_bad['use']=app.ApplianceType(new_bad_instances_use,meta_type_use)
In [322]:
# Re-evaluate the hand-tuned models on just the previously-bad houses.
# NOTE(review): relies on `thresh` still being set from an earlier cell.
device_type_name='air1'
test_data={}
error_dict=evaluate_FHMM_single_type(devices_types_test_bad,best_models,device_type_name,thresh,plot_sum=True,plot_ind=False)
In [323]:
#GETTING PARAMETERS FOR PREVIOUSLY 'BAD' HOUSES
# Re-run the (mean, cov) grid search restricted to the poorly-predicted
# houses.  NOTE(review): iterate_models reads the globals set here.
errors_mean_cov=[]
state1_min_mean=0.1
state1_max_mean=3
state1_min_cov=0.0005
state1_max_cov=.001
num_iter_mean=10
num_iter_cov=2
device_type_name='air1'
state1_means = np.linspace(state1_min_mean,state1_max_mean,num_iter_mean)
state1_covs = np.linspace(state1_min_cov,state1_max_cov,num_iter_cov)
error_dict=iterate_models(devices_types_test_bad,device_type_name,state1_min_mean,state1_max_mean,state1_min_cov,state1_max_cov,num_iter_mean,num_iter_cov)
In [324]:
plot_error_iter_models(error_dict)
In [207]:
# Same grid search over the well-predicted houses, with a narrower mean range.
errors_mean_cov=[]
state1_min_mean=0.5
state1_max_mean=2.5
state1_min_cov=0.0005
state1_max_cov=.001
num_iter_mean=10
num_iter_cov=2
device_type_name='air1'
state1_means = np.linspace(state1_min_mean,state1_max_mean,num_iter_mean)
state1_covs = np.linspace(state1_min_cov,state1_max_cov,num_iter_cov)
error_dict=iterate_models(devices_types_test_good,device_type_name,state1_min_mean,state1_max_mean,state1_min_cov,state1_max_cov,num_iter_mean,num_iter_cov)
In [ ]:
#Getting low- and high-usage houses separated
In [312]:
# Split the test set into high- and low-usage traces by total monthly energy.
# The 'good'/'bad' variable names here actually mean high/low usage (cutoff
# min_val kWh/month), not model quality.
devices_types_test_high={}
devices_types_test_low={}
min_val=760
new_good_traces=[]
new_good_instances=[]
new_good_traces_use=[]
new_good_instances_use=[]
new_bad_traces=[]
new_bad_instances=[]
new_bad_traces_use=[]
new_bad_instances_use=[]
for house_num,instance in enumerate(devices_types_test['air1'].instances):
# Per-house reset of both trace lists.
new_good_traces_use=[]
new_good_traces=[]
new_bad_traces_use=[]
new_bad_traces=[]
for trace_num,trace in enumerate(instance.traces):
power_total=utils.trace_series_to_numpy_array(devices_types_test['use'].instances[house_num].traces[trace_num].series)
# Approximate monthly kWh: mean kW * 24 h * 30 days.
power_total_sum=np.sum(power_total/len(power_total)*24*30)
if(power_total_sum>min_val):
new_series=devices_types_test['air1'].instances[house_num].traces[trace_num].series
new_meta_trace=devices_types_test['air1'].instances[house_num].traces[trace_num].metadata
new_good_traces.append(app.ApplianceTrace(new_series,new_meta_trace))
new_series_use=devices_types_test['use'].instances[house_num].traces[trace_num].series
new_meta_trace_use=devices_types_test['use'].instances[house_num].traces[trace_num].metadata
new_good_traces_use.append(app.ApplianceTrace(new_series_use,new_meta_trace_use))
else:
new_series=devices_types_test['air1'].instances[house_num].traces[trace_num].series
new_meta_trace=devices_types_test['air1'].instances[house_num].traces[trace_num].metadata
new_bad_traces.append(app.ApplianceTrace(new_series,new_meta_trace))
new_series_use=devices_types_test['use'].instances[house_num].traces[trace_num].series
new_meta_trace_use=devices_types_test['use'].instances[house_num].traces[trace_num].metadata
new_bad_traces_use.append(app.ApplianceTrace(new_series_use,new_meta_trace_use))
# Every house appears in both splits; one of its trace lists may be empty.
new_meta_inst=devices_types_test['air1'].instances[house_num].metadata
new_good_instances.append(app.ApplianceInstance(new_good_traces,new_meta_inst))
new_meta_inst_use=devices_types_test['use'].instances[house_num].metadata
new_good_instances_use.append(app.ApplianceInstance(new_good_traces_use,new_meta_inst_use))
new_meta_inst=devices_types_test['air1'].instances[house_num].metadata
new_bad_instances.append(app.ApplianceInstance(new_bad_traces,new_meta_inst))
new_meta_inst_use=devices_types_test['use'].instances[house_num].metadata
new_bad_instances_use.append(app.ApplianceInstance(new_bad_traces_use,new_meta_inst_use))
meta_type=devices_types_test['air1'].metadata
devices_types_test_high['air1']=app.ApplianceType(new_good_instances,meta_type)
meta_type_use=devices_types_test['use'].metadata
devices_types_test_high['use']=app.ApplianceType(new_good_instances_use,meta_type_use)
meta_type=devices_types_test['air1'].metadata
devices_types_test_low['air1']=app.ApplianceType(new_bad_instances,meta_type)
meta_type_use=devices_types_test['use'].metadata
devices_types_test_low['use']=app.ApplianceType(new_bad_instances_use,meta_type_use)
In [347]:
# Evaluate the baseline models on the high-usage split.
device_type_name='air1'
thresh=10
test_data={}
error_dict=evaluate_FHMM_single_type(devices_types_test_high,best_models,device_type_name,thresh,plot_sum=True,plot_ind=False)
In [377]:
# Evaluate the baseline models on the low-usage split.
device_type_name='air1'
test_data={}
thresh=10
error_dict=evaluate_FHMM_single_type(devices_types_test_low,best_models,device_type_name,thresh,plot_sum=True,plot_ind=False)
In [382]:
# Grid-search parameters for the 'not_air' model on the low-usage split.
errors_mean_cov=[]
state1_min_mean=.2
state1_max_mean=2
state1_min_cov=0.0005
state1_max_cov=0.01
num_iter_mean=5
num_iter_cov=2
device_type_name='not_air'
state1_means = np.linspace(state1_min_mean,state1_max_mean,num_iter_mean)
state1_covs = np.linspace(state1_min_cov,state1_max_cov,num_iter_cov)
error_dict=iterate_models(devices_types_test_low,device_type_name,state1_min_mean,state1_max_mean,state1_min_cov,state1_max_cov,num_iter_mean,num_iter_cov)
In [383]:
plot_error_iter_models(error_dict)
In [3]:
#Functions
In [7]:
#Resamples the data
def resample_and_split(devices_types_unsampled_test, sample_rate, length, sample, split):
    """Optionally resample each appliance type, then split it into traces.

    Parameters
    ----------
    devices_types_unsampled_test : dict
        Maps device-type name to an object exposing .resample(rate) and
        .split_by(length) (disaggregator ApplianceType).
    sample_rate : str
        Pandas-style resample rule (e.g. '30T'), used when `sample` is True.
    length : str
        Split period (e.g. 'D' for daily traces), used when `split` is True.
    sample, split : bool
        Toggles for the resampling / splitting stages.

    Returns
    -------
    dict
        Same keys as the input, values processed per the flags.
    """
    devices_types_test = {}
    for key in devices_types_unsampled_test:
        processed = devices_types_unsampled_test[key]
        if sample:
            processed = processed.resample(sample_rate)
        if split:
            processed = processed.split_by(length)
        devices_types_test[key] = processed
        # Parenthesized single-argument print is identical under Py2 and Py3.
        print("Resampled " + str(key))
    return devices_types_test
In [8]:
def iterate_models(devices_types_i, device_type_name, state1_min_mean, state1_max_mean,
                   state1_min_cov, state1_max_cov, num_iter_mean, num_iter_cov):
    """Grid-search the 'on'-state (mean, covariance) of a 2-state GaussianHMM.

    For every (mean, cov) pair on the grid, builds a fixed 2-state model
    (state 0 pinned near zero) and scores it with
    get_model_error_from_trace_n over every trace of the given device type.

    Fix: the grids and the error accumulator are now derived locally from the
    arguments.  The original read the globals state1_means / state1_covs /
    errors_mean_cov, which every calling cell had to build with identical
    np.linspace calls -- results are unchanged for those callers.

    Returns a dict with the full error grid ('error_vals'), the grid axes
    ('mean_vals', 'cov_vals'), and the best point/error.  Also pickles the
    dict to a file named after the searched ranges.
    """
    state1_means = np.linspace(state1_min_mean, state1_max_mean, num_iter_mean)
    state1_covs = np.linspace(state1_min_cov, state1_max_cov, num_iter_cov)
    errors_mean_cov = []
    best_error = 100000000
    error_dict = {}
    for state1_mean in state1_means:
        errors_cov = []  # one row per mean: errors across all covs and traces
        for state1_cov in state1_covs:
            # Fixed 'off' state near 0 kW; candidate 'on' state under test.
            pi_prior = np.array([0.9, 0.1])
            a_prior = np.array([[0.95, 0.05], [0.05, 0.95]])
            mean_prior = np.array([[0], [state1_mean]])
            cov_prior = np.array([[[0.0001]], [[state1_cov]]])
            model = hmm.GaussianHMM(pi_prior.size, 'full', pi_prior, a_prior)
            model.means_ = mean_prior
            model.covars_ = cov_prior
            errors_house = []
            for instance in devices_types_i[device_type_name].instances:
                for trace in instance.traces:
                    error = get_model_error_from_trace_n(trace, model)
                    errors_cov.append(error)
                    errors_house.append(error)
            print(str(state1_mean) + ", " + str(state1_cov) + ", " + str(np.mean(errors_house)))
            if np.mean(errors_house) < best_error:
                best_mean = state1_mean
                best_cov = state1_cov
                best_error = np.mean(errors_house)
        errors_mean_cov.append(errors_cov)
    error_dict['error_vals'] = errors_mean_cov
    error_dict['cov_vals'] = state1_covs
    error_dict['mean_vals'] = state1_means
    error_dict['best_mean'] = best_mean
    error_dict['best_cov'] = best_cov
    error_dict['best_error'] = best_error
    # 'wb', not 'w': pickle streams are binary data.
    with open('error_m_' + str(state1_min_mean) + '_' +
              str(state1_max_mean) + '_c_' + str(state1_min_cov) + '_' + str(state1_max_cov) + '.pkl', 'wb') as f:
        pickle.dump(error_dict, f)
    return error_dict
def plot_error_iter_models(error_dict_new):
    """Render the grid-search error matrix as a heat map and report the best
    (mean, covariance) pair found by iterate_models."""
    plt.clf()
    covs = error_dict_new['cov_vals']
    means = error_dict_new['mean_vals']
    # Map image extents to the searched covariance (x) and mean (y) ranges.
    bounds = [covs.min(), covs.max(), means.min(), means.max()]
    plt.imshow(error_dict_new['error_vals'], extent=bounds, cmap='Blues_r',
               interpolation='none', aspect='auto')
    pylab.rcParams['figure.figsize'] = 12, 9
    plt.xlabel('Covariance Values')
    plt.ylabel('Mean Values')
    plt.show()
    print(error_dict_new['best_mean'])
    print(error_dict_new['best_cov'])
In [9]:
# Normalized disaggregation error of one HMM on one trace: decode states,
# sample emissions for each decoded state, and return summed error as a
# fraction of total observed power.
def get_model_error_from_trace_n(test_trace,model,plot=False):
error=[]
obs_power=utils.trace_series_to_numpy_array(test_trace.series)
# Most-likely state per sample.
est_states=model.predict(obs_power)
est_power=[]
for val in est_states:
# NOTE(review): _means_/_covars_ are private, version-specific attributes of
# the old sklearn/hmmlearn GaussianHMM; the rest of this notebook sets the
# public means_/covars_ -- confirm against the installed hmm version.
# NOTE(review): np.random.normal makes this error stochastic; seed numpy if
# the grid searches must be reproducible.
est_power.append(float(np.random.normal(model._means_[val],model._covars_[val],1)[0]))
if(plot):
plt.figure()
plt.plot(obs_power,'k')
plt.plot(est_power,'b',alpha=.5)
error.append(metric.sum_error(obs_power,np.array(est_power)))
# error holds a single summed value; normalize by total observed power.
return np.sum(error)/float(np.sum(obs_power))
In [10]:
# Evaluate an FHMM built from best_models over every trace of one device type.
# Collects precision/recall, per-trace energy errors, and counts of traces
# whose monthly-energy error is below `thresh` percent, split by whether the
# house's monthly usage exceeds `min_val` kWh.
# NOTE(review): the local test_data dict here is shadowed -- the helper
# eval_model_with_instance writes into a MODULE-LEVEL test_data global that
# the calling cells create; this function's own test_data is unused by it.
def evaluate_FHMM_single_type(types_eval,best_models,device_type_name,thresh,min_val=0,mass=True,plot_sum=True,plot_ind=False):
model_eval_dict={}
test_data={}
# Per-trace metric accumulators.
model_eval_dict['precision']=[]
model_eval_dict['recall']=[]
model_eval_dict['test_energy']=[]
model_eval_dict['pred_energy']=[]
model_eval_dict['error_perc']=[]
model_eval_dict['power_total_sums']=[]
model_eval_dict['diff_power_perc']=[]
model_eval_dict['good_house_ids']=[]
model_eval_dict['bad_house_ids']=[]
model_eval_dict['power_avg_good']=0
model_eval_dict['power_avg_bad']=0
# Counters for traces under the error threshold, bucketed by min_val.
model_eval_dict['num_less_than']=0
model_eval_dict['num_less_than_more_min']=0
model_eval_dict['num_more_min']=0
model_eval_dict['num_less_than_less_min']=0
model_eval_dict['num_less_min']=0
for house_num,instance_test in enumerate(types_eval[device_type_name].instances):
model_eval_dict=eval_model_with_instance(types_eval,device_type_name,best_models,house_num,model_eval_dict,thresh,min_val,mass,plot_ind)
# Second pass only refreshes the shared test_data entry for later plotting.
for house_num,instance_test in enumerate(types_eval[device_type_name].instances):
for trace_num,trace in enumerate(types_eval[device_type_name].instances[house_num].traces):
test_data[device_type_name]=utils.trace_series_to_numpy_array(types_eval[device_type_name].instances[house_num].traces[trace_num].series)
# Baseline: predict the mean test energy for every trace.
model_eval_dict['baseline_perc']=(model_eval_dict['test_energy']-(np.sum(model_eval_dict['test_energy'])/len(model_eval_dict['test_energy'])))
model_eval_dict['baseline_less_than']=sum(abs(i) < thresh for i in model_eval_dict['baseline_perc'])
model_eval_dict['baseline_less_than_perc']=model_eval_dict['baseline_less_than']/float(len(model_eval_dict['baseline_perc']))*100
model_eval_dict['num_less_than_perc']=model_eval_dict['num_less_than']/float(len(model_eval_dict['error_perc']))*100
# Guard the ratio computations against empty buckets.
if(model_eval_dict['num_more_min']>0):
model_eval_dict['num_less_than_more_min_perc']=model_eval_dict['num_less_than_more_min']/float(model_eval_dict['num_more_min'])*100
else:
model_eval_dict['num_less_than_more_min_perc']=0
if(model_eval_dict['num_less_min']>0):
model_eval_dict['num_less_than_less_min_perc']=model_eval_dict['num_less_than_less_min']/float(model_eval_dict['num_less_min'])*100
else:
model_eval_dict['num_less_than_less_min_perc']=0
if(plot_sum):
plot_and_sum_model_eval(model_eval_dict)
return model_eval_dict
# Run the FHMM over every trace of one house and fold the per-trace metrics
# into model_eval_dict (returned mutated).
# NOTE(review): writes into the module-level test_data global (not a
# parameter) -- the calling cells must have created test_data beforehand.
def eval_model_with_instance(types_eval,device_type_name,best_models,house_num,model_eval_dict,thresh,min_val=0,mass=True,plot=False):
# Combine the per-appliance HMMs into one factorial HMM.
model_fhmm,means_fhmm,var_fhmm=fhmm.generate_FHMM_from_HMMs(best_models)
appliance_list=['air1','not_air']
for trace_num,trace in enumerate(types_eval[device_type_name].instances[house_num].traces):
test_data[device_type_name]=utils.trace_series_to_numpy_array(types_eval[device_type_name].instances[house_num].traces[trace_num].series)
power_total=utils.trace_series_to_numpy_array(types_eval['use'].instances[house_num].traces[trace_num].series)
# Disaggregate the whole-home signal into per-appliance states and power.
[decoded_states, decoded_power]=fhmm.predict_with_FHMM(model_fhmm,means_fhmm,var_fhmm,power_total)
if(plot):
plt.figure()
plt.plot(power_total)
plt.plot(test_data['air1'])
plt.plot(decoded_power['air1'],'r')
# Ground-truth on/off states inferred from submetered power (threshold 2).
truth_states=metric.guess_truth_from_power(test_data[device_type_name],2)
eval_metrics=metric.get_positive_negative_stats(truth_states,decoded_states[device_type_name])
diff_power_perc=(metric.sum_error(test_data[device_type_name],decoded_power[device_type_name])*100/np.sum(test_data[device_type_name]))
precision_val=(metric.get_precision(eval_metrics['tp'],eval_metrics['fp']))
recall_val=(metric.get_sensitivity(eval_metrics['tp'],eval_metrics['fn']))
model_eval_dict['precision'].append(precision_val)
model_eval_dict['recall'].append(recall_val)
# Monthly energy estimates: mean kW * 24 h * 30 days.
test_energy=(np.sum(test_data[device_type_name])/len(test_data[device_type_name]))*24*30
pred_energy=(np.sum(decoded_power[device_type_name])/len(decoded_power[device_type_name]))*24*30
error_perc=float(test_energy-pred_energy)/test_energy*100
power_total_sum=np.sum(power_total/len(power_total)*24*30)
model_eval_dict['diff_power_perc'].append(diff_power_perc)
model_eval_dict['test_energy'].append(test_energy)
model_eval_dict['pred_energy'].append(pred_energy)
model_eval_dict['error_perc'].append(error_perc)
model_eval_dict['power_total_sums'].append(power_total_sum)
# Bucket by total usage relative to min_val, then by error threshold.
if(power_total_sum>min_val):
model_eval_dict['num_more_min']=model_eval_dict['num_more_min']+1
else:
model_eval_dict['num_less_min']=model_eval_dict['num_less_min']+1
if(abs(error_perc)<thresh):
model_eval_dict['num_less_than']=model_eval_dict['num_less_than']+1
model_eval_dict['power_avg_good']=model_eval_dict['power_avg_good']+power_total_sum
# [dataid, house index, trace index] for later regrouping.
model_eval_dict['good_house_ids'].append([trace.metadata['dataid'],house_num,trace_num])
if(power_total_sum>min_val):
model_eval_dict['num_less_than_more_min']= model_eval_dict['num_less_than_more_min']+1
else:
model_eval_dict['num_less_than_less_min']= model_eval_dict['num_less_than_less_min']+1
else:
model_eval_dict['power_avg_bad']=model_eval_dict['power_avg_bad']+power_total_sum
model_eval_dict['bad_house_ids'].append([trace.metadata['dataid'],house_num,trace_num])
return model_eval_dict
# Print summary statistics and plot predicted vs. actual monthly energy.
# NOTE(review): reads the module-level globals thresh, test_data and
# device_type_name rather than parameters -- the calling cells must have set
# them; safest to call this only via evaluate_FHMM_single_type.
def plot_and_sum_model_eval(model_eval_dict):
if(model_eval_dict['num_less_than']>0):
print 'Average Power of Houses w/ <'+str(thresh)+'% Error: ' + str(model_eval_dict['power_avg_good']/float(model_eval_dict['num_less_than']))
if((len(model_eval_dict['error_perc'])-model_eval_dict['num_less_than'])>0):
print 'Average Power of Houses w/ >='+str(thresh)+'% Error: '+str(model_eval_dict['power_avg_bad']/float(len(model_eval_dict['error_perc'])-model_eval_dict['num_less_than']))
print
print 'Percentage of Houses with <'+str(thresh)+'% Error (Model): ' + str(model_eval_dict['num_less_than_perc'])
print 'Percentage of Houses with <'+str(thresh)+'% Error (Baseline): ' + str(model_eval_dict['baseline_less_than_perc'])
print
print 'Percentage of Houses with <'+str(thresh)+'% Error and more than min_val (Model): ' + str(model_eval_dict['num_less_than_more_min_perc'])
print 'Percentage of Houses with <'+str(thresh)+'% Error and less than min_val (Model): ' + str(model_eval_dict['num_less_than_less_min_perc'])
# Constant-mean baseline for comparison against the last-processed trace.
baseline_val=np.sum(model_eval_dict['test_energy'])/len(model_eval_dict['test_energy'])
a=np.empty(len(model_eval_dict['test_energy']))
a[:]=(baseline_val)
# NOTE(review): base_diff_power_perc is computed but never used or returned.
base_diff_power_perc=(metric.sum_error(test_data[device_type_name],a)*100/np.sum(test_data[device_type_name]))
plt.figure()
plt.plot(model_eval_dict['test_energy'],'b')
plt.plot(model_eval_dict['pred_energy'],'r')
plt.plot(a,'k')
plt.title('Predicted Energy (Red), Actual Energy (Blue)')
In [86]:
def view_performance_one_house(devices_types_test_p, house_num, best_models, values_from_regression=[]):
    """Plot per-trace FHMM disaggregation against ground truth for one house.

    Draws one small subplot per daily trace (truth, whole-home power, decoded
    air power), then a summary figure of daily energy with an optional green
    overlay of regression predictions, and prints the total-energy error.

    NOTE: reads the module-level globals device_type_name and test_data set by
    earlier cells.  The mutable default [] is safe here because the argument
    is only read, never mutated.
    """
    test_energy = []
    pred_energy = []
    model_fhmm, means_fhmm, vars_fhmm = fhmm.generate_FHMM_from_HMMs(best_models)
    appliance_list = ['air1', 'not_air']
    total_num = len(devices_types_test_p[device_type_name].instances[house_num].traces)
    for trace_num, trace in enumerate(devices_types_test_p[device_type_name].instances[house_num].traces):
        test_data[device_type_name] = utils.trace_series_to_numpy_array(devices_types_test_p[device_type_name].instances[house_num].traces[trace_num].series)
        power_total = utils.trace_series_to_numpy_array(devices_types_test_p['use'].instances[house_num].traces[trace_num].series)
        [decoded_states, decoded_power] = fhmm.predict_with_FHMM(model_fhmm, means_fhmm, vars_fhmm, power_total)
        # Bug fix: subplot positions are 1-based; the original passed
        # trace_num starting at 0, which matplotlib rejects.
        plt.subplot(6, 5, trace_num + 1)
        plt.plot(test_data['air1'])
        plt.plot(power_total)
        plt.plot(decoded_power['air1'], 'r')
        plt.ylim([0, 5])
        # Monthly-scale energy: mean kW * 24 h * 30 days.
        test_energy.append((np.sum(test_data[device_type_name]) / len(test_data[device_type_name])) * 24 * 30)
        pred_energy.append((np.sum(decoded_power[device_type_name]) / len(decoded_power[device_type_name])) * 24 * 30)
    plt.figure()
    plt.plot(test_energy, 'k')
    plt.plot(pred_energy, 'r')
    if len(values_from_regression) > 0:
        plt.plot(values_from_regression, 'g')
    plt.xlabel('Day of Month')
    plt.ylabel('Total Daily Energy Use (Red=Pred,Black=Real,Green=Lin')
    print("actual is " + str(np.sum(test_energy)) + ", predicted is " + str(np.sum(pred_energy)))
    print("error is " + str((np.sum(pred_energy) - np.sum(test_energy)) / np.sum(test_energy) * 100))
In [44]:
def get_hdd(ref_temp, df):
    """Add heating-degree-day columns to df in place and return it.

    df must carry a 'temps' column.  Adds 'hdd' = max(ref_temp - temps, 0)
    and 'hdd_cum', the running total of 'hdd'.
    """
    df['hdd'] = ref_temp - df.temps
    # Assign through df.loc with a (mask, column) pair.  The original chained
    # indexing (df['hdd'].loc[mask] = 0) can write to a temporary copy and
    # raises SettingWithCopyWarning in modern pandas.
    df.loc[df['hdd'] < 0, 'hdd'] = 0
    df['hdd_cum'] = df['hdd'].cumsum()
    return df
def get_cdd(ref_temp, df):
    """Add cooling-degree-day columns to df in place and return it.

    df must carry a 'temps' column.  Adds 'cdd' = max(temps - ref_temp, 0)
    and 'cdd_cum', the running total of 'cdd'.
    """
    df['cdd'] = df.temps - ref_temp
    # Assign through df.loc with a (mask, column) pair.  The original chained
    # indexing (df['cdd'].loc[mask] = 0) can write to a temporary copy and
    # raises SettingWithCopyWarning in modern pandas.
    df.loc[df['cdd'] < 0, 'cdd'] = 0
    df['cdd_cum'] = df['cdd'].cumsum()
    return df
# NOTE(review): pd.ols was removed from pandas (0.20+); running this on a
# modern install raises AttributeError.  A port would need statsmodels or
# numpy.linalg.lstsq, and must reproduce the beta/std_err ORDERING assumed
# below (beta[1]=covariate, beta[0]=intercept for single-covariate fits;
# beta[2]=intercept for the two-covariate fit) -- confirm against the old
# pandas.stats.ols docs before changing.
def degree_day_regression(df, x_opt='both'):
    '''Function that runs the weather normalization regression on energy use data
    --------------
    df: dataframe that includes
    use per day (upd)
    heating degree days per day (hddpd)
    cooling degree days per day (cddpd)
    ---------------
    x_opt: options for the regression function
    'hdd': run regression with just heating degree days
    'cdd': run regression with just cooling degree days
    'both' (default):
    '''
    #select model based on options
    if x_opt == 'hdd':
        covar = {'HDD': df.hdd_per_day}
        results = pd.ols(y=df.use_per_day, x = covar)
# NOTE(review): the ' R2' label below has a stray leading space, unlike the
# 'R2' used in the other two branches; callers indexing 'R2' on an hdd fit
# will KeyError.
        return pd.DataFrame([[results.beta[1], results.std_err[1], results.beta[0], results.std_err[0], results.r2, results.r2_adj, results.nobs ]],
                            columns = ['intercept', 'intercept_std_err', 'HDD', 'HDD_std_err',' R2', 'R2_adj','N_reads'])
    elif x_opt == 'cdd':
        covar = {'CDD': df.cdd_per_day}
        results = pd.ols(y=df.use_per_day, x = covar)
        return pd.DataFrame([[results.beta[1], results.std_err[1], results.beta[0], results.std_err[0], results.r2, results.r2_adj, results.nobs]],
                            columns = ['intercept', 'intercept_std_err', 'CDD', 'CDD_std_err', 'R2', 'R2_adj','N_reads'])
    elif x_opt == 'both':
        covar = {'CDD': df.cdd_per_day, 'HDD': df.hdd_per_day}
        results = pd.ols(y=df.use_per_day, x = covar)
        return pd.DataFrame([[results.beta[2], results.std_err[2], results.beta[0], results.std_err[0], results.beta[1], results.std_err[1], results.r2, results.r2_adj, results.nobs]],
                            columns = ['intercept', 'intercept_std_err', 'CDD', 'CDD_std_err', 'HDD','HDD_std_err', 'R2', 'R2_adj','N_reads'])
In [8]:
In [ ]: