In [1]:
#air=0,2
#fridge=0,.12
#install sqlalchemy,pymongo,scikit-learn,update pandas
import sys
sys.path.append('../../') # or non-Unix equivalent (add wikienergy/ to path)
import numpy as np
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
from disaggregator import PecanStreetDatasetAdapter as psda
from disaggregator import utils
from disaggregator import fhmm
from disaggregator import evaluation_metrics as metric
import random
import pandas as pd
reload(metric)
reload(fhmm)
from copy import deepcopy
#pi_prior=np.array([0.5,0.5])
#a_prior=np.array([[0.98,0.02],[0.02,0.98]])
#mean_prior=np.array([[0],[2]])
#cov_prior=np.tile(1, (2, 1, 1))
#gen.generate_and_pickle_models('air1',pi_prior,a_prior,mean_prior,cov_prior,'dataid',2,'D','1T')
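In [ ]:
# A minimal sketch, not the project's gen.generate_and_pickle_models: it only
# shows how the commented priors above (pi_prior, a_prior, mean_prior,
# cov_prior) would map onto a single 2-state on/off appliance HMM.  Assumes
# the pickled models used later are GaussianHMM objects of the sklearn.hmm /
# hmmlearn style, exposing startprob_, transmat_, means_ and covars_.
from sklearn.hmm import GaussianHMM  # in newer releases: from hmmlearn.hmm import GaussianHMM
example_hmm = GaussianHMM(n_components=2, covariance_type='full')
example_hmm.startprob_ = np.array([0.5, 0.5])              # pi_prior
example_hmm.transmat_ = np.array([[0.98, 0.02],
                                  [0.02, 0.98]])            # a_prior
example_hmm.means_ = np.array([[0.0], [2.0]])               # mean_prior (off/on, kW)
example_hmm.covars_ = np.tile(1.0, (2, 1, 1))               # cov_prior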
In [2]:
#Load Datasets
devices_types={}
devices_types_unsampled={}
ids_for_devices={}
db_url='postgresql://USERNAME:PASSWORD@db.wiki-energy.org:5432/postgres'
psda.set_url(db_url)
schema = 'curated'
tables= psda.get_table_names(schema)
print tables
In [3]:
table=tables[1]
ids_device_name='air1'
ids_for_devices[ids_device_name]=psda.get_dataids_with_real_values(schema,table,ids_device_name)
print ids_for_devices[ids_device_name]
In [4]:
ids_for_devices[ids_device_name]=[93,739,1953,2818,2864,3367,3723,5814,5972,6101,6636,7531,7536,7800,9609,9922,9926,9933]
#Try 1: [93,739,1953,2818,2864,3367,3723,5814,5972,6101], Top Models: [2787, 2365, 6836, 7769, 8079, 4922, 2575, 7531, 6910,7617]
#Try 2: [6636,7531,7536,7800,9609,9922,9926,9933], Top Models:[8079,6836,2365,2787,4922,2575,6910,9930,5109,7617]
In [5]:
num_houses=30
device_name='air1'
devices_types_unsampled[device_name]=psda.generate_type_for_appliance_by_dataids(schema,table,device_name,ids_for_devices[ids_device_name][:num_houses])
device_name='use'
devices_types_unsampled[device_name]=psda.generate_type_for_appliance_by_dataids(schema,table,device_name,ids_for_devices[ids_device_name][:num_houses])
In [56]:
device_name='refrigerator1'
devices_types_unsampled[device_name]=psda.generate_type_for_appliance_by_dataids(schema,table,device_name,ids_for_devices[ids_device_name][:num_houses])
In [179]:
devices_models={}
month=5
rate='15T'
with open('../../data/shared/air1/'+str(month)+'/air1_shared_'+str(month)+'_'+str(rate)+'.pkl','rb') as f:
    devices_models['air1']=pickle.load(f)
#means_list=([devices_models['air1'][x].means_[1][0] for x in devices_models['air1']])
i=0
# Iterate over a copy of the keys so models can be dropped while looping.
for l,key in enumerate(devices_models['air1'].keys()):
    if devices_models['air1'][key].means_[1][0]<.1:
        devices_models['air1'].pop(key,None)
        i=i+1
print "Deleted " + str(i) + " of "+str(l+1) +" models due to low on-states."
In [81]:
month=5
schema_train='shared'
# get_models is assumed to be defined elsewhere in the notebook: it loads the
# pickled HMMs for the given training schema, month, and sample rate (as in
# the cell above); sample_rate is set in the resampling cell below.
hmm_models=get_models(schema_train,month,sample_rate)
In [8]:
state_0=[]
state_1=[]
remove_zeroes_threshold=0.05
for model_num in devices_models['air1']:
    a=devices_models['air1'][model_num].means_
    if(a[1][0]>remove_zeroes_threshold):
        state_0.append(a[0][0])
        state_1.append(a[1][0])
high_lim=10
bins=80
plt.hist(state_1,bins,range=[0,high_lim])
plt.xlim([0 ,high_lim])
state_1_nump=np.array(state_1)
mean=np.mean(state_1_nump,axis=0)
std=np.std(state_1_nump,axis=0)
print mean
print std
print mean - 2*std
print
state0other=([devices_models['air1'][x].means_[1][0] for x in devices_models['air1']])
mean=np.mean(state0other)
std=np.std(state0other)
print mean
print std
print mean - std
In [195]:
#Resamples the data
sample_rate='15T'
length='D'
devices_types_unsplit={}
for key in devices_types_unsampled:
    devices_types_unsplit[key]=devices_types_unsampled[key].resample(sample_rate)
    #devices_types[key]=devices_types_unsplit[key].split_by(length)
    devices_types[key]=devices_types_unsplit[key]
    print key
In [10]:
non_zero=0
total=0
for instance in devices_types['air1'].instances:
    for trace in instance.traces:
        non_zero=non_zero+np.count_nonzero(trace.series)
        total=total+trace.series.count()
print non_zero/float(total)*100
In [11]:
#Create single FHMM
type_models={}
house_id=26
type_models[ids_device_name]=devices_models[ids_device_name][house_id]
model_fhmm,means_fhmm=fhmm.generate_FHMM_from_HMMs(type_models)
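In [ ]:
# A hedged sketch of the idea behind fhmm.generate_FHMM_from_HMMs, not the
# disaggregator implementation: a factorial HMM built from independent
# per-device HMMs is an ordinary HMM whose states are the Cartesian product
# of the device states, whose emission mean for a joint state is the sum of
# the per-device state means, and whose transition matrix is the Kronecker
# product of the per-device transition matrices.  combine_hmms_sketch and its
# (means, transmat) input format are illustrative only.
import itertools
def combine_hmms_sketch(device_hmms):
    names=list(device_hmms)
    combos=list(itertools.product(*[range(len(device_hmms[n][0])) for n in names]))
    joint_means=[sum(device_hmms[n][0][s] for n,s in zip(names,combo)) for combo in combos]
    joint_transmat=reduce(np.kron,[np.asarray(device_hmms[n][1]) for n in names])
    return combos,joint_means,joint_transmat
#combine_hmms_sketch({'air1':([0.0,2.0],[[0.98,0.02],[0.02,0.98]]),
#                     'refrigerator1':([0.0,0.12],[[0.9,0.1],[0.1,0.9]])})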
In [12]:
print devices_models[ids_device_name].keys()
In [13]:
#Removes Houses that are in test data from training data
for id_val in ids_for_devices[ids_device_name]:
    devices_models[ids_device_name].pop(id_val,None)
In [14]:
#Generate Test Data
test_data={}
house_num=random.randint(0,9)
trace_num=0
#print 'house num: ' + str(house_num)
#print 'trace num: ' + str(trace_num)
for device_type_name in devices_models:
    test_data[device_type_name]=utils.trace_series_to_numpy_array(devices_types[device_type_name].instances[house_num].traces[trace_num].series)
power_total=utils.trace_series_to_numpy_array(devices_types['use'].instances[house_num].traces[trace_num].series)
# Subtract the minimum aggregate reading to remove the constant baseload.
power_total_minus_bottom=[]
for i in power_total:
    power_total_minus_bottom.append(i-power_total.min())
In [15]:
#Predict and Plot FHMM Results
plt.plot(power_total_minus_bottom,label='total')
plt.title('Aggregated Energy without constant power')
plt.ylabel('Energy (Wh)')
plt.xlabel('Time')
for i,device_type in enumerate(type_models):
    plt.figure(1)
    plt.plot(test_data[device_type],label=device_type)
plt.legend(bbox_to_anchor=(0., 1.05, 1, .102), loc=3,
           ncol=2, mode="expand", borderaxespad=1.)
plt.figure()
[decoded_states, decoded_power]=fhmm.predict_with_FHMM(model_fhmm,means_fhmm,test_data,power_total_minus_bottom,plot=True)
In [82]:
#Generate Test Data
test_data={}
#house_num=random.randint(0,9)
bests={}
averages={}
worsts={}
trace_num=0
for house_num,device_instance in enumerate(devices_types[ids_device_name].instances):
    for device_type_name in devices_models:
        test_data[device_type_name]=utils.trace_series_to_numpy_array(devices_types[device_type_name].instances[house_num].traces[trace_num].series)
    power_total=utils.trace_series_to_numpy_array(devices_types['use'].instances[house_num].traces[trace_num].series)
    power_total_minus_bottom=[]
    for i in power_total:
        power_total_minus_bottom.append(i-power_total.min())
    dataid=devices_types[device_type_name].instances[house_num].metadata['dataid']
    # get_scores_old (defined below) takes this four-argument signature and
    # fills the global bests/averages/worsts dicts.
    get_scores_old(test_data,power_total_minus_bottom,dataid,deepcopy(devices_models))
#Below removes houses with bad data
for house_id in deepcopy(bests):
    if(np.count_nonzero(bests[house_id]['f1_score']['precision'])==0 or len(bests[house_id]['f1_score'])<10):
        bests.pop(house_id,None)
        averages.pop(house_id,None)
        worsts.pop(house_id,None)
get_summary_scores(bests,averages,worsts)
In [178]:
with open('air1_shared_5_15T_best.pkl','w') as f:
    pickle.dump(devices_models['air1'][best_model_for_set(bests)],f)
In [106]:
best_model_for_set(bests)
Out[106]:
In [184]:
device_type_name='air1'
[bests,averages,worsts]=score_models(device_type_name,devices_types,devices_models)
In [187]:
best_models={}
best_models['air1']=devices_models['air1'][best_model_for_set(bests)]
In [217]:
for model in devices_models['air1']:
    print devices_models['air1'][model]._covars_
In [212]:
best_models['air1']._covars_=[1,1]
In [214]:
best_models['air1'].__dict__
Out[214]:
In [213]:
thresh=10
model_eval_dict=evaluate_FHMM_single_type(devices_types,best_models,device_type_name,thresh,plot_sum=True,plot_ind=True)
In [206]:
def eval_model_with_instance(types_eval,device_type_name,best_models,house_num,model_eval_dict,thresh,plot=False):
    # Note: test_data is not passed in; this relies on the dict already
    # existing in the notebook's global scope.
    for trace_num,trace in enumerate(types_eval[device_type_name].instances[house_num].traces):
        test_data[device_type_name]=utils.trace_series_to_numpy_array(types_eval[device_type_name].instances[house_num].traces[trace_num].series)
        power_total=utils.trace_series_to_numpy_array(types_eval['use'].instances[house_num].traces[trace_num].series)
        power_total_minus_bottom=[]
        for i in power_total:
            power_total_minus_bottom.append(i-power_total.min())
        model_fhmm,means_fhmm=fhmm.generate_FHMM_from_HMMs(best_models)
        [decoded_states, decoded_power]=fhmm.predict_with_FHMM(model_fhmm,means_fhmm,test_data,power_total_minus_bottom)
        if(plot):
            plt.figure()
            #plt.plot(power_total)
            plt.plot(test_data['air1'])
            plt.plot(decoded_power['air1'],'r')
        truth_states=metric.guess_truth_from_power(test_data[device_type_name],2)
        eval_metrics=metric.get_positive_negative_stats(truth_states,decoded_states[device_type_name])
        diff_power_perc=(metric.sum_error(test_data[device_type_name],decoded_power[device_type_name])*100/np.sum(test_data[device_type_name]))
        precision_val=(metric.get_precision(eval_metrics['tp'],eval_metrics['fp']))
        recall_val=(metric.get_sensitivity(eval_metrics['tp'],eval_metrics['fn']))
        model_eval_dict['precision'].append(precision_val)
        model_eval_dict['recall'].append(recall_val)
        # Mean power per sample scaled by 24*30 approximates monthly energy.
        test_energy=(np.sum(test_data[device_type_name])/len(test_data[device_type_name]))*24*30
        pred_energy=(np.sum(decoded_power[device_type_name])/len(decoded_power[device_type_name]))*24*30
        error_perc=float(test_energy-pred_energy)/test_energy*100
        power_total_sum=np.sum(power_total/len(power_total)*24*30)
        model_eval_dict['diff_power_perc'].append(diff_power_perc)
        model_eval_dict['test_energy'].append(test_energy)
        model_eval_dict['pred_energy'].append(pred_energy)
        model_eval_dict['error_perc'].append(error_perc)
        model_eval_dict['power_total_sums'].append(power_total_sum)
        if(abs(error_perc)<thresh):
            model_eval_dict['num_less_than']=model_eval_dict['num_less_than']+1
            model_eval_dict['power_avg_good']=model_eval_dict['power_avg_good']+power_total_sum
        else:
            model_eval_dict['power_avg_bad']=model_eval_dict['power_avg_bad']+power_total_sum
    return model_eval_dict
In [51]:
def plot_and_sum_model_eval(model_eval_dict):
    #Baseline: Get average energy usage of a house and use that.
    # Note: thresh, test_data and device_type_name come from the notebook's
    # global scope rather than being passed in.
    if(model_eval_dict['num_less_than']>0):
        print 'Average Power of Houses w/ <'+str(thresh)+'% Error: ' + str(model_eval_dict['power_avg_good']/float(model_eval_dict['num_less_than']))
    print 'Average Power of Houses w/ >='+str(thresh)+'% Error: '+str(model_eval_dict['power_avg_bad']/float(len(model_eval_dict['error_perc'])-model_eval_dict['num_less_than']))
    #plt.plot(np.array(power_total_sum_list))
    print
    print 'Percentage of Houses with <'+str(thresh)+'% Error (Model): ' + str(model_eval_dict['num_less_than_perc'])
    print 'Percentage of Houses with <'+str(thresh)+'% Error (Baseline): ' + str(model_eval_dict['baseline_less_than_perc'])
    baseline_val=np.sum(model_eval_dict['test_energy'])/len(model_eval_dict['test_energy'])
    a=np.empty(len(model_eval_dict['test_energy']))
    a[:]=(baseline_val)
    base_diff_power_perc=(metric.sum_error(test_data[device_type_name],a)*100/np.sum(test_data[device_type_name]))  # computed but not plotted
    plt.figure()
    plt.plot(np.absolute(model_eval_dict['error_perc']),'r')
    plt.plot(model_eval_dict['baseline_perc'],'k')
    plt.title('Percent Error Model (Red), Percent Error Baseline (Black)')
    plt.figure()
    plt.plot(model_eval_dict['test_energy'],'b')
    plt.plot(model_eval_dict['pred_energy'],'r')
    plt.plot(a,'k')
    plt.title('Predicted Energy (Red), Actual Energy (Blue)')
In [204]:
def evaluate_FHMM_single_type(types_eval,best_models,device_type_name,thresh,plot_sum=True,plot_ind=False):
    model_eval_dict={}
    test_data={}
    model_eval_dict['precision']=[]
    model_eval_dict['recall']=[]
    model_eval_dict['test_energy']=[]
    model_eval_dict['pred_energy']=[]
    model_eval_dict['error_perc']=[]
    model_eval_dict['power_total_sums']=[]
    model_eval_dict['diff_power_perc']=[]
    model_eval_dict['power_avg_good']=0
    model_eval_dict['power_avg_bad']=0
    model_eval_dict['num_less_than']=0
    for house_num,instance_test in enumerate(types_eval[device_type_name].instances):
        model_eval_dict=eval_model_with_instance(types_eval,device_type_name,best_models,house_num,model_eval_dict,thresh,plot_ind)
    for house_num,instance_test in enumerate(types_eval[device_type_name].instances):
        for trace_num,trace in enumerate(types_eval[device_type_name].instances[house_num].traces):
            test_data[device_type_name]=utils.trace_series_to_numpy_array(types_eval[device_type_name].instances[house_num].traces[trace_num].series)
    # Baseline: predict the mean test energy for every house; express the
    # baseline error as a percentage so it is comparable to thresh.
    baseline_val=np.sum(model_eval_dict['test_energy'])/len(model_eval_dict['test_energy'])
    model_eval_dict['baseline_perc']=(np.array(model_eval_dict['test_energy'])-baseline_val)/np.array(model_eval_dict['test_energy'])*100
    model_eval_dict['baseline_less_than']=sum(abs(i) < thresh for i in model_eval_dict['baseline_perc'])
    model_eval_dict['baseline_less_than_perc']=model_eval_dict['baseline_less_than']/float(len(model_eval_dict['baseline_perc']))*100
    model_eval_dict['num_less_than_perc']=model_eval_dict['num_less_than']/float(len(model_eval_dict['error_perc']))*100
    if(plot_sum):
        plot_and_sum_model_eval(model_eval_dict)
    return model_eval_dict
In [510]:
print np.sum(eval_model(8))
print np.sum(eval_model(12))
print np.sum(eval_model(2))
print np.sum(eval_model(4))
print np.sum(eval_model(3))
Out[510]:
In [28]:
def eval_model(house_num,plot=False):
    # Relies on notebook globals: test_data, type_models, device_type_name,
    # device_name, trace_num, best_model (the selected HMM) and the *_list
    # accumulators initialised in the next cell.
    test_data[device_type_name]=utils.trace_series_to_numpy_array(devices_types[device_type_name].instances[house_num].traces[trace_num].series)
    power_total=utils.trace_series_to_numpy_array(devices_types['use'].instances[house_num].traces[trace_num].series)
    power_total_minus_bottom=[]
    for i in power_total:
        power_total_minus_bottom.append(i-power_total.min())
    type_models[device_name]=best_model
    model_fhmm,means_fhmm=fhmm.generate_FHMM_from_HMMs(type_models)
    [decoded_states, decoded_power]=fhmm.predict_with_FHMM(model_fhmm,means_fhmm,test_data,power_total_minus_bottom)
    if(plot):
        plt.figure()
        plt.plot(power_total)
        #plt.plot(test_data['air1'])
        #plt.plot(decoded_power['air1'],'r')
    truth_states=metric.guess_truth_from_power(test_data[device_name],2)
    eval_metrics=metric.get_positive_negative_stats(truth_states,decoded_states[device_name])
    precision_val=(metric.get_precision(eval_metrics['tp'],eval_metrics['fp']))
    recall_val=(metric.get_sensitivity(eval_metrics['tp'],eval_metrics['fn']))
    diff_power_perc.append(metric.sum_error(test_data[device_name],decoded_power[device_name])*4)
    #plt.title('Precision:' + str(precision_val) + ', Recall: ' + str(recall_val))
    if(precision_val>0 and recall_val>0 and np.sum(decoded_power['air1'])>0):
        precision_list.append(precision_val)
        recall_list.append(recall_val)
        test_energy=(np.sum(test_data['air1'])/len(test_data['air1']))*24*30
        pred_energy=(np.sum(decoded_power['air1'])/len(decoded_power['air1']))*24*30
        error_perc=float(test_energy-pred_energy)/test_energy*100
        test_energy_list.append(test_energy)
        pred_energy_list.append(pred_energy)
        error_perc_list.append(error_perc)
        return np.sum(power_total/len(power_total)*24*30)
    else:
        return 0
In [30]:
device_type_name='air1'
device_name=device_type_name
print best_model_for_set(bests)
print
diff_power_perc=[]
precision_list=[]
recall_list=[]
test_energy_sum=0
pred_energy_sum=0
test_energy_list=[]
pred_energy_list=[]
error_perc_list=[]
power_total_sum_list=[]
for house_num,device_instance in enumerate(devices_types['air1'].instances):
    power=eval_model(house_num)
    if(power>0):
        power_total_sum_list.append(power)
In [41]:
#Baseline: Get average energy usage of a house and use that.
baseline_list=np.array(test_energy_list)-np.mean(test_energy_list)
baseline_less_25=sum(abs(i) < 25 for i in baseline_list)
plt.figure()
plt.plot(np.absolute(error_perc_list))
#num_less_25=sum(abs(i) < 25 for i in error_perc_list)
power_avg_good=0
power_avg_bad=0
num_less_25=0
for i,val in enumerate(error_perc_list):
    if(abs(val)<25):
        num_less_25=num_less_25+1
        power_avg_good=power_avg_good+power_total_sum_list[i]
    else:
        power_avg_bad=power_avg_bad+power_total_sum_list[i]
print 'Average Power of Houses w/ <25% Error: ' + str(power_avg_good/float(num_less_25))
print 'Average Power of Houses w/ >=25% Error: '+str(power_avg_bad/float(len(error_perc_list)-num_less_25))
#plt.plot(np.absolute(baseline_list),'r')
#plt.plot(np.array(power_total_sum_list))
print
print 'Percentage of Houses with <25% Error (Model): ' + str(num_less_25/float(len(error_perc_list))*100)
print 'Percentage of Houses with <25% Error (Baseline): ' + str(baseline_less_25/float(len(error_perc_list))*100)
plt.figure()
plt.plot(test_energy_list)
plt.plot(pred_energy_list,'r')
plt.plot(baseline_list,'g')
Out[41]:
In [18]:
test_data[device_type_name]=utils.trace_series_to_numpy_array(devices_types[device_type_name].instances[8].traces[trace_num].series)
plt.plot(test_data[device_type_name])
plt.figure()
test_data[device_type_name]=utils.trace_series_to_numpy_array(devices_types[device_type_name].instances[12].traces[trace_num].series)
plt.plot(test_data[device_type_name])
plt.figure()
test_data[device_type_name]=utils.trace_series_to_numpy_array(devices_types[device_type_name].instances[2].traces[trace_num].series)
plt.plot(test_data[device_type_name])
Out[18]:
In [69]:
def get_scores_old(test_data,power_total_minus_bottom,house_num,devices_models):
    precision=[]
    recall=[]
    f1_score=[]
    diff_power_perc=[]
    diff_power=[]
    accuracy=[]
    device_name='air1'
    i=0
    # Iterate over a copy of the keys so models can be popped while looping.
    for l,house_id in enumerate(devices_models[device_name].keys()):
        type_models[device_name]=devices_models[device_name][house_id]
        model_fhmm,means_fhmm=fhmm.generate_FHMM_from_HMMs(type_models)
        [decoded_states, decoded_power]=fhmm.predict_with_FHMM(model_fhmm,means_fhmm,test_data,power_total_minus_bottom)
        truth_states=metric.guess_truth_from_power(test_data[device_name],2)
        eval_metrics=metric.get_positive_negative_stats(truth_states,decoded_states[device_name])
        precision_val=(metric.get_precision(eval_metrics['tp'],eval_metrics['fp']))
        recall_val=(metric.get_sensitivity(eval_metrics['tp'],eval_metrics['fn']))
        if(len([x for x in decoded_states['air1'] if x > 0])>1):
            precision.append(precision_val)
            recall.append(recall_val)
            f1_score.append(metric.get_f1_score(eval_metrics))
            accuracy.append(metric.get_accuracy(eval_metrics))
            diff_power.append(metric.sum_error(test_data[device_name],decoded_power[device_name])/4)
            diff_power_perc.append(metric.sum_error(test_data[device_name],decoded_power[device_name])*100/np.sum(test_data[device_name]))
        else:
            i=i+1
            devices_models[device_name].pop(house_id,None)
    print "Deleted " + str(i) + " of " + str(l+1) + " models for house "+str(house_num)+" due to low number of on-state guesses."
    eval_df=pd.DataFrame(np.array([devices_models[device_name].keys(),precision,recall,f1_score,accuracy,diff_power_perc]).T,columns=['house_id','precision','recall','f1_score','accuracy','diff_power_perc'])
    #Best
    best_scores={}
    best_scores['f1_score']=eval_df.sort('f1_score', ascending=0).head(10)
    best_scores['accuracy']=eval_df.sort('accuracy', ascending=0).head(10)
    best_scores['precision']=eval_df.sort('precision', ascending=0).head(10)
    best_scores['recall']=eval_df.sort('recall', ascending=0).head(10)
    best_scores['diff_power_perc']=eval_df.sort(['diff_power_perc'], ascending=1).head(10)
    bests[house_num]=best_scores
    #Average
    scores={}
    scores['precision']=eval_df['precision'].mean()
    scores['precision_std']=eval_df['precision'].std()
    scores['recall']=eval_df['recall'].mean()
    scores['recall_std']=eval_df['recall'].std()
    scores['f1_score']=eval_df['f1_score'].mean()
    scores['f1_score_std']=eval_df['f1_score'].std()
    scores['accuracy']=eval_df['accuracy'].mean()
    scores['accuracy_std']=eval_df['accuracy'].std()
    scores['diff_power_perc']=eval_df['diff_power_perc'].mean()
    scores['diff_power_perc_std']=eval_df['diff_power_perc'].std()
    averages[house_num]=scores
    #Worst
    worst_scores={}
    worst_scores['f1_score']=eval_df.sort('f1_score', ascending=1).head(10)
    worst_scores['accuracy']=eval_df.sort('accuracy', ascending=1).head(10)
    worst_scores['diff_power_perc']=eval_df.sort(['diff_power_perc'], ascending=0).head(10)
    worsts[house_num]=worst_scores
avg_accuracy_sum=[]
avg_diff_power_perc_sum=[]
avg_diff_power_perc_std_sum=[]
avg_f1_score_sum=[]
best_f1_score_sum=[]
best_diff_power_perc_sum=[]
worst_f1_score_sum=[]
worst_diff_power_perc_sum=[]
In [20]:
def get_summary_scores(bests,averages,worsts):
    avg_accuracy_sum=[]
    avg_diff_power_perc_sum=[]
    avg_diff_power_perc_std_sum=[]
    avg_f1_score_sum=[]
    best_diff_power_perc_sum=[]
    best_precision_sum=[]
    best_recall_sum=[]
    best_f1_score_sum=[]
    worst_diff_power_perc_sum=[]
    worst_f1_score_sum=[]
    for index in bests:
        avg_accuracy_sum.append(averages[index]['accuracy'])
        avg_diff_power_perc_sum.append(averages[index]['diff_power_perc'])
        avg_diff_power_perc_std_sum.append(averages[index]['diff_power_perc_std'])
        avg_f1_score_sum.append(averages[index]['f1_score'])
        best_diff_power_perc_sum.append(np.mean(bests[index]['diff_power_perc']['diff_power_perc']))
        best_f1_score_sum.append(np.mean(bests[index]['f1_score']['f1_score']))
        best_precision_sum.append(np.mean(bests[index]['f1_score']['precision']))
        best_recall_sum.append(np.mean(bests[index]['f1_score']['recall']))
        worst_diff_power_perc_sum.append(np.mean(worsts[index]['diff_power_perc']['diff_power_perc']))
        worst_f1_score_sum.append(np.mean(worsts[index]['f1_score']['f1_score']))
    print "AVERAGE"
    print "Accuracy: " + str(np.mean(avg_accuracy_sum))
    print "F1 Score: " + str(np.mean(avg_f1_score_sum))
    print "Power Percentage Error: " + str(np.mean(avg_diff_power_perc_sum))
    print "Power Percentage Error std: " + str(np.mean(avg_diff_power_perc_std_sum))
    print "BEST"
    print "Power Percentage Error: " + str(np.mean(best_diff_power_perc_sum))
    print "F1 Score: " + str(np.mean(best_f1_score_sum))
    print "Precision: " + str(np.mean(best_precision_sum))
    print "Recall: " + str(np.mean(best_recall_sum))
    print "WORST"
    print "Power Percentage Error: " + str(np.mean(worst_diff_power_perc_sum))
    print "F1 Score: " + str(np.mean(worst_f1_score_sum))
In [120]:
def score_models(device_type_name,devices_types,models_list):
    #Generate Test Data
    test_data={}
    #house_num=random.randint(0,9)
    bests={}
    averages={}
    worsts={}
    trace_num=0
    for house_num,device_instance in enumerate(devices_types[device_type_name].instances):
        test_data[device_type_name]=utils.trace_series_to_numpy_array(devices_types[device_type_name].instances[house_num].traces[trace_num].series)
        power_total=utils.trace_series_to_numpy_array(devices_types['use'].instances[house_num].traces[trace_num].series)
        power_total_minus_bottom=[]
        for i in power_total:
            power_total_minus_bottom.append(i-power_total.min())
        dataid=devices_types[device_type_name].instances[house_num].metadata['dataid']
        [bests,averages,worsts]=get_scores(bests,averages,worsts,device_type_name,test_data,power_total_minus_bottom,dataid,deepcopy(models_list))
    #Below removes houses with bad data
    for house_id in deepcopy(bests):
        if(np.count_nonzero(bests[house_id]['f1_score']['precision'])==0 or len(bests[house_id]['f1_score'])<1):
            bests.pop(house_id,None)
            averages.pop(house_id,None)
            worsts.pop(house_id,None)
    get_summary_scores(bests,averages,worsts)
    return [bests,averages,worsts]
In [21]:
#first_10_top=[2787, 2365, 6836, 7769, 8079, 4922, 2575, 7531, 6910,7617]
#second_10_top=[8079,6836,2365,2787,4922,2575,6910,9930,5109,7617]
#set(first_10_top) & set(second_10_top)
def best_model_for_set(bests):
    best_models=[]
    for house in bests:
        best_id=bests[house]['diff_power_perc']['f1_score'].argmin()
        best_models.append(bests[house]['diff_power_perc']['house_id'][best_id])
    best_dict={}
    for val in best_models:
        best_dict[val]=best_models.count(val)
    #The house that is best in the most models is the best house,
    #as long as it was best for more than one model.
    best_model_key_f1=max(best_dict, key=best_dict.get)
    return best_model_key_f1
In [153]:
house_ids_top_10=[]
house_ids_dict={}
i=0
for val in bests:
    for val2 in bests[val]:
        i=i+1
        for val3 in bests[val][val2]['house_id']:
            house_ids_dict[val3]=0
for val in bests:
    for val2 in bests[val]:
        for val3 in bests[val][val2]['house_id']:
            house_ids_top_10.append(val3)
            house_ids_dict[val3]=house_ids_dict[val3]+1
import operator
x=house_ids_dict
sorted_x = sorted(x.iteritems(), key=operator.itemgetter(1))
print sorted_x
print i
In [182]:
def get_scores(bests,averages,worsts,device_type_name,test_data,power_total_minus_bottom,house_num,models_list):
    precision=[]
    recall=[]
    f1_score=[]
    diff_power_perc=[]
    diff_power=[]
    accuracy=[]
    house_ids=[]
    i=0
    for l,model_name in enumerate(models_list[device_type_name]):
        type_models={}
        type_models[device_type_name]=models_list[device_type_name][model_name]
        model_fhmm,means_fhmm=fhmm.generate_FHMM_from_HMMs(type_models)
        [decoded_states, decoded_power]=fhmm.predict_with_FHMM(model_fhmm,means_fhmm,test_data,power_total_minus_bottom)
        truth_states=metric.guess_truth_from_power(test_data[device_type_name],2)
        eval_metrics=metric.get_positive_negative_stats(truth_states,decoded_states[device_type_name])
        precision_val=(metric.get_precision(eval_metrics['tp'],eval_metrics['fp']))
        recall_val=(metric.get_sensitivity(eval_metrics['tp'],eval_metrics['fn']))
        if(len([x for x in decoded_states[device_type_name] if x > 0])>1):
            house_ids.append(model_name)
            precision.append(precision_val)
            recall.append(recall_val)
            f1_score.append(metric.get_f1_score(eval_metrics))
            accuracy.append(metric.get_accuracy(eval_metrics))
            diff_power.append(metric.sum_error(test_data[device_type_name],decoded_power[device_type_name])/4)
            diff_power_perc.append(metric.sum_error(test_data[device_type_name],decoded_power[device_type_name])*100/np.sum(test_data[device_type_name]))
        else:
            i=i+1
    print "Deleted " + str(i) + " of " + str(l+1) + " models for house "+str(house_num)+" due to low number of on-state guesses."
    eval_df=pd.DataFrame(np.array([house_ids,precision,recall,f1_score,accuracy,diff_power_perc]).T,
                         columns=['house_id','precision','recall','f1_score','accuracy','diff_power_perc'])
    #Best
    best_scores={}
    best_scores['f1_score']=eval_df.sort('f1_score', ascending=0).head(10)
    best_scores['accuracy']=eval_df.sort('accuracy', ascending=0).head(10)
    best_scores['precision']=eval_df.sort('precision', ascending=0).head(10)
    best_scores['recall']=eval_df.sort('recall', ascending=0).head(10)
    best_scores['diff_power_perc']=eval_df.sort(['diff_power_perc'], ascending=1).head(10)
    bests[house_num]=best_scores
    #Average
    scores={}
    scores['precision']=eval_df['precision'].mean()
    scores['precision_std']=eval_df['precision'].std()
    scores['recall']=eval_df['recall'].mean()
    scores['recall_std']=eval_df['recall'].std()
    scores['f1_score']=eval_df['f1_score'].mean()
    scores['f1_score_std']=eval_df['f1_score'].std()
    scores['accuracy']=eval_df['accuracy'].mean()
    scores['accuracy_std']=eval_df['accuracy'].std()
    scores['diff_power_perc']=eval_df['diff_power_perc'].mean()
    scores['diff_power_perc_std']=eval_df['diff_power_perc'].std()
    averages[house_num]=scores
    #Worst
    worst_scores={}
    worst_scores['f1_score']=eval_df.sort('f1_score', ascending=1).head(10)
    worst_scores['accuracy']=eval_df.sort('accuracy', ascending=1).head(10)
    worst_scores['diff_power_perc']=eval_df.sort(['diff_power_perc'], ascending=0).head(10)
    worsts[house_num]=worst_scores
    return [bests,averages,worsts]
In [ ]:
#Get different model sets for May, possibly some with different transition matrices
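# A hedged sketch for the note above, left commented out like the original
# prior cell (gen is not imported in this notebook): reuse the priors from the
# first cell but sweep the transition-matrix stickiness to get alternative
# model sets for May.  The stay-probability values are illustrative only.
#pi_prior=np.array([0.5,0.5])
#mean_prior=np.array([[0],[2]])
#cov_prior=np.tile(1, (2, 1, 1))
#for stay_prob in [0.90,0.95,0.98]:
#    a_prior=np.array([[stay_prob,1-stay_prob],[1-stay_prob,stay_prob]])
#    gen.generate_and_pickle_models('air1',pi_prior,a_prior,mean_prior,cov_prior,'dataid',2,'D','1T')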