In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn import hmm
from sklearn.metrics import confusion_matrix
from collections import OrderedDict
import itertools
from copy import deepcopy
import datetime
import pandas as pd
import matplotlib.pylab as pylab
import scipy
import scipy.fftpack
import pylab
import sys
sys.path.append('../../') # or non-Unix equivalent (add wikienergy/ to path)
import disaggregator as da
from numpy import sin, linspace, pi
from pylab import plot, show, title, xlabel, ylabel, subplot
from scipy import fft, arange
from scipy import pi
import os
%matplotlib inline
pylab.rcParams['figure.figsize'] = 16, 9
In [2]:
#Produces an (n_samples, 1) array of arrays, the input shape the HMM expects
def values_to_array(values):
    X = []
    for i in values:
        X.append([float(i)])
    return np.array(X)
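# Quick illustrative check (made-up values, not from the dataset): the helper
# turns a 1-D series into the (n_samples, 1) feature matrix hmmlearn expects.
print values_to_array([1, 2.5, 3]).shape  # (3, 1)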
In [3]:
def sort_startprob(mapping, startprob):
    """ Sort the startprob according to power means, as returned by mapping
    """
    num_elements = len(startprob)
    new_startprob = np.zeros(num_elements)
    for i in xrange(len(startprob)):
        new_startprob[i] = startprob[mapping[i]]
    return new_startprob
In [4]:
def sort_covars(mapping, covars):
    """ Sort the covariances according to power means, as returned by mapping
    """
    new_covars = np.zeros_like(covars)
    for i in xrange(len(covars)):
        new_covars[i] = covars[mapping[i]]
    return new_covars
In [5]:
def sort_transition_matrix(mapping, A):
    """ Sorts the transition matrix according to power means, as returned by mapping
    """
    num_elements = len(A)
    A_new = np.zeros((num_elements, num_elements))
    for i in range(num_elements):
        for j in range(num_elements):
            A_new[i, j] = A[mapping[i], mapping[j]]
    return A_new
In [6]:
def return_sorting_mapping(means):
    """ Map each position in the sorted means back to its original state index
    """
    means_copy = deepcopy(means)
    means_copy = np.sort(means_copy, axis=0)
    mapping = {}
    mapping_set = set()
    for i, val in enumerate(means_copy):
        matches = np.where(val == means)[0]
        for idx in matches:
            # Skip indices already claimed, so duplicate means map one-to-one
            if idx not in mapping_set:
                mapping_set.add(idx)
                mapping[i] = idx
                break
    return mapping
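# Illustrative example (made-up means): state 1 has the lowest mean power, so
# sorted position 0 maps back to original state 1, giving {0: 1, 1: 2, 2: 0}.
print return_sorting_mapping(np.array([[60], [0], [35]]))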
In [7]:
def sort_learnt_parameters(startprob, means, covars, transmat):
    """ Sort all learnt parameters so states are ordered by ascending mean power
    """
    mapping = return_sorting_mapping(means)
    means_new = np.sort(means, axis=0)
    startprob_new = sort_startprob(mapping, startprob)
    covars_new = sort_covars(mapping, covars)
    transmat_new = sort_transition_matrix(mapping, transmat)
    assert np.shape(means_new) == np.shape(means)
    assert np.shape(startprob_new) == np.shape(startprob)
    assert np.shape(transmat_new) == np.shape(transmat)
    return [startprob_new, means_new, covars_new, transmat_new]
In [8]:
def initInstanceModelFromPriors(pi_prior, a_prior, mean_prior, cov_prior):
    model = hmm.GaussianHMM(n_components=pi_prior.size, covariance_type='full',
                            startprob=pi_prior, transmat=a_prior)
    model.means_ = mean_prior
    model.covars_ = cov_prior
    return model
In [9]:
#Uses the power values as a single feature.
#Refits the model on a trace, then rebuilds it with states sorted by mean power.
def fitTraceToInstanceModel(model, trace):
    trace_values = values_to_array(trace.series)
    model.fit([trace_values])
    startprob, means, covars, transmat = sort_learnt_parameters(
        model.startprob_, model.means_, model.covars_, model.transmat_)
    model = hmm.GaussianHMM(n_components=startprob.size, covariance_type='full',
                            startprob=startprob, transmat=transmat)
    model.means_ = means
    model.covars_ = covars
    return model
In [10]:
#Uses the power values as a single feature.
def getAllInstanceModels(device_type, device_type_name, key_for_model_name,
                         pi_prior, a_prior, mean_prior, cov_prior):
    model_list = OrderedDict()
    for instance in device_type.instances:
        instance_name = instance.traces[0].metadata[key_for_model_name]
        model_list[instance_name] = initInstanceModelFromPriors(pi_prior, a_prior, mean_prior, cov_prior)
        # The first trace is held out for testing; train on the rest.
        for trace in instance.traces[1:]:
            model_list[instance_name] = fitTraceToInstanceModel(model_list[instance_name], trace)
    return model_list
In [11]:
def getBestInstanceModel(models, device_type, key_for_model_name):
    """ Score every instance model against each instance's held-out test trace
        and return the name of the model with the highest average log likelihood.
    """
    dfs_model = {}
    for model_name in models:
        instances_of_model = []
        for instance in device_type.instances:
            test_trace = instance.traces[0]
            instance_name = test_trace.metadata[key_for_model_name]
            test = values_to_array(test_trace.series)
            model_score = models[model_name].score(test)  # log likelihood
            instances_of_model.append([model_name, instance_name, model_score])
        dfs_model[model_name] = pd.DataFrame(data=instances_of_model,
                                             columns=['Model_Instance', 'Test_Instance', 'Value'])
    model_averages = []
    for key in dfs_model:
        total = 0
        for row in dfs_model[key].iterrows():
            total = total + row[1]['Value']
        model_averages.append([key, total / len(dfs_model[key].index)])
    avg_model_df = pd.DataFrame(data=model_averages, columns=['Model_Instance', 'Avg Probability'])
    avg_model_df = avg_model_df.sort('Avg Probability', ascending=False)
    print avg_model_df
    bestModel = avg_model_df.head(1)['Model_Instance'].values[0]
    print str(bestModel) + ' is best.'
    return bestModel
In [12]:
def compute_pi_fhmm(list_pi):
    '''
    Input: list_pi: List of pi's of the individual learnt HMMs
    Output: Combined pi for the FHMM
    '''
    result = list_pi[0]
    for i in range(len(list_pi) - 1):
        result = np.kron(result, list_pi[i + 1])
    return result

def compute_A_fhmm(list_A):
    '''
    Input: list_A: List of transition matrices of the individual learnt HMMs
    Output: Combined transition matrix for the FHMM
    '''
    result = list_A[0]
    for i in range(len(list_A) - 1):
        result = np.kron(result, list_A[i + 1])
    return result
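# Sanity check with illustrative values (not from the dataset): combining a
# 2-state and a 3-state chain via the Kronecker product gives a 6-state chain;
# the combined pi still sums to 1 and every row of the combined A sums to 1.
print compute_pi_fhmm([np.array([0.6, 0.4]), np.array([0.1, 0.2, 0.7])])
print compute_A_fhmm([np.array([[0.9, 0.1], [0.1, 0.9]]), np.identity(3)]).shape  # (6, 6)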
def compute_means_fhmm(list_means):
    '''
    Input: list_means: List of state means of the individual learnt HMMs
    Output: [means, covars] for every combination of appliance states
    '''
    states_combination = list(itertools.product(*list_means))
    num_combinations = len(states_combination)
    means_stacked = np.array([sum(x) for x in states_combination])
    means = np.reshape(means_stacked, (num_combinations, 1))
    cov = np.tile(5 * np.identity(1), (num_combinations, 1, 1))
    return [means, cov]
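# Illustrative check (made-up means): appliances with states [0, 50] and
# [0, 300] yield the four combined emission means 0, 300, 50, 350.
print compute_means_fhmm([[0, 50], [0, 300]])[0].flatten()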
def create_combined_hmm(n, pi, A, mean, cov):
    combined_model = hmm.GaussianHMM(n_components=n, covariance_type='full',
                                     startprob=pi, transmat=A)
    combined_model.covars_ = cov
    combined_model.means_ = mean
    return combined_model

def decode_hmm(length_sequence, centroids, appliance_list, states):
    '''
    Decodes the FHMM state sequence into per-appliance states and power levels
    '''
    hmm_states = {}
    hmm_power = {}
    total_num_combinations = 1
    for appliance in appliance_list:
        total_num_combinations *= len(centroids[appliance])
    for appliance in appliance_list:
        hmm_states[appliance] = np.zeros(length_sequence, dtype=np.int)
        hmm_power[appliance] = np.zeros(length_sequence)
    for i in range(length_sequence):
        factor = total_num_combinations
        for appliance in appliance_list:
            # explicit floor division, so this also behaves correctly in Python 3
            factor = factor // len(centroids[appliance])
            temp = int(states[i]) // factor
            hmm_states[appliance][i] = temp % len(centroids[appliance])
            hmm_power[appliance][i] = centroids[appliance][hmm_states[appliance][i]]
    return [hmm_states, hmm_power]
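# Worked example (hypothetical centroids): with appliance A (3 states) and
# appliance B (2 states), A varies slowest in the Kronecker ordering, so the
# combined state 5 decodes to A=2, B=1.
demo_states, demo_power = decode_hmm(1, {'A': [0, 40, 80], 'B': [0, 300]}, ['A', 'B'], [5])
print demo_states  # {'A': array([2]), 'B': array([1])}
print demo_power   # {'A': array([ 80.]), 'B': array([ 300.])}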
In [13]:
#Load Datasets
devices_types={}
folder_path='/home/steve/DSSG/wikienergy/data/Tracebase/'
dataset=da.TracebaseDatasetAdapter(folder_path,'D','15T')
In [14]:
device_name='Cookingstove'
devices_types[device_name]=dataset.generate_type(device_name)
In [18]:
device_name='Dishwasher'
devices_types[device_name]=dataset.generate_type(device_name)
In [19]:
device_name='refrigerator_good'
devices_types[device_name]=dataset.generate_type(device_name)
In [36]:
device_name='Freezer'
devices_types[device_name]=dataset.generate_type(device_name)
In [21]:
device_name='Washingmachine_large'
devices_types[device_name]=dataset.generate_type(device_name)
In [15]:
#Look at individual trace
device_name_test='Cookingstove'
device_type_test=devices_types[device_name_test]
index=0
index2=1
print type(device_type_test.instances[index].traces[index2].series[0])
print device_type_test.instances[index].traces[index2].metadata
plt.plot(device_type_test.instances[index].traces[index2].series)
Out[15]: [plot of the selected Cookingstove trace]
In [20]:
#Initialize Variables for Model
devices_models={}
devices_data={}
devices_type_names=[]
pi_prior={}
a_prior={}
mean_prior={}
cov_prior={}
best_model=OrderedDict()
In [21]:
## 1 Feature
device_type_name = 'Cookingstove'
key_for_model_name = 'instance_name'
pi_prior[device_type_name] = np.array([0.1, 0.1, 0.8])
a_prior[device_type_name] = np.array([[0.95, 0.04, 0.01], [0.04, 0.95, 0.01], [0.04, 0.01, 0.95]])
mean_prior[device_type_name] = np.array([[0], [35], [60]])
cov_prior[device_type_name] = np.tile(np.identity(1), (3, 1, 1))  # one 1x1 covariance per state
devices_models[device_type_name] = getAllInstanceModels(
    devices_types[device_type_name], device_type_name, key_for_model_name,
    pi_prior[device_type_name], a_prior[device_type_name],
    mean_prior[device_type_name], cov_prior[device_type_name])
best_model[device_type_name] = getBestInstanceModel(
    devices_models[device_type_name], devices_types[device_type_name], key_for_model_name)
devices_type_names.append(device_type_name)
In [22]:
## 1 Feature
device_type_name = 'Dishwasher'
key_for_model_name = 'instance_name'
pi_prior[device_type_name] = np.array([0.1, 0.1, 0.8])
a_prior[device_type_name] = np.array([[0.95, 0.025, 0.025], [0.025, 0.95, 0.025], [0.025, 0.025, 0.95]])
mean_prior[device_type_name] = np.array([[0], [300], [600]])
cov_prior[device_type_name] = np.tile(np.identity(1), (3, 1, 1))  # one 1x1 covariance per state
devices_models[device_type_name] = getAllInstanceModels(
    devices_types[device_type_name], device_type_name, key_for_model_name,
    pi_prior[device_type_name], a_prior[device_type_name],
    mean_prior[device_type_name], cov_prior[device_type_name])
best_model[device_type_name] = getBestInstanceModel(
    devices_models[device_type_name], devices_types[device_type_name], key_for_model_name)
devices_type_names.append(device_type_name)
In [26]:
## 1 Feature
device_type_name = 'refrigerator_good'
key_for_model_name = 'instance_name'
pi_prior[device_type_name] = np.array([0.33, 0.33, 0.34])
a_prior[device_type_name] = np.array([[0.5, 0.25, 0.25], [0.25, 0.5, 0.25], [0.5, 0.25, 0.25]])
mean_prior[device_type_name] = np.array([[0], [4], [12]])
cov_prior[device_type_name] = np.tile(np.identity(1), (3, 1, 1))  # one 1x1 covariance per state
devices_models[device_type_name] = getAllInstanceModels(
    devices_types[device_type_name], device_type_name, key_for_model_name,
    pi_prior[device_type_name], a_prior[device_type_name],
    mean_prior[device_type_name], cov_prior[device_type_name])
best_model[device_type_name] = getBestInstanceModel(
    devices_models[device_type_name], devices_types[device_type_name], key_for_model_name)
devices_type_names.append(device_type_name)
In [27]:
## 1 Feature
device_type_name = 'Washingmachine_small'
key_for_model_name = 'instance_name'
devices_type_names.append(device_type_name)
pi_prior[device_type_name] = np.array([0.5, 0.5])
a_prior[device_type_name] = np.array([[0.8, 0.2], [0.2, 0.8]])
mean_prior[device_type_name] = np.array([[0], [50]])
cov_prior[device_type_name] = np.tile(np.identity(1), (2, 1, 1))
devices_models[device_type_name] = getAllInstanceModels(
    devices_types[device_type_name], device_type_name, key_for_model_name,
    pi_prior[device_type_name], a_prior[device_type_name],
    mean_prior[device_type_name], cov_prior[device_type_name])
best_model[device_type_name] = getBestInstanceModel(
    devices_models[device_type_name], devices_types[device_type_name], key_for_model_name)
In [28]:
## 1 Feature
device_type_name = 'Washingmachine_large'
key_for_model_name = 'instance_name'
devices_type_names.append(device_type_name)
pi_prior[device_type_name] = np.array([0.5, 0.5])
a_prior[device_type_name] = np.array([[0.8, 0.2], [0.2, 0.8]])
mean_prior[device_type_name] = np.array([[0], [300]])
cov_prior[device_type_name] = np.tile(np.identity(1), (2, 1, 1))
devices_models[device_type_name] = getAllInstanceModels(
    devices_types[device_type_name], device_type_name, key_for_model_name,
    pi_prior[device_type_name], a_prior[device_type_name],
    mean_prior[device_type_name], cov_prior[device_type_name])
best_model[device_type_name] = getBestInstanceModel(
    devices_models[device_type_name], devices_types[device_type_name], key_for_model_name)
In [30]:
#Aggregate the signals, using the first instance of each device type.
power_total = np.zeros((96, 1))  # 24 hours of 15-minute samples
time_total = []
test_data = {}
test_time = {}
#Create generic time array
today = datetime.datetime.min
mins_15 = datetime.timedelta(minutes=15)
for i, val in enumerate(power_total):
    time_total.append((today + i * mins_15).time())
plt.figure()
for device_name in devices_type_names:
    print device_name
    device_type = devices_types[device_name]
    test_data[device_name] = values_to_array(device_type.instances[0].traces[0].series)
    test_time[device_name] = device_type.instances[0].traces[0].series.index.time
    power_total = power_total + test_data[device_name]
    plt.plot(test_time[device_name], test_data[device_name], label=device_name)
plt.plot(time_total, power_total, label='total')
plt.title("Simulated total energy")
plt.xlabel("Time")
plt.ylabel("Energy (Wh)")
plt.legend(bbox_to_anchor=(0., 1.05, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
Out[30]: [plot of per-device traces and the simulated aggregate]
In [31]:
#Creates the FHMM from the per-appliance priors.
list_pi = [pi_prior[appliance] for appliance in best_model]
list_A = [a_prior[appliance] for appliance in best_model]
list_means = [mean_prior[appliance].flatten().tolist() for appliance in best_model]
pi_combined = compute_pi_fhmm(list_pi)
A_combined = compute_A_fhmm(list_A)
[mean_combined, cov_combined] = compute_means_fhmm(list_means)
model_fhmm = create_combined_hmm(len(pi_combined), pi_combined, A_combined,
                                 mean_combined, cov_combined)
#Subtract the constant baseline load before decoding.
power_total_minus_bottom = power_total - power_total.min()
learnt_states = model_fhmm.predict(power_total_minus_bottom)
[decoded_states, decoded_power] = decode_hmm(len(learnt_states), mean_prior,
                                             [appliance for appliance in best_model],
                                             learnt_states)
In [32]:
plt.figure()
plt.plot(time_total, power_total_minus_bottom)
plt.title('Aggregated Energy without constant power')
plt.ylabel('Energy (Wh)')
plt.xlabel('Time')
for i, device_type in enumerate(best_model):
    plt.figure()
    #plt.subplot(2,1,1)
    plt.plot(test_time[device_type], test_data[device_type])
    plt.title('Ground Truth Energy for %s' % device_type)
    #plt.subplot(2,1,2)
    plt.plot(time_total, decoded_power[device_type])
    plt.title('Predicted Energy for %s' % device_type)
    plt.ylabel('Energy (Wh)')
    plt.xlabel('Time')
    plt.ylim((np.min(test_data[device_type]) - 10, np.max(test_data[device_type]) + 10))
    plt.tight_layout()
In [33]:
for device_type in devices_type_names:
    truth_states = da.evaluation_metrics.guess_truth_from_power(test_data[device_type], 2)
    eval_metrics = da.evaluation_metrics.get_positive_negative_stats(truth_states,
                                                                     decoded_states[device_type])
    print device_type
    print 'precision: ' + str(da.evaluation_metrics.get_precision(eval_metrics['tp'], eval_metrics['fp']) * 100) + '%'
    print 'recall: ' + str(da.evaluation_metrics.get_sensitivity(eval_metrics['tp'], eval_metrics['fn']) * 100) + '%'
    print 'diff in power: ' + str(da.evaluation_metrics.sum_error(test_data[device_type], decoded_power[device_type])) + ' Wh'
    print