In [1]:
import os
from disaggregator import appliance
from disaggregator import fhmm
from disaggregator import utils
from disaggregator import evaluation_metrics as metric
import pandas as pd
import numpy as np
from collections import OrderedDict
import datetime
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
def get_types_from_sample_folder(directory):
traces=[]
instances=[]
types={}
for i,file in enumerate(os.listdir(directory)):
house_path = directory + file
type_name=file
instances=[]
for j,file in enumerate(os.listdir(house_path)):
device_path = house_path + '/'+ file
traces=[]
for k,file in enumerate(os.listdir(device_path)):
file_path = device_path + '/' + file
df = pd.read_csv(file_path,squeeze=True,header=None)
series=pd.Series(df[1].values,index=df[0].values)
meta_trace={'source':'sample','dataid':str(j),'device_name':type_name,'tracenum':k}
traces.append(appliance.ApplianceTrace(series,meta_trace))
meta_instance={'source':'sample','dataid':str(j),'device_name':type_name}
instances.append(appliance.ApplianceInstance(traces,meta_instance))
meta_type={'source':'sample','device_name':type_name}
types[type_name]=appliance.ApplianceType(instances,meta_type)
return types
types_test=get_types_from_sample_folder("./sample_data/test/")
types_train=get_types_from_sample_folder("./sample_data/train/")
Hidden markov models associate continuous variables with a finite set of pre-defined states. These states are set as priors, but can be fit to new data entered into the HMM using the fit() method.
It is best to set the priors (specifically the starting probability and transition matrix) using domain knowledge. The means will be mostly overwritten by the fitting of the data.
After the model parameters have been set, the model can be used to predict which state a new observed variable is associated with using the predict() method. The output will be a hidden state for that variable. The HMM decides which state a new variable should be in using its several parameters.
1) A HMM has been developed with a OFF state mean of 0 and covariance of 0.1 and an ON state mean of 10 and covariance of 1. The HMM will use the Gaussian probability density function to calculate the probability of an observation. It will have one distribution with mean 0 and covariance 0.1, and another with mean 10 and covariance 1. A new observation comes in with a value of 7. Let's say that given the Gaussian distribution, there is a 70% that the new observation is in state 1 and a 10% chance that the new observation is in state 0. The HMM will look at both distributions to see the probability that the observation is in either of those states. 7 is closer to 10 so it will have a greater probability of being in the ON state; however, a few other parameters are also used before making this decision. If this was the first observed variable, it will use the starting probability as well. Let's say the starting probability is [0.8,0.2]. This indicates that there is an 80% chance that the observed variables will start at state 0 and a 20% that they will start at state 1. Therefore, the starting probabilities will be multiplied by the distribution probabilities. So the overall probability that this observed variable is in state 0 is 10%80%=8%, and the probability of it being in state 1 is 70%20%=14%. Therefore, there is still a greater probability of it being in state 1 and so a hidden state of state 1 will be assigned to the observed variable.
2) Lets say that the next observed variable has a value of 1. There is now a 80% chance of this variable being in state 0 and a 15% chance of it being in state 1 using the Gaussian distribution. In this case, rather than using the starting probabilities, the HMM will utilize the transition matrix parameters. Let's say the transition matrix is [0.9,.1],[0.2,0.8]. This transition matrix indicates that the probability that the HMM will have gone from a previous state of state 0 to a new state 0 is 90%, state 0 to state 1 is 10%, state 1 to state 1 is 80%, and state 1 to state 0 is 20%. Given this transition matrix, and the fact that the previously observed variable was in state 1, the probability that this observed variable is in state 0 is 80%20%=16%. The probability that this observed variable is in state 1 is 15%80%=12%. Therefore, the hidden state for this new observed variable is state 0.
In [3]:
#Initialize Variables for Model
models={}
pi_prior={}
a_prior={}
mean_prior={}
cov_prior={}
best_model_index=OrderedDict()
In [4]:
type_name='air'
key_for_model_name='dataid'
pi_prior[type_name]=np.array([0.5,0.5])
a_prior[type_name]=np.array([[0.5,0.5],[0.5,0.5]])
mean_prior[type_name]=np.array([[0],[1]])
cov_prior[type_name]=np.tile(1, (2, 1, 1))
models[type_name]=fhmm.generate_HMMs_from_type(types_train[type_name],
pi_prior[type_name],
a_prior[type_name],
mean_prior[type_name],
cov_prior[type_name],
key_for_model_name)
In [5]:
best_model_index[type_name]=fhmm.get_best_instance_model(models[type_name],
types_train[type_name],key_for_model_name)
In [6]:
type_name='refrigerator'
key_for_model_name='dataid'
pi_prior[type_name]=np.array([0.5,0.5])
a_prior[type_name]=np.array([[0.6,0.4],[0.4,0.6]])
mean_prior[type_name]=np.array([[0],[.2]])
cov_prior[type_name]=np.tile(1, (2, 1, 1))
models[type_name]=fhmm.generate_HMMs_from_type(types_train[type_name],
pi_prior[type_name],
a_prior[type_name],
mean_prior[type_name],
cov_prior[type_name],
key_for_model_name)
In [7]:
best_model_index[type_name]=fhmm.get_best_instance_model(models[type_name],
types_train[type_name],key_for_model_name)
In [8]:
best_models={}
for type_name in models:
best_models[type_name]=models[type_name][best_model_index[type_name]]
model_fhmm,means_fhmm,variances_fhmm=fhmm.generate_FHMM_from_HMMs(best_models)
In [12]:
house_id=0
trace_num=0
test_data={}
for type_name in models:
test_data[type_name]=utils.trace_series_to_numpy_array(types_test[type_name].instances[house_id].traces[trace_num].series)
total=utils.trace_series_to_numpy_array(types_test['use'].instances[house_id].traces[trace_num].series)
plt.plot(total,label='total')
plt.title('Actual Energy')
plt.ylabel('Energy (Wh)')
plt.xlabel('Time')
for i,type_name in enumerate(best_models):
plt.figure(1)
plt.plot(test_data[type_name],label=type_name)
plt.legend(bbox_to_anchor=(0., 1.05, 1, .102), loc=3,
ncol=2, mode="expand", borderaxespad=1.)
plt.figure()
[decoded_states, decoded_power]=fhmm.predict_with_FHMM(model_fhmm,means_fhmm,variances_fhmm,total)
In [14]:
plt.plot(test_data['air'],'r',label='actual')
plt.plot(decoded_power['air'],'k',label='predicted')
plt.legend(bbox_to_anchor=(0., 1.05, 1, .102), loc=3,
ncol=2, mode="expand", borderaxespad=1.)
Out[14]:
In [11]:
#Evaluate
for type_name in types_test:
if type_name != 'use':
truth_states=metric.guess_truth_from_power(test_data[type_name],.0001)
for i,num in enumerate(test_data[type_name]):
if decoded_states[type_name][i]>1:
decoded_states[type_name][i]=1
eval_metrics=metric.get_positive_negative_stats(truth_states,decoded_states[type_name])
print type_name
print 'precision:' + str(metric.get_precision(eval_metrics['tp'],eval_metrics['fp'])*100)+'%'
print 'recall:' + str(metric.get_sensitivity(eval_metrics['tp'],eval_metrics['fn'])*100)+'%'
print 'diff in power:' + str(metric.sum_error(test_data[type_name],decoded_power[type_name]))+'Wh'
print
In [63]:
device_type_name='air'
air_model= devices_models[device_type_name][best_model[device_type_name]]
X,Z = air_model.sample(14400)
sampled_air = X
device_type_name='refrigerator'
refrigerator_model= devices_models[device_type_name][best_model[device_type_name]]
X,Z = refrigerator_model.sample(14400)
sampled_refrigerator = X
air=[]
time=[]
for i,val in enumerate(sampled_air):
val=val[0]
if val<0:
val=0
time_val=datetime.datetime.today()+datetime.timedelta(seconds=60*i)
time_val=time_val.replace(second=0,microsecond=0,tzinfo=None)
time.append(time_val)
air.append(val)
fridge=[]
for i,val in enumerate(sampled_refrigerator):
val=val[0]
if val<0:
val=0
fridge.append(val)
total=[]
for i,val in enumerate(air):
total.append(val + fridge[i])
air_series=pd.Series(air,index=time)
meta_air={'source':'model','dataid':'abcd','device_name':'air','tracenum':0}
air_trace=appliance.ApplianceTrace(air_series,meta_air)
fridge_series=pd.Series(fridge,index=time)
meta_fridge={'source':'model','dataid':'abcd','device_name':'refrigerator','tracenum':0}
fridge_trace=appliance.ApplianceTrace(fridge_series,meta_fridge)
total_series=pd.Series(total,index=time)
meta_total={'source':'model','dataid':'abcd','device_name':'use','tracenum':0}
total_trace=appliance.ApplianceTrace(total_series,meta_total)
plt.plot(total_series.index,total_series)
plt.figure()
plt.plot(air_series.index,air_series)
plt.plot(fridge_series.index,fridge_series)
Out[63]:
In [ ]: