In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn import hmm

from sklearn.metrics import confusion_matrix
from collections import OrderedDict
import itertools

from copy import deepcopy
import datetime

import pandas as pd


import matplotlib.pylab as pylab
import scipy
import scipy.fftpack
import pylab
import sys
sys.path.append('../../') # or non-Unix equivalent (add wikienergy/ to path)
import disaggregator as da

from numpy import sin, linspace, pi
from pylab import plot, show, title, xlabel, ylabel, subplot
from scipy import fft, arange
from scipy import pi

import os
%matplotlib inline
pylab.rcParams['figure.figsize'] = 16, 9


/usr/local/lib/python2.7/dist-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [2]:
#Produces arrays of arrays for HMM
def values_to_array(values):
    a=[]
    X=[]
    for i in values:
        a.append(float(i))
        X.append(a)
        a=[]
    return np.array(X)

In [3]:
def sort_startprob(mapping, startprob):
    """ Sort the startprob according to power means; as returned by mapping
    """

    num_elements = len(startprob)
    new_startprob = np.zeros(num_elements)
    for i in xrange(len(startprob)):
        new_startprob[i] = startprob[mapping[i]]
    return new_startprob

In [4]:
def sort_covars(mapping, covars):
    num_elements = len(covars)
    new_covars = np.zeros_like(covars)
    for i in xrange(len(covars)):
        new_covars[i] = covars[mapping[i]]
    return new_covars

In [5]:
def sort_transition_matrix(mapping, A):
    """ Sorts the transition matrix according to power means; as returned by mapping
    """
    num_elements = len(A)
    A_new = np.zeros((num_elements, num_elements))
    for i in range(num_elements):
        for j in range(num_elements):
            A_new[i,j] = A[mapping[i], mapping[j]]   
    return A_new

In [6]:
def return_sorting_mapping(means):
    means_copy = deepcopy(means)
    # Sorting 
    means_copy = np.sort(means_copy, axis = 0)  
    # Finding mapping
    mapping = {}
    mapping_set=set()
    x=0
    for i, val in enumerate(means_copy):
        x= np.where(val==means)[0]
        for val in x:
            if val not in mapping_set:
                mapping_set.add(val)
                mapping[i]=val
                break
    return mapping

In [7]:
def sort_learnt_parameters(startprob, means, covars, transmat):
    mapping = return_sorting_mapping(means)
    means_new = np.sort(means, axis = 0)
    
    startprob_new = sort_startprob(mapping, startprob)
    covars_new = sort_covars(mapping, covars)
    transmat_new = sort_transition_matrix(mapping, transmat)
    assert np.shape(means_new) == np.shape(means)
    assert np.shape(startprob_new) == np.shape(startprob)
    assert np.shape(transmat_new) == np.shape(transmat)
    return [startprob_new, means_new, covars_new, transmat_new]

In [8]:
def initInstanceModelFromTraceFolder(pi_prior,a_prior,mean_prior,cov_prior):
    model=hmm.GaussianHMM(pi_prior.size, 'full', pi_prior,a_prior)
    model.means_ = mean_prior
    model.covars_ = cov_prior
    return model

In [9]:
#Just Using Values as single feature
#Makes sure not to update blank models
def fitTraceToInstanceModel(model,trace):
    trace_values=values_to_array(trace.series)
    model.fit([trace_values])
    startprob, means, covars, transmat = sort_learnt_parameters(model.startprob_, model.means_, model.covars_ , model.transmat_) 
    model=hmm.GaussianHMM(startprob.size, 'full', startprob, transmat)
    model.means_ = means
    model.covars_ = covars
    return model

In [10]:
#Using Values as single feature
def getAllInstanceModels(device_type,device_type_name,key_for_model_name,pi_prior,a_prior,mean_prior,cov_prior):
    model_list=OrderedDict()
    device_instances=OrderedDict()
    for instance in devices_types[device_type_name].instances:
        instance_name=instance.traces[0].metadata[key_for_model_name]
        model_list[instance_name]=initInstanceModelFromTraceFolder(pi_prior,a_prior,mean_prior,cov_prior)
        for trace in instance.traces[1:]:
            model_list[instance_name]=fitTraceToInstanceModel(model_list[instance_name],trace)
    return model_list

In [11]:
def getBestInstanceModel(models,device_type,key_for_model_name):
    dfs_model={}
    best_model_score=0
    
    for model_name in models:
        instances_of_model=[]
        for instance in device_type.instances:
            test_trace=instance.traces[0]
            instance_name=test_trace.metadata[key_for_model_name]
            test=values_to_array(test_trace.series)
            model_score=models[model_name].score(test)
            instances_of_model.append([model_name,instance_name,model_score])
            if(model_score>best_model_score):
                best_model=models[model_name]
        dfs_model[model_name] = pd.DataFrame(data=instances_of_model,columns=['Model_Instance','Test_Instance','Value'])
    model_averages=[]   
    for key in dfs_model:
        sum=0
        for row in dfs_model[key].iterrows():
            sum=sum+row[1]['Value']
        model_averages.append([key,sum/len(dfs_model[key].index)])
    print
    avg_model_df = pd.DataFrame(data=model_averages,columns=['Model_Instance','Avg Probability'])
    print avg_model_df.sort('Avg Probability',ascending=False)
    bestModel= avg_model_df.sort('Avg Probability',ascending=False).sort('Avg Probability',ascending=False).head(1)['Model_Instance'].values[0]
    print str(bestModel)+' is best.'
    return bestModel

In [12]:
def compute_pi_fhmm(list_pi):
    '''
    Input: list_pi: List of PI's of individual learnt HMMs
    Output: Combined Pi for the FHMM
    '''
    result=list_pi[0]
    for i in range(len(list_pi)-1):
        result=np.kron(result,list_pi[i+1])
    return result

def compute_A_fhmm(list_A):
    '''
    Input: list_pi: List of PI's of individual learnt HMMs
    Output: Combined Pi for the FHMM
    '''
    result=list_A[0]
    for i in range(len(list_A)-1):
        result=np.kron(result,list_A[i+1])
    return result

def compute_means_fhmm(list_means):  
    '''
    Returns [mu, sigma]
    '''
    
    #list_of_appliances_centroids=[ [appliance[i][0] for i in range(len(appliance))] for appliance in list_B]
    states_combination=list(itertools.product(*list_means))
    print states_combination
    num_combinations=len(states_combination)
    print num_combinations
    means_stacked=np.array([sum(x) for x in states_combination])
    means=np.reshape(means_stacked,(num_combinations,1)) 
    cov=np.tile(5*np.identity(1), (num_combinations, 1, 1))
    return [means, cov] 

def create_combined_hmm(n, pi, A, mean, cov):
    combined_model=hmm.GaussianHMM(n_components=n,covariance_type='full', startprob=pi, transmat=A)
    combined_model.covars_=cov
    combined_model.means_=mean
    return combined_model

def decode_hmm(length_sequence, centroids, appliance_list, states):
    '''
    Decodes the HMM state sequence
    '''
    power_states_dict={}    
    hmm_states={}
    hmm_power={}
    total_num_combinations=1
    for appliance in appliance_list:
        total_num_combinations*=len(centroids[appliance])  

    for appliance in appliance_list:
        hmm_states[appliance]=np.zeros(length_sequence,dtype=np.int)
        hmm_power[appliance]=np.zeros(length_sequence)
        
    for i in range(length_sequence):
        factor=total_num_combinations
        for appliance in appliance_list:
            #assuming integer division (will cause errors in Python 3x)
            factor=factor//len(centroids[appliance])
            
            temp=int(states[i])/factor
            hmm_states[appliance][i]=temp%len(centroids[appliance])
            hmm_power[appliance][i]=centroids[appliance][hmm_states[appliance][i]]
            
    return [hmm_states,hmm_power]

In [13]:
#Load Datasets
devices_types={}
folder_path='/home/steve/DSSG/wikienergy/data/Tracebase/'
dataset=da.TracebaseDatasetAdapter(folder_path,'D','15T')

In [14]:
device_name='Cookingstove'
devices_types[device_name]=dataset.generate_type(device_name)

In [18]:
device_name='Dishwasher'
devices_types[device_name]=dataset.generate_type(device_name)

In [19]:
device_name='refrigerator_good'
devices_types[device_name]=dataset.generate_type(device_name)

In [36]:
device_name='Freezer'
devices_types[device_name]=dataset.generate_type(device_name)

In [21]:
device_name='Washingmachine_large'
devices_types[device_name]=dataset.generate_type(device_name)

In [15]:
#Look at individual trace
device_name_test='Cookingstove'
device_type_test=devices_types[device_name_test]
index=0
index2=1
print type(device_type_test.instances[index].traces[index2].series[0])
print device_type_test.instances[index].traces[index2].metadata
plt.plot(device_type_test.instances[index].traces[index2].series)


<class 'decimal.Decimal'>
{'instance_name': 'D33097', 'source': 'Tracebase', 'trace_num': 0, 'date': '2011.12.18', 'device_name': 'Cookingstove'}
Out[15]:
[<matplotlib.lines.Line2D at 0x7f7cd2ff2890>]

In [20]:
#Initialize Variables for Model
devices_models={}
devices_data={}

devices_type_names=[]
pi_prior={}
a_prior={}
mean_prior={}
cov_prior={}
best_model=OrderedDict()

In [21]:
## 1 Feature
device_type_name='Cookingstove'
key_for_model_name='instance_name'
pi_prior[device_type_name]=np.array([0.1,0.1,0.8])
a_prior[device_type_name]=np.array([[0.95,0.04,.01],[0.04,0.95,0.01],[0.04,0.01,0.95]])
mean_prior[device_type_name]=np.array([[0],[35], [60]])
cov_prior[device_type_name]=np.tile(np.identity(1), (2, 1, 1))
devices_models[device_type_name]=getAllInstanceModels(devices_types[device_type_name],device_type_name,key_for_model_name,pi_prior[device_type_name],a_prior[device_type_name],mean_prior[device_type_name],cov_prior[device_type_name])
best_model[device_type_name]=getBestInstanceModel(devices_models[device_type_name],devices_types[device_type_name],key_for_model_name)

devices_type_names.append(device_type_name)


  Model_Instance  Avg Probability
0         D33097    -81235.678368
D33097 is best.

In [22]:
## 1 Feature
device_type_name='Dishwasher'
key_for_model_name='instance_name'
pi_prior[device_type_name]=np.array([0.1,0.1,0.8])
a_prior[device_type_name]=np.array([[0.95,0.025,.025],[0.025,0.95,0.025],[0.025,0.025,0.95]])
mean_prior[device_type_name]=np.array([[0],[300], [600]])
cov_prior[device_type_name]=np.tile(np.identity(1), (2, 1, 1))
devices_models[device_type_name]=getAllInstanceModels(devices_types[device_type_name],device_type_name,key_for_model_name,pi_prior[device_type_name],a_prior[device_type_name],mean_prior[device_type_name],cov_prior[device_type_name])
best_model[device_type_name]=getBestInstanceModel(devices_models[device_type_name],devices_types[device_type_name],key_for_model_name)

devices_type_names.append(device_type_name)


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-22-fb44a859886a> in <module>()
      6 mean_prior[device_type_name]=np.array([[0],[300], [600]])
      7 cov_prior[device_type_name]=np.tile(np.identity(1), (2, 1, 1))
----> 8 devices_models[device_type_name]=getAllInstanceModels(devices_types[device_type_name],device_type_name,key_for_model_name,pi_prior[device_type_name],a_prior[device_type_name],mean_prior[device_type_name],cov_prior[device_type_name])
      9 best_model[device_type_name]=getBestInstanceModel(devices_models[device_type_name],devices_types[device_type_name],key_for_model_name)
     10 

KeyError: 'Dishwasher'

In [26]:
## 1 Feature
device_type_name='refrigerator_good'
key_for_model_name='instance_name'
pi_prior[device_type_name]=np.array([0.33,0.33,.34])
a_prior[device_type_name]=np.array([[0.5,0.25,0.25],[0.25,0.5,.25],[0.5,0.25,.25]])
mean_prior[device_type_name]=np.array([[0],[4],[12]])
cov_prior[device_type_name]=np.tile(np.identity(1), (2, 1, 1))
devices_models[device_type_name]=getAllInstanceModels(devices_types[device_type_name],device_type_name,key_for_model_name,pi_prior[device_type_name],a_prior[device_type_name],mean_prior[device_type_name],cov_prior[device_type_name])
best_model[device_type_name]=getBestInstanceModel(devices_models[device_type_name],devices_types[device_type_name],key_for_model_name)

devices_type_names.append(device_type_name)


  Model_Instance  Avg Probability
2         76C07F      -237.193491
3         599393      -269.263379
7   Refrigerator      -329.900378
1         995DED      -681.390666
5         D331DA      -763.358444
0         D32131      -945.182602
4         B83B9E     -1045.103234
6         B7E6F4     -2047.240037
76C07F is best.

In [27]:
## 1 Feature
device_type_name='Washingmachine_small'
key_for_model_name='instance_name'
devices_type_names.append(device_type_name)
pi_prior[device_type_name]=np.array([0.5,0.5])
a_prior[device_type_name]=np.array([[0.8,0.2],[0.2,0.8]])
mean_prior[device_type_name]=np.array([[0],[50]])
cov_prior[device_type_name]=np.tile(np.identity(1), (2, 1, 1))
devices_models[device_type_name]=getAllInstanceModels(devices_types[device_type_name],device_type_name,key_for_model_name,pi_prior[device_type_name],a_prior[device_type_name],mean_prior[device_type_name],cov_prior[device_type_name])
best_model[device_type_name]=getBestInstanceModel(devices_models[device_type_name],devices_types[device_type_name],key_for_model_name)


  Model_Instance  Avg Probability
1         9966DE       121.251925
2         D3230E      -270.274882
0         D338C9     -1471.318193
9966DE is best.

In [28]:
## 1 Feature
device_type_name='Washingmachine_large'
key_for_model_name='instance_name'
devices_type_names.append(device_type_name)
pi_prior[device_type_name]=np.array([0.5,0.5])
a_prior[device_type_name]=np.array([[0.8,0.2],[0.2,0.8]])
mean_prior[device_type_name]=np.array([[0],[300]])
cov_prior[device_type_name]=np.tile(np.identity(1), (2, 1, 1))
devices_models[device_type_name]=getAllInstanceModels(devices_types[device_type_name],device_type_name,key_for_model_name,pi_prior[device_type_name],a_prior[device_type_name],mean_prior[device_type_name],cov_prior[device_type_name])
best_model[device_type_name]=getBestInstanceModel(devices_models[device_type_name],devices_types[device_type_name],key_for_model_name)


   Model_Instance  Avg Probability
0  Washingmachine       216.586036
2          B8121D      -573.426889
1          7297E3     -1637.979680
Washingmachine is best.

In [30]:
#Aggregating Signals using the first instance in each ordered dict.
power_total=np.zeros((96,1))
time_total=[]
test_data={}
test_time={}
#Create generic time array
today=datetime.datetime.min
mins_15 = datetime.timedelta(minutes=15)
for i,val in enumerate(power_total):
    time_total.append((today+i*mins_15).time())
    
plt.figure()

for device_name in devices_type_names:
    print device_name
    device_type=devices_types[device_name]
    test_data[device_name]=values_to_array(device_type.instances[0].traces[0].series)
    test_time[device_name]=device_type.instances[0].traces[0].series.index.time
    power_total=power_total+test_data[device_name]
    plt.plot(test_time[device_name],test_data[device_name],label=device_name)
plt.plot(time_total,power_total,label='total');
plt.title("Simulated total energy")
plt.xlabel("Time")
plt.ylabel("Energy (Wh)");
plt.legend(bbox_to_anchor=(0., 1.05, 1., .102), loc=3,
       ncol=2, mode="expand", borderaxespad=0.)


Cookingstove
Dishwasher
refrigerator_good
Washingmachine_small
Washingmachine_large
Out[30]:
<matplotlib.legend.Legend at 0x7ff3e89ba110>

In [31]:
#Creates FHMM
list_pi=[pi_prior[appliance] for appliance in best_model]
list_A=[a_prior[appliance] for appliance in best_model]
list_means=[mean_prior[appliance].flatten().tolist() for appliance in best_model]
pi_combined=compute_pi_fhmm(list_pi)
A_combined=compute_A_fhmm(list_A)
[mean_combined, cov_combined]=compute_means_fhmm(list_means)
model_fhmm=create_combined_hmm(len(pi_combined),pi_combined, A_combined, mean_combined, cov_combined)

power_total_minus_bottom=[]
for i in power_total:
    power_total_minus_bottom.append(i-power_total.min())
learnt_states=model_fhmm.predict(power_total_minus_bottom)
[decoded_states, decoded_power]=decode_hmm(len(learnt_states), mean_prior, [appliance for appliance in best_model], learnt_states)


[(0, 0, 0, 0, 0), (0, 0, 0, 0, 300), (0, 0, 0, 50, 0), (0, 0, 0, 50, 300), (0, 0, 4, 0, 0), (0, 0, 4, 0, 300), (0, 0, 4, 50, 0), (0, 0, 4, 50, 300), (0, 0, 12, 0, 0), (0, 0, 12, 0, 300), (0, 0, 12, 50, 0), (0, 0, 12, 50, 300), (0, 300, 0, 0, 0), (0, 300, 0, 0, 300), (0, 300, 0, 50, 0), (0, 300, 0, 50, 300), (0, 300, 4, 0, 0), (0, 300, 4, 0, 300), (0, 300, 4, 50, 0), (0, 300, 4, 50, 300), (0, 300, 12, 0, 0), (0, 300, 12, 0, 300), (0, 300, 12, 50, 0), (0, 300, 12, 50, 300), (0, 600, 0, 0, 0), (0, 600, 0, 0, 300), (0, 600, 0, 50, 0), (0, 600, 0, 50, 300), (0, 600, 4, 0, 0), (0, 600, 4, 0, 300), (0, 600, 4, 50, 0), (0, 600, 4, 50, 300), (0, 600, 12, 0, 0), (0, 600, 12, 0, 300), (0, 600, 12, 50, 0), (0, 600, 12, 50, 300), (35, 0, 0, 0, 0), (35, 0, 0, 0, 300), (35, 0, 0, 50, 0), (35, 0, 0, 50, 300), (35, 0, 4, 0, 0), (35, 0, 4, 0, 300), (35, 0, 4, 50, 0), (35, 0, 4, 50, 300), (35, 0, 12, 0, 0), (35, 0, 12, 0, 300), (35, 0, 12, 50, 0), (35, 0, 12, 50, 300), (35, 300, 0, 0, 0), (35, 300, 0, 0, 300), (35, 300, 0, 50, 0), (35, 300, 0, 50, 300), (35, 300, 4, 0, 0), (35, 300, 4, 0, 300), (35, 300, 4, 50, 0), (35, 300, 4, 50, 300), (35, 300, 12, 0, 0), (35, 300, 12, 0, 300), (35, 300, 12, 50, 0), (35, 300, 12, 50, 300), (35, 600, 0, 0, 0), (35, 600, 0, 0, 300), (35, 600, 0, 50, 0), (35, 600, 0, 50, 300), (35, 600, 4, 0, 0), (35, 600, 4, 0, 300), (35, 600, 4, 50, 0), (35, 600, 4, 50, 300), (35, 600, 12, 0, 0), (35, 600, 12, 0, 300), (35, 600, 12, 50, 0), (35, 600, 12, 50, 300), (60, 0, 0, 0, 0), (60, 0, 0, 0, 300), (60, 0, 0, 50, 0), (60, 0, 0, 50, 300), (60, 0, 4, 0, 0), (60, 0, 4, 0, 300), (60, 0, 4, 50, 0), (60, 0, 4, 50, 300), (60, 0, 12, 0, 0), (60, 0, 12, 0, 300), (60, 0, 12, 50, 0), (60, 0, 12, 50, 300), (60, 300, 0, 0, 0), (60, 300, 0, 0, 300), (60, 300, 0, 50, 0), (60, 300, 0, 50, 300), (60, 300, 4, 0, 0), (60, 300, 4, 0, 300), (60, 300, 4, 50, 0), (60, 300, 4, 50, 300), (60, 300, 12, 0, 0), (60, 300, 12, 0, 300), (60, 300, 12, 50, 0), (60, 300, 12, 50, 300), (60, 600, 0, 0, 0), (60, 600, 0, 0, 300), (60, 600, 0, 50, 0), (60, 600, 0, 50, 300), (60, 600, 4, 0, 0), (60, 600, 4, 0, 300), (60, 600, 4, 50, 0), (60, 600, 4, 50, 300), (60, 600, 12, 0, 0), (60, 600, 12, 0, 300), (60, 600, 12, 50, 0), (60, 600, 12, 50, 300)]
108

In [32]:
plt.figure()
plt.plot(time_total,power_total_minus_bottom)
plt.title('Aggregated Energy without constant power')
plt.ylabel('Energy (Wh)')
plt.xlabel('Time')

for i,device_type in enumerate(best_model):
    plt.figure()
    #plt.subplot(2,1,1)
    plt.plot(test_time[device_type],test_data[device_type])
    plt.title('Ground Truth Energy for %s' %device_type)
    #plt.subplot(2,1,2)
    plt.plot(time_total,decoded_power[device_type])
    plt.title('Predicted Energy for %s' %device_type)
    plt.ylabel('Energy (Wh)')
    plt.xlabel('Time')
    plt.ylim((np.min(test_data[device_type])-10, np.max(test_data[device_type])+10))
    plt.tight_layout()



In [33]:
for device_type in devices_type_names:
    truth_states=da.evaluation_metrics.guess_truth_from_power(test_data[device_type],2)
    eval_metrics=da.evaluation_metrics.get_positive_negative_stats(truth_states,decoded_states[device_type])
    print device_type
    print 'precision:' + str(da.evaluation_metrics.get_precision(eval_metrics['tp'],eval_metrics['fp'])*100)+'%'
    print 'recall:' + str(da.evaluation_metrics.get_sensitivity(eval_metrics['tp'],eval_metrics['fn'])*100)+'%'
    print 'diff in power:' + str(da.evaluation_metrics.sum_error(test_data[device_type],decoded_power[device_type]))+'Wh'
    print


Cookingstove
precision:75.0%
recall:33.3333333333%
diff in power:165.045555556Wh

Dishwasher
precision:100.0%
recall:66.6666666667%
diff in power:982.058888889Wh

refrigerator_good
precision:89.0909090909%
recall:94.2307692308%
diff in power:97.9080555556Wh

Washingmachine_small
precision:0.0%
recall:0.0%
diff in power:187.018333333Wh

Washingmachine_large
precision:75.0%
recall:60.0%
diff in power:865.768888889Wh