In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn import hmm
from sklearn.metrics import confusion_matrix
from collections import OrderedDict
import itertools
from copy import deepcopy
import pandas as pd
import matplotlib.pylab as pylab
import os
%matplotlib inline
In [2]:
def values_to_array(values):
    """Wrap every element of `values` in its own single-item row.

    The HMM code in this notebook expects observations as a column of
    one-feature samples, so a flat sequence ``[v0, v1, ...]`` is returned as
    ``np.array([[v0], [v1], ...])``.
    """
    return np.array([[sample] for sample in values])
In [3]:
def open_instance_15min(device_type, instance_name, filename):
    """Load one stored JSON trace and return its 'time_15' entry as a DataFrame.

    The file is read from ``Devices/<device_type>/<instance_name>/<filename>``,
    relative to the current working directory.
    """
    path = "Devices/{0}/{1}/{2}".format(device_type, instance_name, filename)
    with open(path) as handle:
        payload = json.load(handle)
    return pd.DataFrame(payload['time_15'])
In [4]:
def return_sorting_mapping(means):
    """Map each sorted state position to the index of the original state.

    Parameters
    ----------
    means : array-like, shape (n_states,) or (n_states, n_features)
        State means. States are ordered by ascending value of their first
        (in this notebook: only) feature.

    Returns
    -------
    dict
        ``mapping[i]`` is the row of `means` that ends up at position ``i``
        after sorting. The values always form a permutation of
        ``range(n_states)``.

    Notes
    -----
    The previous implementation looked each sorted value up with
    ``np.where``. With tied means that lookup matched several rows, so the
    assertion compared against a multi-row array (ambiguous truth value) and
    both sorted slots would have pointed at the same original state. A stable
    argsort gives the same mapping for distinct means and keeps ties in their
    original relative order.
    """
    means_arr = np.asarray(means)
    if means_arr.size == 0:
        # Nothing to sort; matches the empty mapping the old loop produced.
        return {}
    # Collapse to one sort key per state (first feature of each row).
    sort_keys = means_arr.reshape(means_arr.shape[0], -1)[:, 0]
    # 'mergesort' is NumPy's stable kind, so tied means keep their order.
    order = np.argsort(sort_keys, kind='mergesort')
    return {position: int(original) for position, original in enumerate(order)}
In [5]:
def sort_startprob(mapping, startprob):
    """Reorder the start probabilities to follow the sorted-state order.

    Parameters
    ----------
    mapping : dict
        ``mapping[i]`` is the original index of the state that belongs at
        sorted position ``i`` (as returned by ``return_sorting_mapping``).
    startprob : sequence of float
        Start probability per original state.

    Returns
    -------
    numpy.ndarray
        Float array of the same length with
        ``result[i] == startprob[mapping[i]]``.
    """
    num_elements = len(startprob)
    new_startprob = np.zeros(num_elements)
    # range() rather than the Python-2-only xrange(), matching
    # sort_transition_matrix and keeping the helper usable on Python 3.
    for i in range(num_elements):
        new_startprob[i] = startprob[mapping[i]]
    return new_startprob
In [6]:
def sort_covars(mapping, covars):
    """Reorder the per-state covariances to follow the sorted-state order.

    Parameters
    ----------
    mapping : dict
        ``mapping[i]`` is the original index of the state that belongs at
        sorted position ``i`` (as returned by ``return_sorting_mapping``).
    covars : array-like
        One covariance entry per original state along the first axis.

    Returns
    -------
    numpy.ndarray
        New array shaped like `covars` with
        ``result[i] == covars[mapping[i]]``; the input is left untouched.
    """
    new_covars = np.zeros_like(covars)
    # range() rather than the Python-2-only xrange(), matching
    # sort_transition_matrix and keeping the helper usable on Python 3.
    for i in range(len(covars)):
        new_covars[i] = covars[mapping[i]]
    return new_covars
In [7]:
def sort_transition_matrix(mapping, A):
    """Permute rows and columns of the transition matrix into sorted-state order.

    ``mapping[i]`` names the original state that sits at sorted position
    ``i``; entry ``(i, j)`` of the result is therefore
    ``A[mapping[i], mapping[j]]``. A fresh float matrix is returned.
    """
    size = len(A)
    # Original-state index for every sorted position, looked up once.
    order = [mapping[position] for position in range(size)]
    A_new = np.zeros((size, size))
    for row, src_row in enumerate(order):
        for col, src_col in enumerate(order):
            A_new[row, col] = A[src_row, src_col]
    return A_new
In [8]:
def sort_learnt_parameters(startprob, means, covars, transmat):
    """Reorder all learnt HMM parameters so states are sorted by mean.

    Parameters
    ----------
    startprob : array-like, one entry per state
    means : array-like, one row per state
    covars : array-like, one entry per state along the first axis
    transmat : square array-like, state-to-state transition matrix

    Returns
    -------
    list
        ``[startprob_new, means_new, covars_new, transmat_new]``, every item
        permuted with the same state mapping.

    Notes
    -----
    The means are permuted with the mapping that is applied to the other
    three parameters, instead of being sorted separately with
    ``np.sort(means, axis=0)``. For single-feature means the values are
    identical; for multi-feature means it keeps each state's row intact,
    whereas a column-wise sort would mix features across states.
    """
    mapping = return_sorting_mapping(means)
    # Original-state index for each sorted position.
    order = [mapping[i] for i in range(len(mapping))]
    means_new = np.asarray(means)[order]
    startprob_new = sort_startprob(mapping, startprob)
    covars_new = sort_covars(mapping, covars)
    transmat_new = sort_transition_matrix(mapping, transmat)
    # Reordering must never change any parameter's shape.
    assert np.shape(means_new) == np.shape(means)
    assert np.shape(startprob_new) == np.shape(startprob)
    assert np.shape(covars_new) == np.shape(covars)
    assert np.shape(transmat_new) == np.shape(transmat)
    return [startprob_new, means_new, covars_new, transmat_new]
In [9]:
# Containers shared with later cells.
#   device: instance name -> list of per-file DataFrames for that instance
#   model:  instance name (plus the aggregate key 'all') -> GaussianHMM
device = {}
models = {}
pi = OrderedDict()
a = OrderedDict()
mean = OrderedDict()
cov = OrderedDict()
model = OrderedDict()
sorted_model = OrderedDict()
power = OrderedDict()
state = OrderedDict()

device_name = 'Refrigerator'
directory = os.getcwd() + '/Devices/' + device_name + '/'

# Two-state starting parameters handed to every model: equal start
# probabilities, "sticky" transitions, state means of 0 and 120 (presumably
# power readings - TODO confirm units) and unit variances.
pi_prior = np.array([0.5, 0.5])
a_prior = np.array([[0.95, 0.05], [0.05, 0.95]])
mean_prior = np.array([[0], [120]])
cov_prior = np.tile(np.identity(1), (2, 1, 1))


def _refit_sorted(current_model, observations):
    """Fit `current_model` on one trace; return a new HMM with states sorted by mean."""
    current_model.fit([observations])
    startprob, means, covars, transmat = sort_learnt_parameters(
        current_model.startprob_, current_model.means_,
        current_model.covars_, current_model.transmat_)
    sorted_hmm = hmm.GaussianHMM(startprob.size, "full", startprob, transmat)
    sorted_hmm.means_ = means
    sorted_hmm.covars_ = covars
    return sorted_hmm


model['all'] = hmm.GaussianHMM(pi_prior.size, "full", pi_prior, a_prior)
# Fix: use the trailing-underscore attributes, as for every per-instance model
# below. Plain `.means` / `.covars` only created unrelated attributes, so the
# aggregate model never received these starting values.
model['all'].means_ = mean_prior
model['all'].covars_ = cov_prior

# NOTE: `instance_name` is deliberately left as a module-level loop variable;
# a later cell reads the value it holds after the last iteration.
for instance_name in os.listdir(directory):
    device[instance_name] = [
        open_instance_15min(device_name, instance_name, filename)
        for filename in os.listdir(directory + '/' + instance_name)
    ]
    pi[instance_name] = pi_prior
    a[instance_name] = a_prior
    mean[instance_name] = mean_prior
    cov[instance_name] = cov_prior
    model[instance_name] = hmm.GaussianHMM(
        pi[instance_name].size, "full", pi[instance_name], a[instance_name])
    model[instance_name].means_ = mean[instance_name]
    model[instance_name].covars_ = cov[instance_name]

    # Trace 0 is held out for the evaluation cells; train on the rest.
    # NOTE(review): the models are re-fitted one trace at a time. Depending on
    # the installed hmmlearn's `init_params` default, each fit() may restart
    # from scratch, in which case only the last trace shapes the final model -
    # confirm this is intended.
    for trace in device[instance_name][1:]:
        trace_values = values_to_array(trace["values"].values)
        model[instance_name] = _refit_sorted(model[instance_name], trace_values)
        model['all'] = _refit_sorted(model['all'], trace_values)
In [10]:
# Display the keys of `model`: the aggregate 'all' entry plus one entry per
# instance directory processed by the training cell.
model.keys()
Out[10]:
In [11]:
# Sanity check of log-likelihood scores on one instance's held-out trace.
# NOTE: `instance_name` is not set in this cell; it is whatever the training
# loop left behind (the last instance directory it listed).
other_instance = 'B7E6F4'
test = values_to_array(device[instance_name][0]['values'].values)

# Single-argument print(...) emits the same text on Python 2 and is also
# valid Python 3 syntax.
print(instance_name + " model using test " + instance_name + " data: " + str(model[instance_name].score(test)))
print(other_instance + " model using " + instance_name + " test data: " + str(model[other_instance].score(test)))
print("All model using " + instance_name + " test data: " + str(model['all'].score(test)))

# Draw 96 synthetic samples from the instance's own model (presumably one day
# of 15-minute readings - TODO confirm) and score them as a reference point.
power_s, state = model[instance_name].sample(96)
print("")
print(instance_name + " model using samples from model: " + str(model[instance_name].score(power_s)))

# Trace 1 is part of the training slice ([1:]) used by the training cell.
trained = values_to_array(device[instance_name][1]['values'].values)
print(instance_name + " model using training data from model: " + str(model[instance_name].score(trained)))
In [12]:
# For every instance's held-out trace (index 0), score it under every model
# (including 'all') and print the models ranked from best to worst score.
dfs_test = {}
avg_prob = {}
for key in model:
    if key != 'all':  # 'all' has no test trace of its own in `device`
        test = values_to_array(device[key][0]['values'].values)
        # Collected in `rows` rather than `a`, so the module-level `a`
        # OrderedDict created by the training cell is not overwritten.
        rows = []
        for key2 in model:
            val = model[key2].score(test)
            rows.append([key, key2, val])
        dfs_test[key] = pd.DataFrame(data=rows, columns=['Test_Instance', 'Model_Instance', 'Value'])
        # NOTE(review): DataFrame.sort only exists in older pandas releases;
        # newer ones need sort_values - confirm the target pandas version.
        print(dfs_test[key].sort('Value', ascending=False))
        print("")
In [28]:
# For each model, score every instance's held-out trace (index 0), then rank
# the models by their average score across those traces.
dfs_model = {}
for key in model:
    # Collected in `rows` rather than `a`, so the module-level `a`
    # OrderedDict created by the training cell is not overwritten.
    rows = []
    for key2 in model:
        if key2 != 'all':  # 'all' has no test trace of its own in `device`
            test = values_to_array(device[key2][0]['values'].values)
            val = model[key].score(test)
            rows.append([key2, key, val])
    dfs_model[key] = pd.DataFrame(data=rows, columns=['Test_Instance', 'Model_Instance', 'Value'])

b = []
for key in dfs_model:
    # Series.mean() replaces the hand-rolled iterrows accumulation, which
    # also rebound the builtin name `sum` for the rest of the session.
    b.append([key, dfs_model[key]['Value'].mean()])
avg_model = pd.DataFrame(data=b, columns=['Model_Instance', 'Avg Probability'])
# NOTE(review): DataFrame.sort only exists in older pandas releases; newer
# ones need sort_values - confirm the target pandas version.
print(avg_model.sort('Avg Probability', ascending=False))
In [26]:
# Per-test-instance scores for a single model, best first.
# NOTE(review): the cells above only create an aggregate entry under the key
# 'all'; a 'Refrigerator' key exists only if an instance directory has that
# name - confirm the intended key.
print(dfs_model['Refrigerator'].sort('Value', ascending=False))
In [22]:
# Enter the instance name you would like to look at below.
instance_name='76C07F'
# Held-out trace (index 0) of that instance, as a column of observations.
test=values_to_array(device[instance_name][0]['values'].values)
# Predicted state sequences multiplied by 50 (presumably so the state trace
# is visible on the same axis as the readings - TODO confirm).
A=50*model[instance_name].predict(test)
A_ag=50*model['all'].predict(test)
# NOTE(review): the training cell only creates an aggregate entry under the
# key 'all'; a 'Refrigerator' key exists only if an instance directory has
# that name - confirm the intended key.
A_best=50*model['Refrigerator'].predict(test)
B=test
pylab.rcParams['figure.figsize'] = 16, 12
# Observed trace in red, scaled A_best state sequence in green. A and A_ag
# are computed above but are not plotted in this cell.
plt.plot(B,'r')
plt.plot(A_best,'g')
In [23]:
# Re-draw the same figure as the previous cell. Depends on B and A_best having
# been defined by running that cell first.
pylab.rcParams['figure.figsize'] = 16, 12
plt.plot(B,'r')
plt.plot(A_best,'g')
Out[23]:
In [ ]: