In [1]:
from __future__ import print_function, division
import time
from matplotlib import rcParams
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from six import iteritems
%matplotlib inline
rcParams['figure.figsize'] = (13, 6)
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation, FHMM
import nilmtk.utils
import warnings
warnings.filterwarnings("ignore")
In [2]:
train = DataSet('../datasets/REDD/low_freq.h5')
test = DataSet('../datasets/REDD/low_freq.h5')
Let us use building 1 for demo purposes
In [3]:
building = 1
Let's split data at April 30th
In [4]:
# The dates are interpreted by Pandas, prefer using ISO dates (yyyy-mm-dd)
train.set_window(end="2011-04-30")
test.set_window(start="2011-04-30")
train_elec = train.buildings[1].elec
test_elec = test.buildings[1].elec
In [5]:
top_5_train_elec = train_elec.submeters().select_top_k(k=5)
In [6]:
def predict(clf, test_elec, sample_period, timezone):
pred = {}
gt= {}
for i, chunk in enumerate(test_elec.mains().load(sample_period=sample_period)):
chunk_drop_na = chunk.dropna()
pred[i] = clf.disaggregate_chunk(chunk_drop_na)
gt[i]={}
for meter in test_elec.submeters().meters:
# Only use the meters that we trained on (this saves time!)
gt[i][meter] = next(meter.load(sample_period=sample_period))
gt[i] = pd.DataFrame({k:v.squeeze() for k,v in iteritems(gt[i]) if len(v)}, index=next(iter(gt[i].values())).index).dropna()
# If everything can fit in memory
gt_overall = pd.concat(gt)
gt_overall.index = gt_overall.index.droplevel()
pred_overall = pd.concat(pred)
pred_overall.index = pred_overall.index.droplevel()
# Having the same order of columns
gt_overall = gt_overall[pred_overall.columns]
#Intersection of index
gt_index_utc = gt_overall.index.tz_convert("UTC")
pred_index_utc = pred_overall.index.tz_convert("UTC")
common_index_utc = gt_index_utc.intersection(pred_index_utc)
common_index_local = common_index_utc.tz_convert(timezone)
gt_overall = gt_overall.loc[common_index_local]
pred_overall = pred_overall.loc[common_index_local]
appliance_labels = [m for m in gt_overall.columns.values]
gt_overall.columns = appliance_labels
pred_overall.columns = appliance_labels
return gt_overall, pred_overall
In [7]:
# Since the methods use randomized initialization, let's fix a seed here
# to make this notebook reproducible
import numpy.random
numpy.random.seed(42)
In [ ]:
classifiers = {'CO':CombinatorialOptimisation(), 'FHMM':FHMM()}
predictions = {}
sample_period = 120
for clf_name, clf in classifiers.items():
print("*"*20)
print(clf_name)
print("*" *20)
clf.train(top_5_train_elec, sample_period=sample_period)
gt, predictions[clf_name] = predict(clf, test_elec, 120, train.metadata['timezone'])
In [ ]:
rmse = {}
for clf_name in classifiers.keys():
rmse[clf_name] = nilmtk.utils.compute_rmse(gt, predictions[clf_name], pretty=True)
rmse = pd.DataFrame(rmse)
In [ ]:
rmse
In [ ]: