In [1]:
import time
from matplotlib import rcParams
import matplotlib.pyplot as plt
%matplotlib inline
rcParams['figure.figsize'] = (13, 6)
plt.style.use('ggplot')
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation
In [2]:
train = DataSet('/data/REDD/redd.h5')
test = DataSet('/data/REDD/redd.h5')
Let us use building 1 for demonstration purposes.
In [3]:
building = 1
In [4]:
train.buildings[building].elec.mains().plot()
Out[4]:
Let's split the data at 30th April 2011: everything before that date is used for training and everything after for testing.
In [5]:
train.set_window(end="30-4-2011")
test.set_window(start="30-4-2011")
train_elec = train.buildings[building].elec
test_elec = test.buildings[building].elec
In [6]:
train_elec.mains().plot()
Out[6]:
In [7]:
test_elec.mains().plot()
Out[7]:
The REDD dataset has appliance-level data sampled every 3 or 4 seconds and mains data sampled every 1 second. Let us verify this by loading a chunk from the fridge submeter and a chunk from the mains (a quick check on the sampling intervals follows the two dataframes below).
To allow disaggregation to be run on arbitrarily large datasets, the disaggregation output is later dumped to disk chunk by chunk rather than held in memory.
In [8]:
fridge_meter = train_elec['fridge']
In [9]:
fridge_df = next(fridge_meter.load())
In [10]:
fridge_df.head()
Out[10]:
In [11]:
mains = train_elec.mains()
In [12]:
mains_df = next(mains.load())
In [13]:
mains_df.head()
Out[13]:
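As a quick sanity check on the sampling rates mentioned above, we can infer the dominant sampling interval directly from the indices of the two chunks we just loaded. This is a minimal sketch using plain pandas on fridge_df and mains_df; it only inspects these chunks, not the whole dataset.
In [ ]:
# Median time delta between consecutive samples in each loaded chunk.
# For REDD we expect roughly 3-4 seconds for the fridge submeter and
# roughly 1 second for the mains meter.
print(fridge_df.index.to_series().diff().median())
print(mains_df.index.to_series().diff().median())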
Since the two streams are sampled at different rates, we will downsample both to 1-minute resolution (see the resampling sketch below). We will also select the top 5 submeters in terms of energy consumption and use them to train our FHMM and CO models.
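To see what the resampled data looks like before training, one chunk can be loaded at the target resolution. This is a sketch that assumes load() accepts the same sample_period keyword that train() and disaggregate() use below; check your NILMTK version if it does not.
In [ ]:
# Preview of the mains data resampled to 1-minute resolution
# (sample_period is given in seconds, as in the train/disaggregate calls).
mains_1min = next(train_elec.mains().load(sample_period=60))
mains_1min.head()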
In [14]:
top_5_train_elec = train_elec.submeters().select_top_k(k=5)
In [15]:
top_5_train_elec
Out[15]:
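The top-k selection is based on energy consumption per submeter. To see the ranking behind it, the fraction of submetered energy per meter can be computed; this sketch assumes MeterGroup.fraction_per_meter() is available (as in the NILMTK stats tutorials) and can take a while, since it scans every submeter.
In [ ]:
# Fraction of total submetered energy contributed by each meter;
# the five largest contributors are the meters selected above.
fraction = train_elec.submeters().fraction_per_meter().dropna()
fraction.sort_values(ascending=False).head(5)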
In [16]:
start = time.time()
from nilmtk.disaggregate import fhmm_exact
fhmm = fhmm_exact.FHMM()
# Note that we pass sample_period to downsample the training data to 1-minute resolution
fhmm.train(top_5_train_elec, sample_period=60)
end = time.time()
print(end - start)
In [17]:
disag_filename = '/data/REDD/redd-disag-fhmm.h5'
output = HDFDataStore(disag_filename, 'w')
# Note that we disaggregate after resampling the mains data to a 60-second sample period
fhmm.disaggregate(test_elec.mains(), output, sample_period=60)
output.close()
In [18]:
disag_fhmm = DataSet(disag_filename)
disag_fhmm_elec = disag_fhmm.buildings[building].elec
In [19]:
from nilmtk.metrics import f1_score
f1_fhmm = f1_score(disag_fhmm_elec, test_elec)
f1_fhmm.index = disag_fhmm_elec.get_labels(f1_fhmm.index)
f1_fhmm.plot(kind='barh')
plt.ylabel('appliance');
plt.xlabel('f-score');
plt.title("FHMM");
In [20]:
start = time.time()
from nilmtk.disaggregate import CombinatorialOptimisation
co = CombinatorialOptimisation()
# Note that we pass sample_period to downsample the training data to 1-minute resolution
co.train(top_5_train_elec, sample_period=60)
end = time.time()
print(end - start)
In [22]:
disag_filename = '/data/REDD/redd-disag-co.h5'
output = HDFDataStore(disag_filename, 'w')
# Note that we disaggregate after resampling the mains data to a 60-second sample period
co.disaggregate(test_elec.mains(), output, sample_period=60)
output.close()
In [23]:
disag_co = DataSet(disag_filename)
disag_co_elec = disag_co.buildings[building].elec
In [24]:
from nilmtk.metrics import f1_score
f1_co = f1_score(disag_co_elec, test_elec)
f1_co.index = disag_co_elec.get_labels(f1_co.index)
f1_co.plot(kind='barh')
plt.ylabel('appliance');
plt.xlabel('f-score');
plt.title("CO");