In [1]:
import numpy as np
import pandas as pd
from os.path import join

from pylab import rcParams
import matplotlib.pyplot as plt
%matplotlib inline
rcParams['figure.figsize'] = (13, 6)
plt.style.use('ggplot')
import nilmtk
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation, fhmm_exact
from nilmtk.utils import print_dict
from nilmtk.metrics import f1_score

import warnings
warnings.filterwarnings("ignore")


Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 26 days

Comparing FHMM and CO

Downloading data

The full data set can be downloaded from the remote WikiEnergy database. The credentials are omitted here for security reasons.


In [2]:
# download_wikienergy(database_username, database_password, hdf_filename)

Loading data


In [3]:
# Folder holding the NILMTK HDF5 files; later cells reuse `data_dir`.
# NOTE(review): this is an absolute, machine-specific path.
data_dir = '/Users/nipunbatra/Dropbox/nilmtk_datasets/'
redd_h5 = join(data_dir, 'redd.h5')
we = DataSet(redd_h5)
n_buildings = len(we.buildings)
print('loaded ' + str(n_buildings) + ' buildings')


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-3-3ab466afbfc0> in <module>()
      1 data_dir = '/Users/nipunbatra/Dropbox/nilmtk_datasets/'
----> 2 we = DataSet(join(data_dir, 'redd.h5'))
      3 print('loaded ' + str(len(we.buildings)) + ' buildings')

/Users/nipunbatra/git/nilmtk/nilmtk/dataset.pyc in __init__(self, filename, format)
     42         self.metadata = {}
     43         if filename is not None:
---> 44             self.import_metadata(get_datastore(filename, format))
     45 
     46     def import_metadata(self, store):

/Users/nipunbatra/git/nilmtk/nilmtk/utils.pyc in get_datastore(filename, format, mode)
    310     if filename is not None:
    311         if format == 'HDF':
--> 312             return HDFDataStore(filename, mode)
    313         elif format == 'CSV':
    314             return CSVDataStore(filename)

/Users/nipunbatra/git/nilmtk/nilmtk/docinherit.pyc in f(*args, **kwargs)
     44         @wraps(self.mthd, assigned=('__name__','__module__'))
     45         def f(*args, **kwargs):
---> 46             return self.mthd(obj, *args, **kwargs)
     47 
     48         return self.use_parent_doc(f, overridden)

/Users/nipunbatra/git/nilmtk/nilmtk/datastore/hdfdatastore.pyc in __init__(self, filename, mode)
     27     def __init__(self, filename, mode='a'):
     28         if mode == 'a' and not isfile(filename):
---> 29             raise IOError("No such file as " + filename)
     30         self.store = pd.HDFStore(filename, mode, complevel=9, complib='blosc')
     31         super(HDFDataStore, self).__init__()

IOError: No such file as /Users/nipunbatra/Dropbox/nilmtk_datasets/redd.h5

In [4]:
# Choose the building to study and print its metadata dictionary.
building_number = 1
building = we.buildings[building_number]
print_dict(building.metadata)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-fe1eb293ce97> in <module>()
      1 building_number = 1
----> 2 print_dict(we.buildings[building_number].metadata)

NameError: name 'we' is not defined

Examine sub-metered appliances


In [5]:
# Take the `elec` object of the chosen building; the bare expression on the
# last line makes the notebook display its appliances.
building = we.buildings[building_number]
elec = building.elec
elec.appliances


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-e4ec6f193a95> in <module>()
----> 1 elec = we.buildings[building_number].elec
      2 elec.appliances

NameError: name 'we' is not defined

Dividing data set into train and test


In [6]:
# Open the same REDD file twice so that train and test can each be given
# their own time window. The path is built from `data_dir` rather than
# repeating the absolute literal, matching how the other cells locate files.
train = DataSet(join(data_dir, 'redd.h5'))
test = DataSet(join(data_dir, 'redd.h5'))

In [7]:
# Plot the mains of the selected building; index with `building_number`
# (set when the building was chosen) instead of a hard-coded 1.
train.buildings[building_number].elec.mains().plot()


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x10a8a3d50>

Let's split data at April 30th


In [8]:
# One boundary date shared by both windows: it is the end of the training
# window and the start of the test window.
split_date = "30-4-2011"
train.set_window(end=split_date)
test.set_window(start=split_date)

In [9]:
# `elec` objects of the selected building for the train and test DataSets.
# Use `building_number` rather than a hard-coded 1 so the whole notebook
# follows a single building selection.
train_elec = train.buildings[building_number].elec
test_elec = test.buildings[building_number].elec

In [10]:
# Plot the mains of the training DataSet (after its window was set).
train_elec.mains().plot()


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ca3a850>

In [11]:
# Plot the mains of the test DataSet (after its window was set).
test_elec.mains().plot()


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b7b2e90>

Checking load_kwargs


In [12]:
# Look up the fridge entry of the training meter group.
m = train_elec['fridge']

In [13]:
# Take the first item produced by m.load(). The built-in next() replaces the
# Python-2-only `.next()` method and behaves the same on Python 2.6+ and 3.
df_raw = next(m.load())

In [14]:
# Preview the first 10 rows of the raw data.
df_raw.head(10)


Out[14]:
physical_quantity power
type active
2011-04-18 09:22:13-04:00 6
2011-04-18 09:22:16-04:00 6
2011-04-18 09:22:20-04:00 6
2011-04-18 09:22:23-04:00 6
2011-04-18 09:22:26-04:00 6
2011-04-18 09:22:30-04:00 6
2011-04-18 09:22:33-04:00 6
2011-04-18 09:22:37-04:00 6
2011-04-18 09:22:40-04:00 6
2011-04-18 09:22:44-04:00 6

In [15]:
# First item of the power series requested with sample_period=60. The
# built-in next() replaces the Python-2-only `.next()` method.
df_1_min = next(m.power_series(sample_period=60))

In [16]:
# Preview the head of the series loaded with sample_period=60.
df_1_min.head()


Out[16]:
2011-04-18 09:22:00-04:00    6.0000
2011-04-18 09:23:00-04:00    6.0000
2011-04-18 09:24:00-04:00    6.0000
2011-04-18 09:25:00-04:00    5.9375
2011-04-18 09:26:00-04:00    5.9375
Freq: 60S, Name: (power, active), dtype: float64

Selecting top-5 appliances


In [17]:
# Keep only the five submeters chosen by select_top_k for training.
submeters = train_elec.submeters()
top_5_train_elec = submeters.select_top_k(k=5)


15/16 MeterGroup(meters=
  ElecMeter(instance=3, building=1, dataset='REDD', appliances=[Appliance(type='electric oven', instance=1)])
  ElecMeter(instance=4, building=1, dataset='REDD', appliances=[Appliance(type='electric oven', instance=1)])
16/16 MeterGroup(meters=
  ElecMeter(instance=10, building=1, dataset='REDD', appliances=[Appliance(type='washer dryer', instance=1)])
  ElecMeter(instance=20, building=1, dataset='REDD', appliances=[Appliance(type='washer dryer', instance=1)])
Calculating total_energy for ElecMeterID(instance=20, building=1, dataset='REDD') ...   

In [18]:
# Display the selected meter group.
top_5_train_elec


Out[18]:
MeterGroup(meters=
  ElecMeter(instance=5, building=1, dataset='REDD', appliances=[Appliance(type='fridge', instance=1)])
  ElecMeter(instance=11, building=1, dataset='REDD', appliances=[Appliance(type='microwave', instance=1)])
  ElecMeter(instance=8, building=1, dataset='REDD', appliances=[Appliance(type='sockets', instance=2)])
  ElecMeter(instance=9, building=1, dataset='REDD', appliances=[Appliance(type='light', instance=1)])
  ElecMeter(instance=6, building=1, dataset='REDD', appliances=[Appliance(type='dish washer', instance=1)])
)

Training and disaggregation

FHMM

Let us first train on the raw data, without downsampling, and see how long it takes.


In [19]:
import time

In [20]:
"""start=time.time()
fhmm = fhmm_exact.FHMM()
fhmm.train(top_5_train_elec)
end=time.time()
print end-start
"""


Out[20]:
'start=time.time()\nfhmm = fhmm_exact.FHMM()\nfhmm.train(top_5_train_elec)\nend=time.time()\nprint end-start\n'

In [21]:
# Time FHMM training on the top-5 submeters with sample_period=60.
start = time.time()
fhmm = fhmm_exact.FHMM()
fhmm.train(top_5_train_elec, sample_period=60)
end = time.time()
# Parenthesised print of a single value outputs the same text on Python 2
# and Python 3, unlike the Python-2-only `print x` statement.
print(end - start)


Training model for submeter 'ElecMeter(instance=5, building=1, dataset='REDD', appliances=[Appliance(type='fridge', instance=1)])'
Training model for submeter 'ElecMeter(instance=11, building=1, dataset='REDD', appliances=[Appliance(type='microwave', instance=1)])'
Training model for submeter 'ElecMeter(instance=8, building=1, dataset='REDD', appliances=[Appliance(type='sockets', instance=2)])'
Training model for submeter 'ElecMeter(instance=9, building=1, dataset='REDD', appliances=[Appliance(type='light', instance=1)])'
Training model for submeter 'ElecMeter(instance=6, building=1, dataset='REDD', appliances=[Appliance(type='dish washer', instance=1)])'
13.5836830139

In [22]:
# Disaggregate the test mains with the trained FHMM and write the result to
# a fresh HDF5 store.
disag_filename = join(data_dir, 'redd-disag-fhmm-new.h5')
output = HDFDataStore(disag_filename, 'w')
try:
    fhmm.disaggregate(test_elec.mains(), output, sample_period=60)
finally:
    # Close the store even if disaggregation raises, so the file handle is
    # not left open for the rest of the kernel session.
    output.close()


Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     
Done loading data all meters for this chunk.
Exception tables.exceptions.HDF5ExtError: HDF5ExtError('Problems closing the Group None',) in Exception tables.exceptions.HDF5ExtError: HDF5ExtError('Problems closing the Group None',) in Exception tables.exceptions.HDF5ExtError: HDF5ExtError('Problems closing the Group None',) in Exception tables.exceptions.HDF5ExtError: HDF5ExtError('Problems closing the Group None',) in Exception tables.exceptions.HDF5ExtError: HDF5ExtError('Problems closing the Group None',) in Exception tables.exceptions.HDF5ExtError: HDF5ExtError('Problems closing the Group None',) in 

In [23]:
# Re-open the FHMM disaggregation output file as a DataSet and take the
# `elec` object of the building under study.
disag_filename = join(data_dir, 'redd-disag-fhmm-new.h5')
disag = DataSet(disag_filename)
disag_building = disag.buildings[building_number]
disag_elec = disag_building.elec

In [24]:
# Plot the disaggregated meters.
disag_elec.plot()


Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x10cbaebd0>

In [25]:
# Score the FHMM output against `test_elec`, relabel the index with the
# labels from `disag_elec`, and draw a horizontal bar chart.
f1 = f1_score(disag_elec, test_elec)
f1.index = disag_elec.get_labels(f1.index)
ax = f1.plot(kind='barh')
ax.set_ylabel('appliance')
ax.set_xlabel('f-score')
ax.set_title("FHMM");



In [17]:
# CSS styling
from IPython.display import display, HTML

# Read the stylesheet inside a context manager so the file handle is closed
# as soon as the content has been read, instead of being left open.
with open('static/styles.css', 'r') as css_file:
    display(HTML(css_file.read()))