In [1]:
import numpy as np
import pandas as pd
from os.path import join

from pylab import rcParams
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

rcParams['figure.figsize'] = (14, 7)

import nilmtk
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation
from nilmtk.utils import print_dict
from nilmtk.metrics import f1_score

import warnings
warnings.filterwarnings("ignore")

Demo of NILMTK v0.2 for BuildSys 2014

Downloading data

The full data set can be downloaded from the remote WikiEnergy database. The credentials are omitted here for security reasons.


In [2]:
# download_wikienergy(database_username, database_password, hdf_filename)

Loading data


In [3]:
data_dir = '/home/nipun/datasets'
we = DataSet(join(data_dir, 'wikienergy.h5'))
print('loaded ' + str(len(we.buildings)) + ' buildings')


loaded 239 buildings

Examine dataset metadata


In [4]:
print_dict(we.metadata)


Examine metadata for a single house


In [41]:
building_number = 22
print_dict(we.buildings[building_number].metadata)


  • instance: 22
  • dataset: WikiEnergy
  • original_name: 774

Examine sub-metered appliances


In [42]:
elec = we.buildings[building_number].elec
elec.appliances


Out[42]:
[Appliance(type='fridge', instance=1),
 Appliance(type='dish washer', instance=1),
 Appliance(type='appliance', instance=1),
 Appliance(type='electric furnace', instance=1),
 Appliance(type='sockets', instance=1),
 Appliance(type='microwave', instance=1),
 Appliance(type='sockets', instance=2),
 Appliance(type='air conditioner', instance=1)]

Wiring hierarchy of meters


In [43]:
elec.draw_wiring_graph()


Select all fridges


In [44]:
fridges = nilmtk.global_meter_group.select_using_appliances(type='fridge')

Proportion of energy per fridge

The energy consumed by each appliance can be expressed as a proportion of the household's total energy. Here we find the range of proportions for each fridge.


In [9]:
# Select a subset of fridges, otherwise the computation takes a long time
fridges_restricted = MeterGroup(fridges.meters[:5])

proportion_per_fridge = fridges_restricted.proportion_of_upstream_total_per_meter()


Using cached result from metadata.
Using cached result from metadata.
1/5 ElecMeter(instance=18, building=60, dataset='WikiEnergy', appliances=[Appliance(type='fridge', instance=1)]) = 0.049Using cached result from metadata.
Using cached result from metadata.
2/5 ElecMeter(instance=14, building=61, dataset='WikiEnergy', appliances=[Appliance(type='fridge', instance=1)]) = 0.097Using cached result from metadata.
Using cached result from metadata.
3/5 ElecMeter(instance=8, building=63, dataset='WikiEnergy', appliances=[Appliance(type='fridge', instance=1)]) = 0.000Using cached result from metadata.
Using cached result from metadata.
4/5 ElecMeter(instance=6, building=64, dataset='WikiEnergy', appliances=[Appliance(type='fridge', instance=1)]) = 0.096Using cached result from metadata.
Using cached result from metadata.
5/5 ElecMeter(instance=16, building=66, dataset='WikiEnergy', appliances=[Appliance(type='fridge', instance=1)]) = 0.083

In [10]:
proportion_per_fridge.plot(kind='bar');
plt.title('fridge energy as proportion of total building energy');
plt.ylabel('proportion');
plt.xlabel('Fridge (<appliance instance>, <building instance>, <dataset name>)');



In [11]:
# How much energy does the largest-consuming fridge consume in kWh?
fridges.select(building=61).total_energy()


Using cached result from metadata.
Out[11]:
active    79.120667
dtype: float64

In [12]:
fridges.select(building=61).plot();


Daily energy consumption across fridges in WikiEnergy data set


In [13]:
fridges_restricted = MeterGroup(fridges.meters[:20])
daily_energy = pd.Series([meter.average_energy_per_period(offset_alias='D') 
                           for meter in fridges_restricted.meters])

daily_energy.plot(kind='hist');
plt.title('Histogram of daily fridge energy');
plt.xlabel('energy (kWh)');
plt.ylabel('occurences');
plt.legend().set_visible(False)


Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
Using cached result from metadata.
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-abb8c909a1b8> in <module>()
      3                            for meter in fridges_restricted.meters])
      4 
----> 5 daily_energy.plot(kind='hist');
      6 plt.title('Histogram of daily fridge energy');
      7 plt.xlabel('energy (kWh)');

/home/nipun/anaconda/lib/python2.7/site-packages/pandas/tools/plotting.pyc in plot_series(data, kind, ax, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, label, secondary_y, **kwds)
   2485                  yerr=yerr, xerr=xerr,
   2486                  label=label, secondary_y=secondary_y,
-> 2487                  **kwds)
   2488 
   2489 

/home/nipun/anaconda/lib/python2.7/site-packages/pandas/tools/plotting.pyc in _plot(data, x, y, subplots, ax, kind, **kwds)
   2291         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   2292 
-> 2293     plot_obj.generate()
   2294     plot_obj.draw()
   2295     return plot_obj.result

/home/nipun/anaconda/lib/python2.7/site-packages/pandas/tools/plotting.pyc in generate(self)
    915 
    916     def generate(self):
--> 917         self._args_adjust()
    918         self._compute_plot_data()
    919         self._setup_subplots()

/home/nipun/anaconda/lib/python2.7/site-packages/pandas/tools/plotting.pyc in _args_adjust(self)
   1914             hist, self.bins = np.histogram(values, bins=self.bins,
   1915                                         range=self.kwds.get('range', None),
-> 1916                                         weights=self.kwds.get('weights', None))
   1917 
   1918         if com.is_list_like(self.bottom):

/home/nipun/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in histogram(a, bins, range, normed, weights, density)
    185                 range = (0, 1)
    186             else:
--> 187                 range = (a.min(), a.max())
    188         mn, mx = [mi + 0.0 for mi in range]
    189         if mn == mx:

/home/nipun/anaconda/lib/python2.7/site-packages/numpy/core/_methods.pyc in _amin(a, axis, out, keepdims)
     27 
     28 def _amin(a, axis=None, out=None, keepdims=False):
---> 29     return umr_minimum(a, axis, None, out, keepdims)
     30 
     31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False):

/home/nipun/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in __nonzero__(self)
    690         raise ValueError("The truth value of a {0} is ambiguous. "
    691                          "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
--> 692                          .format(self.__class__.__name__))
    693 
    694     __bool__ = __nonzero__

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

Plot sub-metered data for a single day


In [85]:
df  = elec.power_series_all_data()

In [86]:
print([meter.appliance_label() for meter in elec.meters[1:]])


["('air conditioner', 1)", "('dish washer', 1)", "('electric furnace', 1)", "('sockets', 1)", "('sockets', 2)", "('microwave', 1)", "('fridge', 1)", "('appliance', 1)"]

In [87]:
df = pd.DataFrame({meter.appliance_label():meter.power_series_all_data() for meter in elec.meters[1:]}, index=elec.meters[1].power_series_all_data().index)

In [88]:
df


Out[88]:
('air conditioner', 1) ('appliance', 1) ('dish washer', 1) ('electric furnace', 1) ('fridge', 1) ('microwave', 1) ('sockets', 1) ('sockets', 2)
localminute
2014-05-04 12:00:00-05:00 3805 0 0 859 86 14 0 0
2014-05-04 12:01:00-05:00 3809 0 0 858 93 14 0 0
2014-05-04 12:02:00-05:00 3794 0 0 857 87 14 0 0
2014-05-04 12:03:00-05:00 3828 0 0 858 88 14 0 0
2014-05-04 12:04:00-05:00 3824 0 0 858 88 14 0 0
2014-05-04 12:05:00-05:00 1620 0 0 858 89 14 0 0
2014-05-04 12:06:00-05:00 0 0 0 841 89 14 0 0
2014-05-04 12:07:00-05:00 0 0 0 628 89 14 0 0
2014-05-04 12:08:00-05:00 0 0 0 627 89 14 0 0
2014-05-04 12:09:00-05:00 0 0 0 626 90 14 0 0
2014-05-04 12:10:00-05:00 0 0 0 626 90 14 0 0
2014-05-04 12:11:00-05:00 0 0 0 625 91 14 0 0
2014-05-04 12:12:00-05:00 2778 0 0 782 91 14 0 0
2014-05-04 12:13:00-05:00 3834 0 0 851 91 14 0 0
2014-05-04 12:14:00-05:00 3859 0 0 850 91 14 0 0
2014-05-04 12:15:00-05:00 3868 0 0 849 91 14 0 0
2014-05-04 12:16:00-05:00 3853 0 0 845 92 14 0 0
2014-05-04 12:17:00-05:00 3807 0 0 846 92 14 0 0
2014-05-04 12:18:00-05:00 3833 0 0 845 92 14 0 0
2014-05-04 12:19:00-05:00 3799 0 0 846 92 14 0 0
2014-05-04 12:20:00-05:00 3830 0 0 846 92 14 0 0
2014-05-04 12:21:00-05:00 3840 0 0 850 92 14 0 0
2014-05-04 12:22:00-05:00 3837 0 0 854 92 14 0 0
2014-05-04 12:23:00-05:00 3813 0 0 854 91 14 0 0
2014-05-04 12:24:00-05:00 3799 0 0 854 91 14 0 0
2014-05-04 12:25:00-05:00 1749 0 0 855 91 14 0 0
2014-05-04 12:26:00-05:00 0 0 0 845 91 14 0 0
2014-05-04 12:27:00-05:00 0 0 0 627 93 14 0 0
2014-05-04 12:28:00-05:00 0 0 0 625 94 14 0 0
2014-05-04 12:29:00-05:00 0 0 0 623 123 14 0 0
... ... ... ... ... ... ... ... ...
2014-05-04 15:00:00-05:00 3997 0 0 843 100 14 0 0
2014-05-04 15:01:00-05:00 3966 0 0 843 100 14 0 0
2014-05-04 15:02:00-05:00 3962 0 0 840 100 14 0 0
2014-05-04 15:03:00-05:00 3997 0 0 839 100 14 0 0
2014-05-04 15:04:00-05:00 3970 0 0 837 96 14 0 0
2014-05-04 15:05:00-05:00 3956 0 0 836 95 14 0 0
2014-05-04 15:06:00-05:00 3997 0 0 834 94 14 0 0
2014-05-04 15:07:00-05:00 3985 0 0 834 94 14 0 0
2014-05-04 15:08:00-05:00 4003 0 0 834 94 14 0 0
2014-05-04 15:09:00-05:00 4010 0 0 833 94 14 0 0
2014-05-04 15:10:00-05:00 3986 0 0 835 94 14 0 0
2014-05-04 15:11:00-05:00 3958 0 0 837 94 14 0 0
2014-05-04 15:12:00-05:00 3968 0 0 837 94 14 0 0
2014-05-04 15:13:00-05:00 3950 0 0 838 94 14 0 0
2014-05-04 15:14:00-05:00 3983 0 0 839 94 14 0 0
2014-05-04 15:15:00-05:00 4010 0 0 841 94 14 0 0
2014-05-04 15:16:00-05:00 3963 0 0 841 94 14 0 0
2014-05-04 15:17:00-05:00 4007 0 0 841 94 14 0 0
2014-05-04 15:18:00-05:00 3991 0 0 842 94 14 0 0
2014-05-04 15:19:00-05:00 3971 0 0 842 93 14 0 0
2014-05-04 15:20:00-05:00 3958 0 0 843 93 14 0 0
2014-05-04 15:21:00-05:00 3969 0 0 844 93 14 0 0
2014-05-04 15:22:00-05:00 3969 0 0 843 93 14 0 0
2014-05-04 15:23:00-05:00 3951 0 0 840 93 14 0 0
2014-05-04 15:24:00-05:00 3983 0 0 839 93 14 0 0
2014-05-04 15:25:00-05:00 3974 0 0 839 93 14 0 0
2014-05-04 15:26:00-05:00 3968 0 0 837 93 14 0 0
2014-05-04 15:27:00-05:00 3981 0 0 835 93 14 0 0
2014-05-04 15:28:00-05:00 3978 0 0 834 93 14 0 0
2014-05-04 15:29:00-05:00 3992 0 0 834 93 14 0 0

210 rows × 8 columns


In [89]:
x=np.arange(10)
ys = [i+x+(i*x)**2 for i in range(10)]

from matplotlib import cm
colors = iter(cm.rainbow(np.linspace(0, 1, len(ys))))
for y in ys:
    plt.scatter(x, y, color=next(colors))



In [90]:
df.mean()


Out[90]:
('air conditioner', 1)     3376.033333
('appliance', 1)             12.080952
('dish washer', 1)          233.214286
('electric furnace', 1)     821.609524
('fridge', 1)                97.666667
('microwave', 1)             14.038095
('sockets', 1)                0.000000
('sockets', 2)               15.995238
dtype: float64

In [91]:
sns.set_palette("Set3", n_colors=12)
df.plot(kind='area')
plt.xlabel("Time")
plt.ylabel("Power (W)")
plt.tight_layout()
plt.savefig("submetered.png")



In [21]:
sns.timeseries(df)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-21-4a9cb826f9da> in <module>()
----> 1 sns.timeseries(df)

TypeError: 'module' object is not callable

In [84]:
we.store.window = TimeFrame(start='2014-05-04 12:00:00-05:00', end='2014-05-04 15:30:00-05:00')
#elec.plot(kind='area');

Plot fraction of energy consumption of each appliance


In [24]:
sns.palplot(sns.color_palette("Set3", 12))



In [25]:
sns.choose_colorbrewer_palette()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-25-4e5f328279d6> in <module>()
----> 1 sns.choose_colorbrewer_palette()

AttributeError: 'module' object has no attribute 'choose_colorbrewer_palette'

In [93]:
fraction = elec.submeters().fraction_per_meter().dropna()


8/8 ElecMeter(instance=9, building=22, dataset='WikiEnergy', appliances=[Appliance(type='appliance', instance=1)])

In [94]:
# Create convenient labels
labels = elec.get_appliance_labels(fraction.index)
plt.figure(figsize=(8,8))
fraction.plot(kind='pie', labels=labels);


Select meters on the basic of appliance category


In [17]:
elec.select_using_appliances(category='heating')


Out[17]:
MeterGroup(meters=
  ElecMeter(instance=7, building=11, dataset='WikiEnergy', appliances=[Appliance(type='electric furnace', instance=1)])
  ElecMeter(instance=11, building=11, dataset='WikiEnergy', appliances=[Appliance(type='electric water heating appliance', instance=1)])
)

In [18]:
# Find all appliances with a particular type of motor
elec.select_using_appliances(category='single-phase induction motor')


Out[18]:
MeterGroup(meters=
  ElecMeter(instance=2, building=11, dataset='WikiEnergy', appliances=[Appliance(type='air conditioner', instance=1)])
  ElecMeter(instance=5, building=11, dataset='WikiEnergy', appliances=[Appliance(type='dish washer', instance=1)])
  ElecMeter(instance=6, building=11, dataset='WikiEnergy', appliances=[Appliance(type='spin dryer', instance=1)])
  ElecMeter(instance=7, building=11, dataset='WikiEnergy', appliances=[Appliance(type='electric furnace', instance=1)])
  ElecMeter(instance=10, building=11, dataset='WikiEnergy', appliances=[Appliance(type='fridge', instance=1)])
)

Training and disaggregation


In [19]:
# Train
co = CombinatorialOptimisation()
co.train(elec)

In [20]:
# Disaggregate
disag_filename = join(data_dir, 'wikienergy-disag.h5')
output = HDFDataStore(disag_filename, 'w')
co.disaggregate(elec.mains(), output)
output.close()


vampire_power = 321.0 watts

Alternatively, a model could be specified manually:

co.model = [
    {'states': [0,  100], 'training_metadata': ('television', 1)},
    {'states': [0, 2000], 'training_metadata': ('electric furnace', 1)}
    ]

Examine learned model


In [21]:
for model in co.model:
    print_dict(model)


  • states: [ 0 887 2970]
  • training_metadata: ElecMeter(instance=2, building=11, dataset='WikiEnergy', appliances=[Appliance(type='air conditioner', instance=1)])
  • states: [ 0 13 77]
  • training_metadata: ElecMeter(instance=3, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=1)])
  • states: [ 0 11 12]
  • training_metadata: ElecMeter(instance=4, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=2)])
  • states: [ 0 197 1030]
  • training_metadata: ElecMeter(instance=5, building=11, dataset='WikiEnergy', appliances=[Appliance(type='dish washer', instance=1)])
  • states: [0]
  • training_metadata: ElecMeter(instance=6, building=11, dataset='WikiEnergy', appliances=[Appliance(type='spin dryer', instance=1)])
  • states: [ 0 15 520]
  • training_metadata: ElecMeter(instance=7, building=11, dataset='WikiEnergy', appliances=[Appliance(type='electric furnace', instance=1)])
  • states: [ 0 34 789]
  • training_metadata: ElecMeter(instance=8, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=3)])
  • states: [ 0 49 52]
  • training_metadata: ElecMeter(instance=9, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=4)])
  • states: [ 0 138 463]
  • training_metadata: ElecMeter(instance=10, building=11, dataset='WikiEnergy', appliances=[Appliance(type='fridge', instance=1)])
  • states: [ 0 17 34]
  • training_metadata: ElecMeter(instance=11, building=11, dataset='WikiEnergy', appliances=[Appliance(type='electric water heating appliance', instance=1)])

Examine disaggregated data


In [22]:
disag = DataSet(disag_filename)
disag_elec = disag.buildings[building_number].elec
disag_elec.plot()
disag.store.close()


Calculate accuracy of disaggregation


In [23]:
disag = DataSet(disag_filename)
disag_elec = disag.buildings[building_number].elec

f1 = f1_score(disag_elec, elec)
f1.index = disag_elec.get_appliance_labels(f1.index)
f1.plot(kind='bar')
plt.xlabel('appliance');
plt.ylabel('f-score');

disag.store.close()