Disaggregation - Hart Active and Reactive data

Customary imports


In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from os.path import join
from pylab import rcParams
import matplotlib.pyplot as plt
rcParams['figure.figsize'] = (13, 6)
plt.style.use('ggplot')
#import nilmtk
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate.hart_85 import Hart85
from nilmtk.disaggregate import CombinatorialOptimisation
from nilmtk.utils import print_dict, show_versions
from nilmtk.metrics import f1_score
#import seaborn as sns
#sns.set_palette("Set3", n_colors=12)

import warnings
warnings.filterwarnings("ignore") #suppress warnings, comment out if warnings required

Show versions for any diagnostics


In [2]:
#uncomment if required
#show_versions()

Load dataset


In [3]:
data_dir = '/Users/GJWood/nilm_gjw_data/HDF5/'
gjw = DataSet(join(data_dir, 'nilm_gjw_data.hdf5'))
print('loaded ' + str(len(gjw.buildings)) + ' buildings')
building_number=1


loaded 1 buildings

Period of interest 4 days during normal week


In [4]:
gjw.set_window('2015-06-01 00:00:00', '2015-06-05 00:00:00')
elec = gjw.buildings[building_number].elec
mains = elec.mains()
house = elec['fridge'] #only one meter so any selection will do
df = house.load().next() #load the first chunk of data into a dataframe
df.info() #check that the data is what we want (optional)
#note the data has two columns and a time index


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 345600 entries, 2015-06-01 00:00:00+01:00 to 2015-06-04 23:59:59+01:00
Data columns (total 2 columns):
(power, reactive)    345600 non-null float32
(power, active)      345600 non-null float32
dtypes: float32(2)
memory usage: 5.3 MB

In [5]:
plotdata = df.ix['2015-06-01 00:00:00': '2015-07-06 00:00:00']
plotdata.plot()
plt.title("Raw Mains Usage")
plt.ylabel("Power (W)")
plt.xlabel("Time");



In [6]:
plt.scatter(plotdata[('power','active')],plotdata[('power','reactive')])
plt.title("Raw Mains Usage Signature Space")
plt.ylabel("Reactive Power (VAR)")
plt.xlabel("Active Power (W)");


Training

We'll now do the training from the aggregate data. The algorithm segments the time series data into steady and transient states. Thus, we'll first figure out the transient and the steady states. Next, we'll try and pair the on and the off transitions based on their proximity in time and value.


In [45]:
h = Hart85()
h.train(mains,cols=[('power','active'),('power','reactive')],min_tolerance=100,noise_level=70,buffer_size=20,state_threshold=15)


Finding Edges, please wait ...
Edge detection complete.
Creating transition frame ...
Transition frame created.
Creating states frame ...
States frame created.
Finished.

In [46]:
h.centroids


Out[46]:
(power, active) (power, reactive)
0 89.987508 105.169375
1 76.942336 540.287504
2 2086.828654 23.059838
3 719.493513 25.488081
4 2700.307958 44.042016
5 1329.800000 13.891310
6 789.500000 603.818910

In [47]:
plt.scatter(h.steady_states[('active average')],h.steady_states[('reactive average')])
plt.scatter(h.centroids[('power','active')],h.centroids[('power','reactive')],marker='x',c=(1.0, 0.0, 0.0))
plt.legend(['Steady states','Centroids'],loc=4)
plt.title("Training steady states Signature space")
plt.ylabel("Reactive average (VAR)")
plt.xlabel("Active average (W)");
labels = ['Centroid {0}'.format(i) for i in range(len(h.centroids))]
for label, x, y in zip(labels, h.centroids[('power','active')], h.centroids[('power','reactive')]):
    plt.annotate(
        label, 
        xy = (x, y), xytext = (-5, 5),
        textcoords = 'offset points', ha = 'right', va = 'bottom',
        bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5))



In [9]:
h.steady_states.head()


Out[9]:
active average reactive average
2015-06-01 00:08:45+01:00 843.000000 145.000
2015-06-01 00:33:35+01:00 365.000000 78.375
2015-06-01 00:33:52+01:00 365.000000 -336.000
2015-06-01 00:33:54+01:00 365.000000 94.000
2015-06-01 00:56:25+01:00 622.027559 199.000

In [10]:
h.steady_states.tail()


Out[10]:
active average reactive average
2015-06-04 22:38:23+01:00 879.000000 323.000000
2015-06-04 22:51:06+01:00 925.000000 -173.000000
2015-06-04 22:51:08+01:00 925.000000 302.000000
2015-06-04 22:52:13+01:00 1139.017857 290.044643
2015-06-04 23:10:57+01:00 1365.000000 315.000000

In [12]:
h.model


Out[12]:
{}

In [13]:
ax = mains.plot()
h.steady_states['active average'].plot(style='o', ax = ax);
plt.ylabel("Power (W)")
plt.xlabel("Time");
#plt.show()


Out[13]:
<matplotlib.text.Text at 0x214e9cf8>

In [14]:
h.pair_df


Out[14]:
T1 Time T1 Active T1 Reactive T2 Time T2 Active T2 Reactive
0 2015-06-30 23:00:38 2733.000000 0.000000 2015-06-30 23:02:04 -2740.000000 -21.000000
1 2015-06-30 23:27:16 89.000000 0.000000 2015-06-30 23:34:52 -159.142857 -0.285714
2 2015-06-30 23:35:00 91.142857 -75.714286 2015-06-30 23:35:22 0.800000 107.600000
3 2015-06-30 23:35:47 5.000000 -85.807692 2015-06-30 23:53:52 -87.000000 0.000000
4 2015-07-01 00:05:56 80.000000 0.000000 2015-07-01 02:35:59 -88.000000 0.000000
5 2015-07-01 04:30:05 2101.000000 0.000000 2015-07-01 04:40:20 -2063.918919 0.000000
6 2015-07-01 05:23:42 118.500000 -645.333333 2015-07-01 05:23:45 -38.500000 639.625000
7 2015-07-01 05:24:28 77.000000 -701.833333 2015-07-01 05:24:35 19.000000 698.000000
8 2015-07-01 05:24:54 16.000000 725.555556 2015-07-01 05:25:23 11.000000 -705.125000
9 2015-07-01 05:24:46 282.000000 -659.333333 2015-07-01 05:25:54 -250.000000 674.200000
10 2015-07-01 05:02:44 104.000000 0.000000 2015-07-01 05:25:59 -52.000000 76.400000
11 2015-07-01 05:27:54 70.000000 -765.333333 2015-07-01 05:28:03 0.000000 810.000000
12 2015-07-01 05:26:30 103.000000 753.000000 2015-07-01 05:28:11 -7.000000 -730.000000
13 2015-07-01 05:28:30 34.250000 -786.666667 2015-07-01 05:28:39 0.000000 771.285714
14 2015-07-01 05:28:47 97.000000 -879.285714 2015-07-01 05:28:57 0.000000 849.000000
15 2015-07-01 05:30:02 218.000000 -726.600000 2015-07-01 05:30:08 -187.000000 694.000000
16 2015-07-01 05:30:37 21.000000 -853.000000 2015-07-01 05:30:47 0.000000 835.333333
17 2015-07-01 05:30:19 296.000000 -770.166667 2015-07-01 05:32:17 -326.000000 854.000000
18 2015-07-01 05:31:55 80.000000 -254.000000 2015-07-01 05:32:51 0.000000 348.000000
19 2015-07-01 05:32:45 20.000000 -679.222222 2015-07-01 05:33:13 0.000000 748.000000
20 2015-07-01 05:31:35 52.000000 482.000000 2015-07-01 05:33:27 0.000000 -508.000000
21 2015-07-01 05:33:20 30.000000 -683.000000 2015-07-01 05:33:31 0.000000 762.000000
22 2015-07-01 05:33:40 14.000000 -704.714286 2015-07-01 05:34:25 0.000000 764.000000
23 2015-07-01 05:34:33 53.000000 -616.666667 2015-07-01 05:35:01 0.000000 702.400000
24 2015-07-01 05:34:59 23.000000 -160.000000 2015-07-01 05:35:15 -32.000000 140.000000
25 2015-07-01 05:35:10 38.000000 -871.666667 2015-07-01 05:35:37 0.000000 845.111111
26 2015-07-01 05:35:35 99.000000 -137.000000 2015-07-01 05:35:49 -42.000000 185.000000
27 2015-07-01 05:34:52 25.000000 -803.333333 2015-07-01 05:35:57 0.000000 839.125000
28 2015-07-01 05:35:55 37.000000 -119.000000 2015-07-01 05:36:13 0.000000 144.000000
29 2015-07-01 05:36:41 70.000000 -753.125000 2015-07-01 05:36:51 0.000000 738.000000
... ... ... ... ... ... ...
1312 2015-07-04 20:16:04 738.523810 37.761905 2015-07-04 20:16:36 -762.000000 0.500000
1313 2015-07-04 20:18:12 1234.000000 -0.714286 2015-07-04 20:18:46 -1261.000000 -29.000000
1314 2015-07-04 20:19:34 770.000000 -2.000000 2015-07-04 20:20:22 -733.000000 5.500000
1315 2015-07-04 20:19:48 1356.090909 44.727273 2015-07-04 20:20:25 -1351.000000 -74.500000
1316 2015-07-04 20:20:57 74.000000 -48.000000 2015-07-04 20:20:59 -36.000000 141.000000
1317 2015-07-04 20:22:02 698.000000 -0.200000 2015-07-04 20:24:06 -739.000000 -1.500000
1318 2015-07-04 20:24:32 2072.000000 -0.578947 2015-07-04 20:25:24 -2068.000000 0.000000
1319 2015-07-04 20:29:58 756.000000 0.000000 2015-07-04 20:30:14 -769.000000 0.000000
1320 2015-07-04 20:30:35 11.000000 -86.000000 2015-07-04 20:30:39 0.000000 77.000000
1321 2015-07-04 20:29:18 672.000000 0.000000 2015-07-04 20:31:02 -712.000000 -86.136364
1322 2015-07-04 20:27:27 1356.000000 40.000000 2015-07-04 20:31:10 -1385.000000 -114.333333
1323 2015-07-04 20:30:46 2058.000000 0.000000 2015-07-04 20:32:31 -2125.000000 -7.000000
1324 2015-07-04 20:31:37 1329.250000 139.500000 2015-07-04 20:33:47 -1345.000000 -87.750000
1325 2015-07-04 20:31:33 793.000000 -63.500000 2015-07-04 20:34:36 -724.000000 -2.000000
1326 2015-07-04 20:38:10 702.000000 -0.071429 2015-07-04 20:38:25 -708.000000 -96.928571
1327 2015-07-04 20:37:32 1373.461538 123.076923 2015-07-04 20:38:33 -1417.000000 -140.000000
1328 2015-07-04 20:37:39 79.000000 -23.000000 2015-07-04 20:38:57 0.000000 110.384615
1329 2015-07-04 20:37:13 19.000000 -101.571429 2015-07-04 20:39:10 0.000000 80.615385
1330 2015-07-04 20:39:46 753.000000 92.000000 2015-07-04 20:40:07 -683.000000 -13.000000
1331 2015-07-04 20:40:56 767.000000 78.800000 2015-07-04 20:41:59 -673.000000 -70.000000
1332 2015-07-04 20:41:02 1349.000000 93.333333 2015-07-04 20:42:16 -1340.000000 1.500000
1333 2015-07-04 20:40:14 1308.000000 0.166667 2015-07-04 20:43:16 -1316.000000 -10.000000
1334 2015-07-04 20:44:53 81.000000 -43.750000 2015-07-04 20:45:01 0.000000 89.333333
1335 2015-07-04 20:47:58 120.205882 0.000000 2015-07-04 20:59:12 -81.914474 0.000000
1336 2015-07-04 21:16:24 101.000000 0.000000 2015-07-04 21:16:53 -71.250000 0.000000
1337 2015-07-04 21:17:45 125.000000 0.000000 2015-07-04 21:18:01 -122.700000 0.000000
1338 2015-07-04 21:23:02 84.139130 0.000000 2015-07-04 21:33:22 0.000000 77.000000
1339 2015-07-04 21:43:50 158.666667 23.666667 2015-07-04 21:48:18 -254.000000 0.000000
1340 2015-07-04 22:03:00 72.272727 0.000000 2015-07-04 22:42:20 0.000000 -96.133333
1341 2015-07-04 21:58:36 86.888889 0.000000 2015-07-04 22:42:25 0.000000 97.800000

1342 rows × 6 columns

Set two days for Disaggregation period of interest

Inspect the data during a quiet period when we were on holiday, should only be autonomous appliances such as fidge, freeze and water heating + any standby devices not unplugged.


In [55]:
gjw.set_window('2015-07-13 00:00:00','2015-07-14 00:00:00')
elec = gjw.buildings[building_number].elec
mains = elec.mains()
mains.plot()


Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x228f9cc0>

Disaggregate using Hart (Active data only)


In [56]:
ax = mains.plot()
h.steady_states['active average'].plot(style='o', ax = ax);
plt.ylabel("Power (W)")
plt.xlabel("Time");



In [15]:
disag_filename = join(data_dir, 'disag_gjw_hart.hdf5')
output = HDFDataStore(disag_filename, 'w')
h.disaggregate(mains,output,sample_period=1)
output.close()


Finding Edges, please wait ...
Edge detection complete.
Creating transition frame ...
Transition frame created.
Creating states frame ...
States frame created.
Finished.

In [16]:
ax = mains.plot()
h.steady_states['active average'].plot(style='o', ax = ax);
plt.ylabel("Power (W)")
plt.xlabel("Time");



In [18]:
disag_hart = DataSet(disag_filename)
disag_hart


Out[18]:
<nilmtk.dataset.DataSet at 0x2318c908>

In [19]:
disag_hart_elec = disag_hart.buildings[building_number].elec
disag_hart_elec


Out[19]:
MeterGroup(meters=
  ElecMeter(instance=1, building=1, dataset='Hart85_2015-10-07T11:51:36', site_meter, appliances=[])
  ElecMeter(instance=2, building=1, dataset='Hart85_2015-10-07T11:51:36', appliances=[Appliance(type='unknown', instance=0)])
  ElecMeter(instance=3, building=1, dataset='Hart85_2015-10-07T11:51:36', appliances=[Appliance(type='unknown', instance=1)])
  ElecMeter(instance=4, building=1, dataset='Hart85_2015-10-07T11:51:36', appliances=[Appliance(type='unknown', instance=2)])
  ElecMeter(instance=5, building=1, dataset='Hart85_2015-10-07T11:51:36', appliances=[Appliance(type='unknown', instance=3)])
  ElecMeter(instance=6, building=1, dataset='Hart85_2015-10-07T11:51:36', appliances=[Appliance(type='unknown', instance=4)])
  ElecMeter(instance=7, building=1, dataset='Hart85_2015-10-07T11:51:36', appliances=[Appliance(type='unknown', instance=5)])
  ElecMeter(instance=8, building=1, dataset='Hart85_2015-10-07T11:51:36', appliances=[Appliance(type='unknown', instance=6)])
)

In [20]:
disag_hart_elec.mains()


Out[20]:
ElecMeter(instance=1, building=1, dataset='Hart85_2015-10-07T11:51:36', site_meter, appliances=[])

In [21]:
h.centroids


Out[21]:
(power, active)
0 164.517390
1 2084.002353
2 2712.309782
3 1276.028081
4 3335.258491
5 4755.483947
6 4078.086616

In [22]:
h.model


Out[22]:
{}

In [25]:
h.steady_states


Out[25]:
active average
2015-05-01 00:13:32+01:00 2739.000000
2015-05-01 00:15:24+01:00 1029.034091
2015-05-01 00:26:15+01:00 920.000000
2015-05-01 00:36:18+01:00 645.333333
2015-05-01 00:55:06+01:00 3875.000000
2015-05-01 01:00:30+01:00 703.000000
2015-05-01 01:00:59+01:00 2856.000000
2015-05-01 01:02:26+01:00 3067.318182
2015-05-01 01:06:50+01:00 1009.000000
2015-05-01 01:07:40+01:00 4503.000000
2015-05-01 01:08:10+01:00 5414.000000
2015-05-01 01:11:06+01:00 1014.000000
2015-05-01 01:11:45+01:00 3129.000000
2015-05-01 01:13:57+01:00 995.000000
2015-05-01 01:17:02+01:00 2760.796875
2015-05-01 01:19:30+01:00 695.000000
2015-05-01 01:21:27+01:00 2808.000000
2015-05-01 01:23:37+01:00 656.900000
2015-05-01 01:25:10+01:00 611.000000
2015-05-01 01:25:20+01:00 2695.000000
2015-05-01 01:25:27+01:00 2784.000000
2015-05-01 01:25:31+01:00 2700.000000
2015-05-01 01:26:56+01:00 551.000000
2015-05-01 02:43:35+01:00 477.600000
2015-05-01 03:02:11+01:00 513.000000
2015-05-01 04:02:04+01:00 406.000000
2015-05-01 04:02:13+01:00 453.277778
2015-05-01 04:02:31+01:00 378.333333
2015-05-01 04:15:14+01:00 510.117188
2015-05-01 04:20:32+01:00 426.000000
... ...
2015-08-30 19:27:17+01:00 227.000000
2015-08-30 19:31:13+01:00 2477.000000
2015-08-30 19:34:22+01:00 325.777778
2015-08-30 19:40:54+01:00 330.500000
2015-08-30 19:42:25+01:00 2437.000000
2015-08-30 19:43:00+01:00 257.000000
2015-08-30 19:56:53+01:00 267.979487
2015-08-30 20:24:43+01:00 457.538462
2015-08-30 20:25:56+01:00 512.666667
2015-08-30 20:26:00+01:00 434.000000
2015-08-30 20:27:34+01:00 526.000000
2015-08-30 20:40:35+01:00 452.000000
2015-08-30 20:42:51+01:00 527.312500
2015-08-30 20:43:07+01:00 453.936170
2015-08-30 20:47:44+01:00 332.976378
2015-08-30 21:05:11+01:00 418.000000
2015-08-30 21:25:49+01:00 398.000000
2015-08-30 21:49:17+01:00 450.000000
2015-08-30 21:50:47+01:00 3221.000000
2015-08-30 21:56:41+01:00 396.983806
2015-08-30 22:36:04+01:00 399.000000
2015-08-30 22:46:16+01:00 494.000000
2015-08-30 22:55:14+01:00 543.953782
2015-08-30 22:59:12+01:00 445.000000
2015-08-30 23:21:36+01:00 765.000000
2015-08-30 23:22:45+01:00 439.000000
2015-08-30 23:48:23+01:00 327.000000
2015-08-30 23:53:13+01:00 3131.500000
2015-08-30 23:53:28+01:00 4484.500000
2015-08-30 23:55:10+01:00 424.789474

55488 rows × 1 columns


In [17]:
from nilmtk.metrics import f1_score
f1_hart= f1_score(disag_hart_elec, test_elec)
f1_hart.index = disag_hart_elec.get_labels(f1_hart.index)
f1_hart.plot(kind='barh')
plt.ylabel('appliance');
plt.xlabel('f-score');
plt.title("Hart");


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-631f0a07efc5> in <module>()
      1 from nilmtk.metrics import f1_score
----> 2 f1_hart= f1_score(disag_hart_elec, test_elec)
      3 f1_hart.index = disag_hart_elec.get_labels(f1_hart.index)
      4 f1_hart.plot(kind='barh')
      5 plt.ylabel('appliance');

NameError: name 'test_elec' is not defined

In [ ]: