Disaggregation - Hart Active data only

Customary imports


In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from os.path import join
from pylab import rcParams
import matplotlib.pyplot as plt
rcParams['figure.figsize'] = (13, 6)
plt.style.use('ggplot')
#import nilmtk
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate.hart_85 import Hart85
from nilmtk.disaggregate import CombinatorialOptimisation
from nilmtk.utils import print_dict, show_versions
from nilmtk.metrics import f1_score
#import seaborn as sns
#sns.set_palette("Set3", n_colors=12)

import warnings
warnings.filterwarnings("ignore") #suppress warnings, comment out if warnings required

show versions for any diagnostics


In [2]:
#uncomment if required
#show_versions()

Load dataset


In [3]:
data_dir = '/Users/GJWood/nilm_gjw_data/HDF5/'
gjw = DataSet(join(data_dir, 'nilm_gjw_data.hdf5'))
print('loaded ' + str(len(gjw.buildings)) + ' buildings')
building_number=1


loaded 1 buildings

Use 4 working days for training


In [4]:
gjw.set_window('2015-06-01 00:00:00', '2015-06-05 00:00:00')
elec = gjw.buildings[building_number].elec
mains = elec.mains()
mains.plot()
#plt.show()


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x181a2ef0>

In [5]:
house = elec['fridge'] #only one meter so any selection will do
df = house.load().next() #load the first chunk of data into a dataframe
#df.info() #check that the data is what we want (optional)
#note the data has two columns and a time index

Training

We'll now do the training from the aggregate data. The algorithm segments the time series data into steady and transient states. Thus, we'll first figure out the transient and the steady states. Next, we'll try and pair the on and the off transitions based on their proximity in time and value.


In [6]:
df.ix['2015-06-01 10:00:00+01:00':'2015-06-05 12:00:00+01:00'].plot()# select a time range and plot it
#plt.show()


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x1bb19a90>

In [7]:
h = Hart85()
h.train(mains,cols=[('power','active')])


Finding Edges, please wait ...
Edge detection complete.
Creating transition frame ...
Transition frame created.
Creating states frame ...
States frame created.
Finished.

In [8]:
h.steady_states.head()


Out[8]:
active average
2015-06-01 00:00:16+01:00 980.064706
2015-06-01 00:08:45+01:00 843.000000
2015-06-01 00:11:41+01:00 723.000000
2015-06-01 00:13:40+01:00 618.000000
2015-06-01 00:25:51+01:00 693.241935

In [9]:
h.steady_states.tail()


Out[9]:
active average
2015-06-04 23:22:44+01:00 1319
2015-06-04 23:34:17+01:00 1603
2015-06-04 23:42:13+01:00 1527
2015-06-04 23:47:46+01:00 1460
2015-06-04 23:59:54+01:00 1477

In [10]:
h.centroids


Out[10]:
(power, active)
0 174.816851
1 2095.918107
2 699.709904
3 1326.949720
4 2699.516843
5 1772.166667

In [11]:
h.model


Out[11]:
{}

In [12]:
ax = mains.plot()
h.steady_states['active average'].plot(style='o', ax = ax);
plt.ylabel("Power (W)")
plt.xlabel("Time");
#plt.show()


Out[12]:
<matplotlib.text.Text at 0x2077b048>

In [23]:
h.pair_df.head()


Out[23]:
T1 Time T1 Active T2 Time T2 Active
0 2015-05-31 23:56:25 237.027559 2015-06-01 00:15:51 -308.000000
1 2015-06-01 03:08:03 137.000000 2015-06-01 03:10:16 -79.002874
2 2015-06-01 02:01:21 77.000000 2015-06-01 03:35:32 -90.500000
3 2015-06-01 03:36:08 78.500000 2015-06-01 03:38:10 -72.000000
4 2015-06-01 03:35:14 267.000000 2015-06-01 03:38:59 -270.000000

In [51]:
pair_shape_df = pd.DataFrame(columns=['Height','Duration'])
pair_shape_df['Height']= (h.pair_df['T1 Active'].abs()+h.pair_df['T2 Active'].abs())/2
pair_shape_df['Duration']= pd.to_timedelta(h.pair_df['T2 Time']-h.pair_df['T1 Time'],unit='s').dt.seconds
pair_shape_df.head()


Out[51]:
Height Duration
0 272.513780 1166
1 108.001437 133
2 83.750000 5651
3 75.250000 122
4 268.500000 225

In [69]:
fig = plt.figure(figsize=(13,6))
ax = fig.add_subplot(1, 1, 1)
ax.set_yscale('log')
ax.scatter(pair_shape_df['Height'],pair_shape_df['Duration'])
plt.title("Paired event -  Signature Space")
plt.ylabel("Log Duration (sec)")
plt.xlabel("Transition (W)");


Set two days for Disaggregation period of interest

Inspect the data during a quiet period when we were on holiday, should only be autonomous appliances such as fidge, freeze and water heating + any standby devices not unplugged.


In [14]:
gjw.set_window('2015-06-08 00:00:00','2015-06-10 00:00:00')
elec = gjw.buildings[building_number].elec
mains = elec.mains()
mains.plot()


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x215f2e10>

In [15]:
ax = mains.plot()
h.steady_states['active average'].plot(style='o', ax = ax);
plt.ylabel("Power (W)")
plt.xlabel("Time");



In [22]:
plt.hist(h.steady_states['active average'],250)
plt.ylabel("Frequency")
plt.xlabel("Power (w)")
plt.title("Active Average distribution");


Disaggregate using Hart (Active data only)


In [20]:
disag_filename = join(data_dir, 'disag_gjw_hart.hdf5')
output = HDFDataStore(disag_filename, 'w')
h.disaggregate(mains,output,sample_period=1)
output.close()


Finding Edges, please wait ...
Edge detection complete.
Creating transition frame ...
Transition frame created.
Creating states frame ...
States frame created.
Finished.

In [21]:
ax = mains.plot()
h.steady_states['active average'].plot(style='o', ax = ax);
plt.ylabel("Power (W)")
plt.xlabel("Time");



In [22]:
disag_hart = DataSet(disag_filename)
disag_hart


Out[22]:
<nilmtk.dataset.DataSet at 0x212675c0>

In [23]:
disag_hart_elec = disag_hart.buildings[building_number].elec
disag_hart_elec


Out[23]:
MeterGroup(meters=
  ElecMeter(instance=1, building=1, dataset='Hart85_2015-10-08T15:53:54', site_meter, appliances=[])
  ElecMeter(instance=2, building=1, dataset='Hart85_2015-10-08T15:53:54', appliances=[Appliance(type='unknown', instance=0)])
  ElecMeter(instance=3, building=1, dataset='Hart85_2015-10-08T15:53:54', appliances=[Appliance(type='unknown', instance=1)])
  ElecMeter(instance=4, building=1, dataset='Hart85_2015-10-08T15:53:54', appliances=[Appliance(type='unknown', instance=2)])
  ElecMeter(instance=5, building=1, dataset='Hart85_2015-10-08T15:53:54', appliances=[Appliance(type='unknown', instance=3)])
  ElecMeter(instance=6, building=1, dataset='Hart85_2015-10-08T15:53:54', appliances=[Appliance(type='unknown', instance=4)])
  ElecMeter(instance=7, building=1, dataset='Hart85_2015-10-08T15:53:54', appliances=[Appliance(type='unknown', instance=5)])
)

In [24]:
disag_hart_elec.mains()


Out[24]:
ElecMeter(instance=1, building=1, dataset='Hart85_2015-10-08T15:53:54', site_meter, appliances=[])

In [25]:
h.centroids


Out[25]:
(power, active)
0 174.816851
1 2095.918107
2 699.709904
3 1326.949720
4 2699.516843
5 1772.166667

In [26]:
h.model


Out[26]:
{}

In [27]:
h.steady_states


Out[27]:
active average
2015-06-01 00:00:16+01:00 980.064706
2015-06-01 00:08:45+01:00 843.000000
2015-06-01 00:11:41+01:00 723.000000
2015-06-01 00:13:40+01:00 618.000000
2015-06-01 00:25:51+01:00 693.241935
2015-06-01 00:33:35+01:00 365.000000
2015-06-01 00:56:25+01:00 622.027559
2015-06-01 01:15:51+01:00 236.000000
2015-06-01 03:01:21+01:00 338.000000
2015-06-01 04:08:03+01:00 396.000000
2015-06-01 04:10:16+01:00 316.997126
2015-06-01 04:34:03+01:00 575.000000
2015-06-01 04:35:14+01:00 827.000000
2015-06-01 04:35:32+01:00 767.500000
2015-06-01 04:36:08+01:00 846.000000
2015-06-01 04:38:03+01:00 2694.000000
2015-06-01 04:38:10+01:00 2584.000000
2015-06-01 04:38:17+01:00 2666.200000
2015-06-01 04:38:48+01:00 2670.000000
2015-06-01 04:38:59+01:00 2400.000000
2015-06-01 04:39:18+01:00 2667.000000
2015-06-01 04:39:32+01:00 2510.000000
2015-06-01 04:39:35+01:00 2642.000000
2015-06-01 04:40:12+01:00 2645.000000
2015-06-01 04:40:16+01:00 2397.000000
2015-06-01 04:40:19+01:00 2634.375000
2015-06-01 04:40:31+01:00 2336.000000
2015-06-01 04:40:35+01:00 2622.000000
2015-06-01 04:40:52+01:00 2568.000000
2015-06-01 04:41:16+01:00 2383.235294
... ...
2015-06-04 19:35:47+01:00 534.088757
2015-06-04 19:56:39+01:00 493.000000
2015-06-04 20:16:28+01:00 616.000000
2015-06-04 20:16:54+01:00 536.000000
2015-06-04 20:18:07+01:00 607.000000
2015-06-04 20:24:21+01:00 507.571429
2015-06-04 20:42:43+01:00 616.000000
2015-06-04 20:44:39+01:00 700.000000
2015-06-04 20:55:08+01:00 529.000000
2015-06-04 20:57:17+01:00 650.000000
2015-06-04 20:59:53+01:00 796.000000
2015-06-04 21:26:19+01:00 799.000000
2015-06-04 21:29:53+01:00 823.000000
2015-06-04 22:04:31+01:00 1442.000000
2015-06-04 22:04:42+01:00 693.000000
2015-06-04 22:17:42+01:00 1020.000000
2015-06-04 22:17:49+01:00 2036.750000
2015-06-04 22:19:39+01:00 720.666667
2015-06-04 22:31:34+01:00 828.932432
2015-06-04 22:36:40+01:00 1023.000000
2015-06-04 22:38:23+01:00 879.000000
2015-06-04 22:44:05+01:00 970.000000
2015-06-04 22:52:13+01:00 1139.017857
2015-06-04 23:10:57+01:00 1365.000000
2015-06-04 23:13:11+01:00 1346.974843
2015-06-04 23:22:44+01:00 1319.000000
2015-06-04 23:34:17+01:00 1603.000000
2015-06-04 23:42:13+01:00 1527.000000
2015-06-04 23:47:46+01:00 1460.000000
2015-06-04 23:59:54+01:00 1477.000000

2526 rows × 1 columns


In [17]:
from nilmtk.metrics import f1_score
f1_hart= f1_score(disag_hart_elec, test_elec)
f1_hart.index = disag_hart_elec.get_labels(f1_hart.index)
f1_hart.plot(kind='barh')
plt.ylabel('appliance');
plt.xlabel('f-score');
plt.title("Hart");


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-631f0a07efc5> in <module>()
      1 from nilmtk.metrics import f1_score
----> 2 f1_hart= f1_score(disag_hart_elec, test_elec)
      3 f1_hart.index = disag_hart_elec.get_labels(f1_hart.index)
      4 f1_hart.plot(kind='barh')
      5 plt.ylabel('appliance');

NameError: name 'test_elec' is not defined

In [ ]: