Disaggregation - Combinatorial Optimisation

Customary imports


In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from os.path import join
from pylab import rcParams
import matplotlib.pyplot as plt
rcParams['figure.figsize'] = (13, 6)
plt.style.use('ggplot')
#import nilmtk
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation
from nilmtk.utils import print_dict, show_versions
from nilmtk.metrics import f1_score
#import seaborn as sns
#sns.set_palette("Set3", n_colors=12)

import warnings
warnings.filterwarnings("ignore") #suppress warnings, comment out if warnings required

Show versions for diagnostics if required


In [2]:
#uncomment if required
#show_versions()

Load dataset


In [3]:
data_dir = '/Users/GJWood/nilm_gjw_data/HDF5/'
gjw = DataSet(join(data_dir, 'nilm_gjw_data.hdf5'))
print('loaded ' + str(len(gjw.buildings)) + ' buildings')
building_number=1


loaded 1 buildings

Let us perform our analysis on two selected days


In [4]:
gjw.set_window(start='2015-09-03 00:00:00+01:00', end='2015-09-05 00:00:00+01:00') # restrict all loading to the selected 2-day window
elec = gjw.buildings[building_number].elec
mains = elec.mains()
mains.plot()
#plt.show()


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1857d978>

Check that the data sections are good


In [5]:
elec.mains().good_sections()


Out[5]:
[TimeFrame(start='2015-09-03 00:00:00+01:00', end='2015-09-05 00:00:00+01:00', empty=False)]

Select a meter and check the resulting dataframe


In [6]:
house = elec['fridge'] #only one meter so any selection will do
df = next(house.load()) #load the first chunk of data into a dataframe
df.info() #check that the data is what we want (optional)
#note the data has two columns and a time index


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 172800 entries, 2015-09-03 00:00:00+01:00 to 2015-09-04 23:59:59+01:00
Data columns (total 2 columns):
(power, reactive)    172800 non-null float32
(power, active)      172800 non-null float32
dtypes: float32(2)
memory usage: 2.6 MB

In [7]:
df.head()


Out[7]:
physical_quantity             power
type                       reactive  active
2015-09-03 00:00:00+01:00         0     610
2015-09-03 00:00:01+01:00         0     610
2015-09-03 00:00:02+01:00         0     610
2015-09-03 00:00:03+01:00         0     610
2015-09-03 00:00:04+01:00         0     610

In [8]:
df.tail()


Out[8]:
physical_quantity             power
type                       reactive  active
2015-09-04 23:59:55+01:00       393     857
2015-09-04 23:59:56+01:00       393     857
2015-09-04 23:59:57+01:00       393     857
2015-09-04 23:59:58+01:00       393     857
2015-09-04 23:59:59+01:00       393     857

In [9]:
df.plot()
#plt.show()


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x3bfb710>

Training

We'll now attempt to train the model. Combinatorial optimisation learns a small set of discrete power states for each appliance by clustering its power demand; at disaggregation time it searches over all combinations of appliance states for the one whose summed power is closest to each aggregate reading.
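
For intuition, the cell below gives a minimal NumPy sketch of that combinatorial search (illustration only, not the nilmtk implementation); the appliance state vectors are invented for the example.


In [ ]:
from itertools import product

# Invented per-appliance power states (W), for illustration only
appliance_states = [
    np.array([0., 100.]),          # e.g. a fridge: off / on
    np.array([0., 2000., 2500.]),  # e.g. a two-level heating load
]

# Enumerate every combination of states and its total power
combos = np.array(list(product(*appliance_states)))  # (n_combos, n_appliances)
combo_sums = combos.sum(axis=1)

aggregate = np.array([0., 95., 2100., 2610.])  # toy aggregate samples (W)

# For each sample, pick the state combination minimising |sum - aggregate|
best = np.argmin(np.abs(combo_sums[None, :] - aggregate[:, None]), axis=1)
print(combos[best])  # per-appliance estimates, one row per sample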


In [10]:
df.loc['2015-09-03 11:00:00+01:00':'2015-09-03 12:00:00+01:00'].plot()# select a time range and plot it
#plt.show()


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x1c4e3860>

In [13]:
co = CombinatorialOptimisation()
co.train(elec,cols=[('power','active')])


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-13-5d316312ffa7> in <module>()
      1 co = CombinatorialOptimisation()
----> 2 co.train(elec,cols=[('power','active')])

c:\users\gjwood\nilmtk\nilmtk\disaggregate\combinatorial_optimisation.pyc in train(self, metergroup, num_states_dict, **load_kwargs)
     75         from sklearn.utils.extmath import cartesian
     76         centroids = [model['states'] for model in self.model]
---> 77         self.state_combinations = cartesian(centroids)
     78         # self.state_combinations is a 2D array
     79         # each column is a chan

c:\Users\GJWood\Anaconda\lib\site-packages\sklearn\utils\extmath.pyc in cartesian(arrays, out)
    517     arrays = [np.asarray(x) for x in arrays]
    518     shape = (len(x) for x in arrays)
--> 519     dtype = arrays[0].dtype
    520 
    521     ix = np.indices(shape)

IndexError: list index out of range
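
The IndexError is expected here: CombinatorialOptimisation.train builds one state model per submetered appliance, and this dataset exposes only a mains channel, so self.model stays empty and cartesian() is called on an empty list. On a dataset with submetered appliances the call would look like the sketch below (the path is a placeholder, not part of this experiment).


In [ ]:
# Sketch only: CO needs submetered channels to learn per-appliance states.
# '/data/redd.h5' is a hypothetical submetered dataset, e.g. REDD.
redd = DataSet('/data/redd.h5')
co = CombinatorialOptimisation()
co.train(redd.buildings[1].elec.submeters(), cols=[('power', 'active')])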

In [11]:
co.steady_states.head()


Out[11]:
                           active average  reactive average
2015-09-03 00:20:37+01:00      701.376344                 0
2015-09-03 00:24:02+01:00      659.000000                 0
2015-09-03 00:34:00+01:00      553.000000                 0
2015-09-03 00:39:34+01:00      321.000000                 0
2015-09-03 01:16:31+01:00      314.000000                 0

In [12]:
co.steady_states.tail()


Out[12]:
                           active average  reactive average
2015-09-04 23:27:45+01:00       822.20000        393.000000
2015-09-04 23:27:55+01:00       898.00000        392.962963
2015-09-04 23:28:49+01:00       898.00000        304.800000
2015-09-04 23:28:54+01:00       897.96129        395.083871
2015-09-04 23:41:34+01:00       967.00000        342.000000

In [13]:
ax = mains.plot()
co.steady_states['active average'].plot(style='o', ax = ax);
plt.ylabel("Power (W)")
plt.xlabel("Time");
#plt.show()


Out[13]:
<matplotlib.text.Text at 0x1c92a0b8>

In [14]:
disag_filename = join(data_dir, 'disag_gjw_CO.hdf5')
output = HDFDataStore(disag_filename, 'w')
co.disaggregate(mains,output)
output.close()


Finding Edges, please wait ...
Edge detection complete.
Creating transition frame ...
Transition frame created.
Creating states frame ...
States frame created.
Finished.
---------------------------------------------------------------------------
Exception                                 Traceback (most recent call last)
<ipython-input-14-2682aa412291> in <module>()
      1 disag_filename = join(data_dir, 'disag_gjw_hart.hdf5')
      2 output = HDFDataStore(disag_filename, 'w')
----> 3 h.disaggregate(mains,output)
      4 output.close()

c:\users\gjwood\nilmtk\nilmtk\disaggregate\hart_85.pyc in disaggregate(self, mains, output_datastore, **load_kwargs)
    430             measurement = chunk.name
    431             power_df = self.disaggregate_chunk(
--> 432                 chunk, prev, transients)
    433 
    434             cols = pd.MultiIndex.from_tuples([chunk.name])

c:\users\gjwood\nilmtk\nilmtk\disaggregate\hart_85.pyc in disaggregate_chunk(self, chunk, prev, transients)
    313         prev = states.iloc[-1].to_dict()
    314         power_chunk_dict = self.assign_power_from_states(states, prev)
--> 315         return pd.DataFrame(power_chunk_dict, index=chunk.index)
    316 
    317     def assign_power_from_states(self, states_chunk, prev):

c:\Users\GJWood\Anaconda\lib\site-packages\pandas\core\frame.pyc in __init__(self, data, index, columns, dtype, copy)
    211                                  dtype=dtype, copy=copy)
    212         elif isinstance(data, dict):
--> 213             mgr = self._init_dict(data, index, columns, dtype=dtype)
    214         elif isinstance(data, ma.MaskedArray):
    215             import numpy.ma.mrecords as mrecords

c:\Users\GJWood\Anaconda\lib\site-packages\pandas\core\frame.pyc in _init_dict(self, data, index, columns, dtype)
    338 
    339         return _arrays_to_mgr(arrays, data_names, index, columns,
--> 340                               dtype=dtype)
    341 
    342     def _init_ndarray(self, values, index, columns, dtype=None,

c:\Users\GJWood\Anaconda\lib\site-packages\pandas\core\frame.pyc in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
   4788 
   4789     # don't force copy because getting jammed in an ndarray anyway
-> 4790     arrays = _homogenize(arrays, index, dtype)
   4791 
   4792     # from BlockManager perspective

c:\Users\GJWood\Anaconda\lib\site-packages\pandas\core\frame.pyc in _homogenize(data, index, dtype)
   5102 
   5103             v = _sanitize_array(v, index, dtype=dtype, copy=False,
-> 5104                                 raise_cast_failure=False)
   5105 
   5106         homogenized.append(v)

c:\Users\GJWood\Anaconda\lib\site-packages\pandas\core\series.pyc in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
   2711     elif subarr.ndim > 1:
   2712         if isinstance(data, np.ndarray):
-> 2713             raise Exception('Data must be 1-dimensional')
   2714         else:
   2715             subarr = _asarray_tuplesafe(data, dtype=dtype)

Exception: Data must be 1-dimensional
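
Note that this traceback comes from the Hart 85 disaggregator (hart_85.pyc, h.disaggregate, 'disag_gjw_hart.hdf5'), not from CombinatorialOptimisation. The "Data must be 1-dimensional" exception arises when a two-column power DataFrame (active plus reactive) reaches code that expects a single 1-D series. A quick pre-check, assuming power_series() behaves as in nilmtk (yielding 1-D Series chunks of a single AC type):


In [ ]:
# Confirm mains can be loaded as a single 1-D power series before disaggregating
chunk = next(mains.power_series())
print(chunk.ndim)  # expect 1, i.e. one power column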

In [ ]:
disag_co = DataSet(disag_filename)
disag_co_elec = disag_co.buildings[building_number].elec

In [ ]:
from nilmtk.metrics import f1_score
f1_co = f1_score(disag_co_elec, elec) # compare predictions against the metered data
f1_co.index = disag_co_elec.get_labels(f1_co.index)
f1_co.plot(kind='barh')
plt.ylabel('appliance');
plt.xlabel('f-score');
plt.title("CO");