In [1]:
from __future__ import print_function, division
import matplotlib
matplotlib.use('nbagg')  # interactive plots in IPython. New in matplotlib v1.4
# %matplotlib inline

In [2]:
import matplotlib.pyplot as plt
from nilmtk import DataSet, MeterGroup
import pandas as pd
import numpy as np
from time import time



In [3]:
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.structure import RecurrentNetwork, FullConnection
from pybrain.structure.modules import LSTMLayer, BiasUnit, LinearLayer, TanhLayer, SigmoidLayer

In [4]:
CONFIG = dict(
    EPOCHS_PER_CYCLE = 5,
    CYCLES = 6,
    HIDDEN_LAYERS = [50, 50],
    PEEPHOLES = True,
    TRAINERCLASS = RPropMinusTrainer,
    # Alternatively, use plain backprop (the extra kwargs would then be
    # passed when the trainer is constructed below):
    # TRAINERCLASS = BackpropTrainer  # with momentum=0.9, learningrate=0.00001
    INPUTS = [],  # could also include 'hour of day (int)', 'outside temperature', 'is business day (-1, 1)'
    EXPERIMENT_NUMBER = 27
)

In [5]:
# Load the UK-DALE dataset and select a one-week window from building 1
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-01-07")
elec = dataset.buildings[1].elec

In [6]:
# The top-5 meters identified in the UK-DALE paper are listed below,
# but this experiment starts with just two appliances
# APPLIANCES = ['kettle', 'dish washer', 'HTPC', 'washer dryer', 'fridge freezer']
APPLIANCES = ['kettle', 'toaster']
selected_meters = [elec[appliance] for appliance in APPLIANCES]
selected_meters.append(elec.mains())
selected = MeterGroup(selected_meters)
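
NILMTK can also rank submeters by energy consumed, so the top-5 list from the paper could be derived programmatically rather than hard-coded. A hedged sketch (select_top_k loads and sums each channel, so it can be slow on long windows):

In [ ]:
# Sketch: pick the five submeters with the highest energy in the current
# window, mirroring the manual APPLIANCES list above.
top5 = elec.submeters().select_top_k(k=5)
print(top5)
# selected = MeterGroup(list(top5.meters) + [elec.mains()])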

In [7]:
df = selected.dataframe_of_meters()

In [8]:
# Use human-readable column names
df.columns = selected.get_labels(df.columns)

In [9]:
# Synthesise an aggregate ('mains') signal from the two appliance channels
# and use the toaster as the disaggregation target; both series are
# converted to first differences of power
mains = (df['Toaster'] + df['Kettle']).fillna(0).diff().dropna()
appliances = df['Toaster'].fillna(0).diff().dropna()
del df

In [10]:
# Constrain targets to [-1, 1] because the output layer is tanh
maximum = appliances.abs().max()
appliances /= maximum
mains_same_scale_as_appliances = mains / maximum

# Standardise the input to zero mean and unit variance
mains = (mains - mains.mean()) / mains.std()
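
Because both series were first-differenced and then rescaled, mapping network outputs back to watts needs the inverse transform: a cumulative sum to undo the diff, then multiplication by maximum to undo the scaling. A minimal sketch (undo_preprocessing is a hypothetical helper, not used elsewhere in this notebook):

In [ ]:
def undo_preprocessing(diff_series, scale=maximum):
    """Hypothetical helper: invert the diff + rescale preprocessing.
    Returns a power-like series in watts, up to the unknown initial
    value dropped by diff()."""
    return diff_series.cumsum() * scale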

In [11]:
# Sanity-check the preprocessed signals by plotting them together
ax = mains.plot()
ax = appliances.plot(ax=ax)
plt.show()



In [12]:
# Build the PyBrain dataset; the whole window is treated as one sequence
N_OUTPUTS = 1
N_INPUTS = 1
ds = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds.newSequence()
ds.setField('input', pd.DataFrame(mains).values)
ds.setField('target', pd.DataFrame(appliances).values)
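
setField writes the whole arrays into the dataset in one go. The more conventional PyBrain idiom appends one (input, target) pair at a time; a sketch for comparison (much slower, shown only to make the per-sample pairing explicit):

In [ ]:
# Sketch: the equivalent dataset built with PyBrain's usual appendLinked idiom.
ds2 = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds2.newSequence()
for x, y in zip(mains.values, appliances.values):
    ds2.appendLinked([x], [y])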

In [13]:
# Inspect the (input, target) arrays of the first (and only) sequence
ds.getSequence(0)


Out[13]:
[array([[  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07],
        ..., 
        [  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07]]), array([[ 0.],
        [ 0.],
        [ 0.],
        ..., 
        [ 0.],
        [ 0.],
        [ 0.]])]

In [14]:
# Build network
net = RecurrentNetwork()

def lstm_layer_name(i):
    return 'LSTM{:d}'.format(i)

# Add modules
net.addInputModule(LinearLayer(dim=ds.indim, name='in'))
net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
net.addModule(TanhLayer(10, name='tanh_input')) 
net.addModule(TanhLayer(10, name='tanh_output')) 
for i, n_cells in enumerate(CONFIG['HIDDEN_LAYERS']):
    net.addModule(LSTMLayer(n_cells, name=lstm_layer_name(i+1), peepholes=CONFIG['PEEPHOLES']))   

# Bias
bias = BiasUnit()
net.addModule(bias)

# (A bias connection straight onto the output layer was tried but is disabled.)

c_tanh_input_bias = FullConnection(bias, net['tanh_input'], name='c_tanh_input_bias')
c_tanh_input_bias._params = np.random.uniform(-0.1, 0.1, size=c_tanh_input_bias.paramdim)
net.addConnection(c_tanh_input_bias)

c_tanh_output_bias = FullConnection(bias, net['tanh_output'], name='c_tanh_output_bias')
c_tanh_output_bias._params = np.random.uniform(-0.1, 0.1, size=c_tanh_output_bias.paramdim)
net.addConnection(c_tanh_output_bias)

# Connect the input layer to the input-side tanh layer
forwards_connection = FullConnection(net['in'], net['tanh_input'], name='c_in_to_tanh')
forwards_connection._params = np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim)
net.addConnection(forwards_connection)

# Wire up the LSTM stack: a recurrent self-connection per layer, plus a
# feed-forward connection from the previous layer
n_hidden_layers = len(CONFIG['HIDDEN_LAYERS'])
prev_layer_name = 'tanh_input'
for i in range(n_hidden_layers):
    hidden_layer_i = i + 1
    layer_name = lstm_layer_name(hidden_layer_i)
    
    recurrent_connection = FullConnection(net[layer_name], net[layer_name], name='c_' + layer_name + '_to_' + layer_name)
    recurrent_connection._params = np.random.uniform(-0.05, 0.05, size=recurrent_connection.paramdim)
    net.addRecurrentConnection(recurrent_connection)
    
    # (A bias connection onto each LSTM layer was tried but is disabled.)
    
    forwards_connection = FullConnection(net[prev_layer_name], net[layer_name], name='c_' + prev_layer_name + '_to_' + layer_name)
    forwards_connection._params = np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim)
    net.addConnection(forwards_connection)
    prev_layer_name = layer_name
    
# Connect the last LSTM layer to the output-side tanh layer...
layer_name = lstm_layer_name(n_hidden_layers)
connect_to_out = FullConnection(net[layer_name], net['tanh_output'], name='c_' + layer_name + '_to_tanh_out')
connect_to_out._params = np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim)
net.addConnection(connect_to_out)

# ...and that tanh layer to the output layer
connect_to_out = FullConnection(net['tanh_output'], net['out'], name='c_tanh_to_out')
connect_to_out._params = np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim)
net.addConnection(connect_to_out)

net.sortModules()
print(net)


RecurrentNetwork-8
   Modules:
    [<BiasUnit 'BiasUnit-7'>, <LinearLayer 'in'>, <TanhLayer 'tanh_input'>, <LSTMLayer 'LSTM1'>, <LSTMLayer 'LSTM2'>, <TanhLayer 'tanh_output'>, <TanhLayer 'out'>]
   Connections:
    [<FullConnection 'c_LSTM1_to_LSTM2': 'LSTM1' -> 'LSTM2'>, <FullConnection 'c_LSTM2_to_tanh_out': 'LSTM2' -> 'tanh_output'>, <FullConnection 'c_in_to_tanh': 'in' -> 'tanh_input'>, <FullConnection 'c_tanh_input_bias': 'BiasUnit-7' -> 'tanh_input'>, <FullConnection 'c_tanh_input_to_LSTM1': 'tanh_input' -> 'LSTM1'>, <FullConnection 'c_tanh_output_bias': 'BiasUnit-7' -> 'tanh_output'>, <FullConnection 'c_tanh_to_out': 'tanh_output' -> 'out'>]
   Recurrent Connections:
    [<FullConnection 'c_LSTM1_to_LSTM1': 'LSTM1' -> 'LSTM1'>, <FullConnection 'c_LSTM2_to_LSTM2': 'LSTM2' -> 'LSTM2'>]
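
Building the network by hand gives full control over the extra tanh layers, the bias wiring and the uniform weight initialisation. For comparison, PyBrain's buildNetwork shortcut can assemble a broadly similar LSTM stack in one call; this sketch omits those customisations, so it is not an exact equivalent of the network above:

In [ ]:
from pybrain.tools.shortcuts import buildNetwork
# Sketch: a similar (but not identical) network via PyBrain's shortcut.
# No tanh layers either side of the LSTMs, and default initialisation.
net2 = buildNetwork(N_INPUTS, 50, 50, N_OUTPUTS,
                    hiddenclass=LSTMLayer, outclass=TanhLayer,
                    recurrent=True, peepholes=CONFIG['PEEPHOLES'])
print(net2)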

In [15]:
# Define the trainer; delta0 sets RProp's initial step size
trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True, delta0=0.001)
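
If plain backprop is preferred (as hinted in the CONFIG comment), the trainer would be constructed along these lines; the momentum and learning rate are the values suggested in that comment, not tuned ones, and the RProp-specific delta0 argument is dropped:

In [ ]:
from pybrain.supervised import BackpropTrainer
# Sketch: alternative trainer; assign it to `trainer` to actually use it.
trainer_bp = BackpropTrainer(net, dataset=ds, verbose=True,
                             momentum=0.9, learningrate=0.00001)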

In [23]:
# Carry out the training
net.reset()
t0 = time()
EPOCHS = CONFIG['EPOCHS_PER_CYCLE'] * CONFIG['CYCLES']
print("Starting training with", EPOCHS, "epochs...")
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
    # (Optionally record trainer.testOnData() and print elapsed time / ETA
    # per cycle here.)
print("Finished training.  total seconds =", time() - t0)


Starting training with 30 epochs...
epoch     30  total error   0.00014874   avg weight       0.16229
epoch     31  total error   0.00014686   avg weight       0.16954
epoch     32  total error   0.00014537   avg weight       0.17074
epoch     33  total error   0.00014352   avg weight       0.17974
epoch     34  total error   0.00014177   avg weight       0.18547
epoch     35  total error   0.00013974   avg weight       0.19769
epoch     36  total error   0.00013765   avg weight       0.20793
epoch     37  total error   0.00013669   avg weight       0.22592
epoch     38  total error   0.00013648   avg weight       0.22912
epoch     39  total error   0.00013385   avg weight       0.24714
epoch     40  total error   0.00013122   avg weight       0.26037
epoch     41  total error   0.00012792   avg weight       0.28771
epoch     42  total error   0.00012395   avg weight       0.31065
epoch     43  total error   0.00011931   avg weight       0.33982
epoch     44  total error   0.00011386   avg weight       0.35532
epoch     45  total error   0.00010802   avg weight       0.38739
epoch     46  total error   0.00010454   avg weight        0.4019
epoch     47  total error   0.00010307   avg weight       0.42146
epoch     48  total error   9.9174e-05   avg weight       0.44001
epoch     49  total error   9.4455e-05   avg weight       0.47119
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-23-b36b80f7b18a> in <module>()
----> trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])

[... traceback through PyBrain's trainer, network and connection internals
elided: the run was stopped by hand with a keyboard interrupt ...]

KeyboardInterrupt: 
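
Since training takes a long time and can be interrupted (as above), it is worth checkpointing the weights. A sketch using PyBrain's XML serialiser; the filename is arbitrary:

In [ ]:
from pybrain.tools.customxml import NetworkWriter, NetworkReader
# Sketch: save the (partially) trained weights so an interrupted run
# is not lost, and reload them later with NetworkReader.
NetworkWriter.writeToFile(net, 'net_{:03d}.xml'.format(CONFIG['EXPERIMENT_NUMBER']))
# net = NetworkReader.readFrom('net_027.xml')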

In [ ]:
# Disaggregate!
START = "2014-01-01"
END = "2014-01-03"
print("Starting disaggregation...")
net.reset()
# Feed the standardised aggregate through the network one sample at a time
estimates = pd.Series(index=appliances[START:END].index)
for date, mains_value in mains[START:END].iteritems():
    estimates[date] = net.activate(mains_value)
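
The network emits first differences on the appliance scale, so (as with the targets) approximate watts are recovered by cumulative summation and multiplying by maximum. A sketch:

In [ ]:
# Sketch: invert the diff + rescale preprocessing on the estimates.
estimated_power = estimates.cumsum() * maximum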

In [ ]:
estimates.plot()
plt.show()

In [ ]:
mains[START:END].plot()
plt.show()

In [ ]:
appliances[START:END].plot()
plt.show()

In [ ]:
ax = estimates[START:END].cumsum().plot(label='estimates')
ax = mains_same_scale_as_appliances[START:END].cumsum().plot(ax=ax, label='aggregate')
ax = appliances[START:END].cumsum().plot(ax=ax, label='appliance ground truth')
plt.legend()
plt.show()
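
A single number is useful alongside the plot. Mean absolute error between the cumulative estimate and the cumulative ground truth is one simple choice; this is illustrative only, not a standard NILM metric:

In [ ]:
# Sketch: MAE on the cumulative, normalised series.
mae = (estimates[START:END].cumsum() - appliances[START:END].cumsum()).abs().mean()
print("MAE (cumulative, normalised units):", mae)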

In [ ]:
estimates.cumsum().to_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')
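
For later analysis the saved series can be read back with pandas:

In [ ]:
# Sketch: reload the saved cumulative estimates.
reloaded = pd.read_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')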
