In [1]:
from __future__ import print_function, division
import matplotlib
matplotlib.use('nbagg')  # interactive plots in IPython; the nbagg backend is new in matplotlib v1.4
# %matplotlib inline

In [2]:
import matplotlib.pyplot as plt
from nilmtk import DataSet, MeterGroup
import pandas as pd
import numpy as np
from time import time


Couldn't import dot_parser, loading of dot files will not be possible.
/usr/local/lib/python2.7/dist-packages/bottleneck/__init__.py:13: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility
  from .func import (nansum, nanmax, nanmin, nanmean, nanstd, nanvar, median,
/usr/local/lib/python2.7/dist-packages/bottleneck/__init__.py:19: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility
  from .move import (move_sum, move_nansum,

In [3]:
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.structure import RecurrentNetwork, FullConnection
from pybrain.structure.modules import LSTMLayer, BiasUnit, LinearLayer, TanhLayer, SigmoidLayer

In [4]:
CONFIG = dict(
    EPOCHS_PER_CYCLE = 5,
    CYCLES = 6,
    HIDDEN_LAYERS = [50],
    PEEPHOLES = True,
    TRAINERCLASS = RPropMinusTrainer,
    # Alternatively, try BackpropTrainer (import it from pybrain.supervised and
    # pass e.g. momentum=0.9, learningrate=0.00001 when the trainer is
    # constructed in cell In [15] below):
    # TRAINERCLASS = BackpropTrainer,
    INPUTS = [],  # e.g. 'hour of day (int)', 'outside temperature', 'is business day (-1, 1)'
    EXPERIMENT_NUMBER = 20
)

In [5]:
# Load the UK-DALE dataset and restrict it to a single week
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-01-07")
elec = dataset.buildings[1].elec

In [6]:
# The top-5 target appliances identified in the UK-DALE paper would be:
# APPLIANCES = ['kettle', 'dish washer', 'HTPC', 'washer dryer', 'fridge freezer']
# For this experiment, select just two:
APPLIANCES = ['kettle', 'toaster']
selected_meters = [elec[appliance] for appliance in APPLIANCES]
selected_meters.append(elec.mains())
selected = MeterGroup(selected_meters)

In [7]:
df = selected.dataframe_of_meters()

In [8]:
# Use human-readable column names
df.columns = selected.get_labels(df.columns)

In [9]:
# Synthesise "mains" as kettle + toaster; the target is the toaster alone.
# diff() turns both series into first differences (changes between samples).
mains = (df['Toaster'] + df['Kettle']).fillna(0).diff().dropna()
appliances = df['Toaster'].fillna(0).diff().dropna()
del df

In [10]:
# Constrain targets to [-1, 1] because the output layer uses tanh
maximum = appliances.abs().max()
appliances /= maximum
mains_same_scale_as_appliances = mains / maximum

# standardise input
mains = (mains - mains.mean()) / mains.std()
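
A quick sanity check (not in the original notebook): the target should now lie within [-1, 1] and the standardised input should have roughly zero mean and unit variance:

assert appliances.abs().max() <= 1.0
print(mains.mean(), mains.std())  # expect values close to 0 and 1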

In [11]:
ax = mains.plot()
ax = appliances.plot(ax=ax)
plt.show()



In [12]:
# Build PyBrain dataset.  setField() loads each full series as a single
# sequence in one shot, which is much faster than appending row by row.
N_INPUTS = 1
N_OUTPUTS = 1
ds = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds.newSequence()
ds.setField('input', pd.DataFrame(mains).values)
ds.setField('target', pd.DataFrame(appliances).values)
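
For reference, a minimal sketch of the same dataset built through PyBrain's appendLinked API (ds2 is an illustrative name; this loop is slower than the setField calls above):

ds2 = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds2.newSequence()
for x, y in zip(mains.values, appliances.values):
    ds2.appendLinked([x], [y])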

In [13]:
ds.getSequence(0)


/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/datasets/sequential.py:45: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  return self.getField(field)[seq[index]:]
Out[13]:
[array([[  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07],
        ..., 
        [  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07]]), array([[ 0.],
        [ 0.],
        [ 0.],
        ..., 
        [ 0.],
        [ 0.],
        [ 0.]])]

In [14]:
# Build network
net = RecurrentNetwork()

def lstm_layer_name(i):
    return 'LSTM{:d}'.format(i)

# Add modules
net.addInputModule(LinearLayer(dim=ds.indim, name='in'))
net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
net.addModule(TanhLayer(10, name='tanh_input')) 
net.addModule(TanhLayer(10, name='tanh_output')) 
for i, n_cells in enumerate(CONFIG['HIDDEN_LAYERS']):
    net.addModule(LSTMLayer(n_cells, name=lstm_layer_name(i+1), peepholes=CONFIG['PEEPHOLES']))   

# Bias
bias = BiasUnit()
net.addModule(bias)

#c_output_bias = FullConnection(bias, net['out'], name='c_output_bias')
#c_output_bias._setParameters(np.zeros(1))
#net.addConnection(c_output_bias)

c_tanh_input_bias = FullConnection(bias, net['tanh_input'], name='c_tanh_input_bias')
c_tanh_input_bias._setParameters(np.random.uniform(-0.1, 0.1, size=c_tanh_input_bias.paramdim))
net.addConnection(c_tanh_input_bias)

c_tanh_output_bias = FullConnection(bias, net['tanh_output'], name='c_tanh_output_bias')
c_tanh_output_bias._setParameters(np.random.uniform(-0.1, 0.1, size=c_tanh_output_bias.paramdim))
net.addConnection(c_tanh_output_bias)

forwards_connection = FullConnection(net['in'], net['tanh_input'], name='c_in_to_tanh')
forwards_connection._setParameters(np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim))
net.addConnection(forwards_connection)

# Add other connections
n_hidden_layers = len(CONFIG['HIDDEN_LAYERS'])
prev_layer_name = 'tanh_input'
for i in range(n_hidden_layers):
    hidden_layer_i = i + 1
    layer_name = lstm_layer_name(hidden_layer_i)
    
    recurrent_connection = FullConnection(net[layer_name], net[layer_name], name='c_' + layer_name + '_to_' + layer_name)
    recurrent_connection._setParameters(np.random.uniform(-0.05, 0.05, size=recurrent_connection.paramdim))
    net.addRecurrentConnection(recurrent_connection)
    
    #bias_connection = FullConnection(bias, net[layer_name], name='c_' + layer_name + '_bias')
    #bias_connection._params = np.zeros(bias_connection.paramdim)
    #net.addConnection(bias_connection)
    
    forwards_connection = FullConnection(net[prev_layer_name], net[layer_name], name='c_' + prev_layer_name + '_to_' + layer_name)
    forwards_connection._setParameters(np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim))
    net.addConnection(forwards_connection)
    prev_layer_name = layer_name
    
layer_name = lstm_layer_name(n_hidden_layers)  # i.e. the last LSTM layer
connect_to_out = FullConnection(net[layer_name], net['tanh_output'], name='c_' + layer_name + '_to_tanh_out')
connect_to_out._setParameters(np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim))
net.addConnection(connect_to_out)

connect_to_out = FullConnection(net['tanh_output'], net['out'], name='c_tanh_to_out')
connect_to_out._setParameters(np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim))
net.addConnection(connect_to_out)

net.sortModules()
print(net)


RecurrentNetwork-7
   Modules:
    [<BiasUnit 'BiasUnit-6'>, <LinearLayer 'in'>, <TanhLayer 'tanh_input'>, <LSTMLayer 'LSTM1'>, <TanhLayer 'tanh_output'>, <TanhLayer 'out'>]
   Connections:
    [<FullConnection 'c_LSTM1_to_tanh_out': 'LSTM1' -> 'tanh_output'>, <FullConnection 'c_in_to_tanh': 'in' -> 'tanh_input'>, <FullConnection 'c_tanh_input_bias': 'BiasUnit-6' -> 'tanh_input'>, <FullConnection 'c_tanh_input_to_LSTM1': 'tanh_input' -> 'LSTM1'>, <FullConnection 'c_tanh_output_bias': 'BiasUnit-6' -> 'tanh_output'>, <FullConnection 'c_tanh_to_out': 'tanh_output' -> 'out'>]
   Recurrent Connections:
    [<FullConnection 'c_LSTM1_to_LSTM1': 'LSTM1' -> 'LSTM1'>]
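
A note on the cell above: every connection repeats the same create / randomise / add pattern. A small helper could wrap it; add_full_connection is a hypothetical name, not part of PyBrain:

def add_full_connection(net, src, dst, name, scale=0.2, recurrent=False):
    """Create a FullConnection with uniform random weights in [-scale, scale]."""
    conn = FullConnection(src, dst, name=name)
    conn._setParameters(np.random.uniform(-scale, scale, size=conn.paramdim))
    if recurrent:
        net.addRecurrentConnection(conn)
    else:
        net.addConnection(conn)
    return conn

# Usage (illustrative):
# add_full_connection(net, net['in'], net['tanh_input'], 'c_in_to_tanh')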

In [15]:
# Instantiate the trainer class chosen in CONFIG
trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True)

In [16]:
# carry out the training
net.reset()
# train_errors = []
t0 = time()
EPOCHS = CONFIG['EPOCHS_PER_CYCLE'] * CONFIG['CYCLES']
# trainer.trainUntilConvergence(maxEpochs=EPOCHS, verbose=True)
# start_time = time()
print("Starting training with", EPOCHS, "epochs...")
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
#    train_errors.append(trainer.testOnData())
    # epoch = (i+1) * CONFIG['EPOCHS_PER_CYCLE']
    # seconds_elapsed = time() - start_time
    # seconds_per_epoch = seconds_elapsed / epoch
    # seconds_remaining = (EPOCHS - epoch) * seconds_per_epoch
    # td_elapsed = timedelta(seconds=seconds_elapsed)
    # td_elapsed_str = str(td_elapsed).split('.')[0]
    # eta = (datetime.now() + timedelta(seconds=seconds_remaining)).time()
    # eta = eta.strftime("%H:%M:%S")
    # print("\r epoch = {}/{}    error = {}  elapsed = {}   ETA = {}"
    #       .format(epoch, EPOCHS, train_errors[-1], td_elapsed_str, eta),
    #       end="")
    # stdout.flush()
print("Finished training.  total seconds =", time() - t0)


Starting training with 30 epochs...
epoch      0  total error     0.001009   avg weight       0.12673
epoch      1  total error    0.0067575   avg weight       0.16172
epoch      2  total error      0.00043   avg weight       0.18986
epoch      3  total error      0.19609   avg weight       0.21049
epoch      4  total error    0.0017174   avg weight       0.21906
epoch      5  total error     0.012219   avg weight       0.23424
epoch      6  total error     0.039502   avg weight       0.24298
epoch      7  total error    0.0021483   avg weight       0.24471
epoch      8  total error    0.0032025   avg weight       0.25076
epoch      9  total error     0.011332   avg weight       0.25028
epoch     10  total error   0.00042501   avg weight       0.25835
epoch     11  total error   0.00066633   avg weight       0.26184
epoch     12  total error   0.00076163   avg weight       0.26318
epoch     13  total error    0.0003418   avg weight       0.26292
epoch     14  total error    0.0005282   avg weight       0.26309
epoch     15  total error   0.00026973   avg weight       0.26675
epoch     16  total error          nan   avg weight           nan
epoch     17  total error          nan   avg weight           nan
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-16-b36b80f7b18a> in <module>()
      8 print("Starting training with", EPOCHS, "epochs...")
      9 for i in xrange(CONFIG['CYCLES']):
---> 10     trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
     11 #    train_errors.append(trainer.testOnData())
     12     # epoch = (i+1) * CONFIG['EPOCHS_PER_CYCLE']

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/supervised/trainers/trainer.pyc in trainEpochs(self, epochs, *args, **kwargs)
     35         Additional arguments are passed on to the train method."""
     36         for dummy in range(epochs):
---> 37             self.train(*args, **kwargs)
     38 
     39     def train(self):

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/supervised/trainers/rprop.pyc in train(self)
     42         ponderation = 0
     43         for seq in self.ds._provideSequences():
---> 44             e, p = self._calcDerivs(seq)
     45             errors += e
     46             ponderation += p

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/supervised/trainers/backprop.pyc in _calcDerivs(self, seq)
    103                 # ndarray class fixes something,
    104                 str(outerr)
--> 105                 self.module.backActivate(outerr)
    106 
    107         return error, ponderation

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/structure/networks/recurrent.pyc in backActivate(self, outerr)
     58         the error on the input."""
     59         self.outputerror[self.offset - 1] = outerr
---> 60         self.backward()
     61         return self.inputerror[self.offset].copy()
     62 

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/structure/networks/recurrent.pyc in backward(self)
     72         """Produce the input error from the output error."""
     73         self.offset -= 1
---> 74         super(RecurrentNetworkComponent, self).backward()
     75 
     76     def _isLastTimestep(self):

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/structure/modules/module.pyc in backward(self)
     80                                      self.inputerror[self.offset],
     81                                      self.outputbuffer[self.offset],
---> 82                                      self.inputbuffer[self.offset])
     83 
     84     def reset(self):

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/structure/networks/recurrent.pyc in _backwardImplementation(self, outerr, inerr, outbuf, inbuf)
    124         for m in reversed(self.modulesSorted):
    125             for c in self.connections[m]:
--> 126                 c.backward(offset, offset)
    127             m.offset = offset
    128             m.backward()

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/structure/connections/connection.pyc in backward(self, inmodOffset, outmodOffset)
     91             self.outmod.inputerror[outmodOffset, self.outSliceFrom:self.outSliceTo],
     92             self.inmod.outputerror[inmodOffset, self.inSliceFrom:self.inSliceTo],
---> 93             self.inmod.outputbuffer[inmodOffset, self.inSliceFrom:self.inSliceTo])
     94 
     95     def _forwardImplementation(self, inbuf, outbuf):

/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/structure/connections/full.pyc in _backwardImplementation(self, outerr, inerr, inbuf)
     22         inerr += dot(reshape(self.params, (self.outdim, self.indim)).T, outerr)
     23         ds = self.derivs
---> 24         ds += outer(inbuf, outerr).T.flatten()
     25 
     26     def whichBuffers(self, paramIndex):

/usr/local/lib/python2.7/dist-packages/numpy/core/numeric.pyc in outer(a, b, out)
   1074     a = asarray(a)
   1075     b = asarray(b)
-> 1076     return multiply(a.ravel()[:, newaxis], b.ravel()[newaxis,:], out)
   1077 
   1078 # try to import blas optimized dot if available

KeyboardInterrupt: 
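
Note: the total error became NaN from epoch 16 onwards (the weights diverged), so training was interrupted by hand. A rough guard, sketched here with the notebook's own names and not part of the original run, is to snapshot the parameters each cycle and roll back when they blow up:

best_params = net.params.copy()
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
    if np.isnan(net.params).any():
        net.params[:] = best_params  # restore the last finite weights in place
        break
    best_params = net.params.copy()
# (The trainer's internal RProp step sizes are not rolled back; this is only a sketch.)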

In [ ]:
# Disaggregate!
START = "2014-01-01"
END = "2014-01-03"
print("Starting disaggregation...")
net.reset()
estimates = pd.Series(index=appliances[START:END].index)
for date, mains_value in mains[START:END].iteritems():
    estimates[date] = net.activate([mains_value])[0]  # activate() returns a length-1 array
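
Because the target was diff()ed and divided by maximum in cell In [10], cumulative-summing and rescaling the estimates recovers an approximate power signal in watts (estimates_watts is an illustrative name):

estimates_watts = (estimates * maximum).cumsum()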

In [ ]:
estimates.plot()
plt.show()

In [ ]:
mains[START:END].plot()
plt.show()

In [ ]:
appliances[START:END].plot()
plt.show()

In [ ]:
ax = estimates[START:END].cumsum().plot(label='estimates')
ax = mains_same_scale_as_appliances[START:END].cumsum().plot(ax=ax, label='aggregate')
ax = appliances[START:END].cumsum().plot(ax=ax, label='appliance')
plt.legend()
plt.show()

In [ ]:
estimates.cumsum().to_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')
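
The saved series can be reloaded later with pandas (a sketch; 'saved' is an illustrative name):

saved = pd.read_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')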
