In [1]:
from __future__ import print_function, division
import matplotlib
matplotlib.use('nbagg') # interactive plots in iPython. New in matplotlib v1.4
# %matplotlib inline

In [2]:
import matplotlib.pyplot as plt
from nilmtk import DataSet, MeterGroup
import pandas as pd
import numpy as np
from time import time


Couldn't import dot_parser, loading of dot files will not be possible.
/usr/local/lib/python2.7/dist-packages/bottleneck/__init__.py:13: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility
  from .func import (nansum, nanmax, nanmin, nanmean, nanstd, nanvar, median,
/usr/local/lib/python2.7/dist-packages/bottleneck/__init__.py:19: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility
  from .move import (move_sum, move_nansum,

In [3]:
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.structure import RecurrentNetwork, FullConnection
from pybrain.structure.modules import LSTMLayer, BiasUnit, LinearLayer, TanhLayer, SigmoidLayer

In [4]:
CONFIG = dict(
    EPOCHS_PER_CYCLE = 5,
    CYCLES = 6,
    HIDDEN_LAYERS = [15],
    PEEPHOLES = False,
    TRAINERCLASS = RPropMinusTrainer,
    # instead, you may also try
    # TRAINERCLASS = BackpropTrainer(net, dataset=trndata, verbose=True, 
    #                                momentum=0.9, learningrate=0.00001)
    INPUTS = [], #, 'hour of day (int)', 'outside temperature', 'is business day (-1, 1)'
    EXPERIMENT_NUMBER = 17
)

In [5]:
# Load dataset
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-01-07")
elec = dataset.buildings[1].elec

In [6]:
# Select top-5 meters identified in UK-DALE paper
# APPLIANCES = ['kettle', 'dish washer', 'HTPC', 'washer dryer', 'fridge freezer']
APPLIANCES = ['kettle', 'toaster']
selected_meters = [elec[appliance] for appliance in APPLIANCES]
selected_meters.append(elec.mains())
selected = MeterGroup(selected_meters)

In [7]:
df = selected.dataframe_of_meters()

In [8]:
# Use human-readable column names
df.columns = selected.get_labels(df.columns)

In [9]:
mains = (df['Toaster'] + df['Kettle']).fillna(0).diff().dropna()
appliances = df['Toaster'].fillna(0).diff().dropna()
del df

In [10]:
# Constrain outputs to [-1,1] because we're using TanH
maximum = appliances.abs().max()
appliances /= maximum
mains_same_scale_as_appliances = mains / maximum

# standardise input
mains = (mains - mains.mean()) / mains.std()

In [11]:
ax = mains.plot()
ax = appliances.plot(ax=ax)
plt.show()



In [12]:
# Build PyBrain dataset
N_OUTPUTS = 1
N_INPUTS = 1
N = len(mains)
ds = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds.newSequence()
ds.setField('input', pd.DataFrame(mains).values)
ds.setField('target', pd.DataFrame(appliances).values)

In [13]:
ds.getSequence(0)


/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/datasets/sequential.py:45: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  return self.getField(field)[seq[index]:]
Out[13]:
[array([[  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07],
        ..., 
        [  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07]]), array([[ 0.],
        [ 0.],
        [ 0.],
        ..., 
        [ 0.],
        [ 0.],
        [ 0.]])]

In [14]:
# Build network
net = RecurrentNetwork()

def lstm_layer_name(i):
    return 'LSTM{:d}'.format(i)

# Add modules
net.addInputModule(LinearLayer(ds.indim, name='in'))
net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
for i, n_cells in enumerate(CONFIG['HIDDEN_LAYERS']):
    net.addModule(LSTMLayer(n_cells, name=lstm_layer_name(i+1), peepholes=CONFIG['PEEPHOLES']))   

# Bias
bias = BiasUnit()
net.addModule(bias)
c_output_bias = FullConnection(bias, net['out'], name='c_output_bias')
c_output_bias._setParameters(np.zeros(1))
net.addConnection(c_output_bias)
    
# Add other connections
n_hidden_layers = len(CONFIG['HIDDEN_LAYERS'])
prev_layer_name = 'in'
for i in range(n_hidden_layers):
    hidden_layer_i = i + 1
    layer_name = lstm_layer_name(hidden_layer_i)
    
    recurrent_connection = FullConnection(net[layer_name], net[layer_name], name='c_' + layer_name + '_to_' + layer_name)
    recurrent_connection._params = np.random.uniform(-0.05, 0.05, size=recurrent_connection.paramdim)
    net.addRecurrentConnection(recurrent_connection)
    
    #bias_connection = FullConnection(bias, net[layer_name], name='c_' + layer_name + '_bias')
    #bias_connection._params = np.zeros(bias_connection.paramdim)
    #net.addConnection(bias_connection)
    
    forwards_connection = FullConnection(net[prev_layer_name], net[layer_name], name='c_' + prev_layer_name + '_to_' + layer_name)
    forwards_connection._params = np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim)
    net.addConnection(forwards_connection)
    prev_layer_name = layer_name
    
layer_name = lstm_layer_name(n_hidden_layers)
connect_to_out = FullConnection(net[layer_name], net['out'], name='c_' + layer_name + '_to_out')
connect_to_out._params = np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim)
net.addConnection(connect_to_out)

net.sortModules()
print(net)


RecurrentNetwork-5
   Modules:
    [<BiasUnit 'BiasUnit-4'>, <LinearLayer 'in'>, <LSTMLayer 'LSTM1'>, <TanhLayer 'out'>]
   Connections:
    [<FullConnection 'c_LSTM1_to_out': 'LSTM1' -> 'out'>, <FullConnection 'c_in_to_LSTM1': 'in' -> 'LSTM1'>, <FullConnection 'c_output_bias': 'BiasUnit-4' -> 'out'>]
   Recurrent Connections:
    [<FullConnection 'c_LSTM1_to_LSTM1': 'LSTM1' -> 'LSTM1'>]

In [15]:
# define a training method
trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True)

In [16]:
# carry out the training
net.reset()
# train_errors = []
t0 = time()
EPOCHS = CONFIG['EPOCHS_PER_CYCLE'] * CONFIG['CYCLES']
# trainer.trainUntilConvergence(maxEpochs=EPOCHS, verbose=True)
# start_time = time()
print("Starting training with", EPOCHS, "epochs...")
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
#    train_errors.append(trainer.testOnData())
    # epoch = (i+1) * CONFIG['EPOCHS_PER_CYCLE']
    # seconds_elapsed = time() - start_time
    # seconds_per_epoch = seconds_elapsed / epoch
    # seconds_remaining = (EPOCHS - epoch) * seconds_per_epoch
    # td_elapsed = timedelta(seconds=seconds_elapsed)
    # td_elapsed_str = str(td_elapsed).split('.')[0]
    # eta = (datetime.now() + timedelta(seconds=seconds_remaining)).time()
    # eta = eta.strftime("%H:%M:%S")
    # print("\r epoch = {}/{}    error = {}  elapsed = {}   ETA = {}"
    #       .format(epoch, EPOCHS, train_errors[-1], td_elapsed_str, eta),
    #       end="")
    # stdout.flush()
print("Finished training.  total seconds =", time() - t0)


Starting training with 30 epochs...
epoch      0  total error   0.00022833   avg weight      0.041528
epoch      1  total error    0.0051486   avg weight       0.10786
epoch      2  total error   0.00020768   avg weight        0.1315
epoch      3  total error     0.001413   avg weight       0.15818
epoch      4  total error    0.0002011   avg weight       0.18506
epoch      5  total error    0.0004694   avg weight       0.19379
epoch      6  total error   0.00017919   avg weight        0.2144
epoch      7  total error   0.00022457   avg weight       0.22921
epoch      8  total error    0.0001534   avg weight       0.25929
epoch      9  total error   0.00016549   avg weight       0.27215
epoch     10  total error   0.00014458   avg weight        0.2776
epoch     11  total error   0.00014599   avg weight        0.2963
epoch     12  total error   0.00013964   avg weight       0.32619
epoch     13  total error   0.00013926   avg weight       0.34831
epoch     14  total error    0.0001368   avg weight       0.36482
epoch     15  total error   0.00013508   avg weight       0.38405
epoch     16  total error   0.00013263   avg weight       0.43347
epoch     17  total error   0.00013142   avg weight       0.49393
epoch     18  total error   0.00012861   avg weight       0.54146
epoch     19  total error   0.00012571   avg weight       0.62393
epoch     20  total error   0.00012361   avg weight       0.72082
epoch     21  total error   0.00012067   avg weight       0.82931
epoch     22  total error   0.00011713   avg weight       0.95228
epoch     23  total error   0.00011209   avg weight       0.89565
epoch     24  total error   0.00010599   avg weight       0.96393
epoch     25  total error   0.00010437   avg weight        1.0658
epoch     26  total error   0.00010133   avg weight        1.0756
epoch     27  total error   9.4999e-05   avg weight        1.0396
epoch     28  total error   9.5894e-05   avg weight        1.0692
epoch     29  total error   8.3618e-05   avg weight        1.1413
Finished training.  total seconds = 1513.70633817

In [17]:
# Disaggregate!
START = "2014-01-01"
END = "2014-01-03"
print("Starting disaggregation...")
net.reset()
estimates = pd.Series(index=appliances[START:END].index)
for date, mains_value in mains[START:END].iteritems():
    estimates[date] = net.activate(mains_value)


Starting disaggregation...

In [18]:
estimates.plot()
plt.show()



In [19]:
mains[START:END].plot()
plt.show()



In [20]:
appliances[START:END].plot()
plt.show()



In [21]:
ax = estimates[START:END].cumsum().plot(label='estimates')
ax = mains_same_scale_as_appliances[START:END].cumsum().plot(ax=ax, label='aggregate')
ax = appliances[START:END].cumsum().plot(ax=ax)
plt.legend()
plt.show()



In [22]:
estimates.cumsum().to_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')

In [22]: