In [1]:
from __future__ import print_function, division
import matplotlib
matplotlib.use('nbagg')  # interactive plots in IPython (the nbagg backend is new in matplotlib v1.4)
# %matplotlib inline

In [2]:
import matplotlib.pyplot as plt
from nilmtk import DataSet, MeterGroup
import pandas as pd
import numpy as np
from time import time



In [3]:
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.structure import RecurrentNetwork, FullConnection
from pybrain.structure.modules import LSTMLayer, BiasUnit, LinearLayer, TanhLayer, SigmoidLayer

In [4]:
CONFIG = dict(
    EPOCHS_PER_CYCLE = 5,
    CYCLES = 6,
    HIDDEN_LAYERS = [50],
    PEEPHOLES = True,
    TRAINERCLASS = RPropMinusTrainer,
    # Alternatively, use backprop; pass the hyperparameters when the trainer
    # is instantiated (see the sketch after the trainer cell below), e.g.
    # momentum=0.9, learningrate=0.00001:
    # TRAINERCLASS = BackpropTrainer,
    INPUTS = [],  # could also try: 'hour of day (int)', 'outside temperature', 'is business day (-1, 1)'
    EXPERIMENT_NUMBER = 21
)

In [5]:
# Load UK-DALE and restrict it to the first week of 2014 to keep training fast
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-01-07")
elec = dataset.buildings[1].elec
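
To see what the building exposes before picking appliances, printing the MeterGroup is usually enough (standard nilmtk behaviour):

# List all meters (and their appliances) in building 1:
print(elec)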

In [6]:
# Select appliances. The top-5 meters identified in the UK-DALE paper are
# commented out; this experiment uses just kettle and toaster.
# APPLIANCES = ['kettle', 'dish washer', 'HTPC', 'washer dryer', 'fridge freezer']
APPLIANCES = ['kettle', 'toaster']
selected_meters = [elec[appliance] for appliance in APPLIANCES]
selected_meters.append(elec.mains())
selected = MeterGroup(selected_meters)
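
As an aside, nilmtk can also rank submeters by energy rather than hand-picking names. A minimal sketch, assuming the standard MeterGroup API (select_top_k and the .meters list):

# Hypothetical alternative: take the top-5 submeters by total energy,
# then add mains, instead of listing appliance names explicitly.
top5 = elec.submeters().select_top_k(k=5)
selected = MeterGroup(list(top5.meters) + [elec.mains()])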

In [7]:
df = selected.dataframe_of_meters()

In [8]:
# Use human-readable column names
df.columns = selected.get_labels(df.columns)

In [9]:
# Build a synthetic 'mains' signal (the sum of the two appliances) and take
# first differences of both signals, so the network sees changes in power.
mains = (df['Toaster'] + df['Kettle']).fillna(0).diff().dropna()
appliances = df['Toaster'].fillna(0).diff().dropna()
del df

In [10]:
# Constrain outputs to [-1, 1] because the output layer is tanh
maximum = appliances.abs().max()
appliances /= maximum
mains_same_scale_as_appliances = mains / maximum

# Standardise the input to zero mean, unit variance
mains = (mains - mains.mean()) / mains.std()
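
For later reference, both transforms can be undone to get back to approximate watts. A minimal sketch, where net_output is a hypothetical Series of raw network outputs aligned with the appliance index:

# Undo the [-1, 1] scaling, then undo the diff() by cumulative summing.
watts_estimate = (net_output * maximum).cumsum()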

In [11]:
ax = mains.plot()
ax = appliances.plot(ax=ax)
plt.show()



In [12]:
# Build the PyBrain dataset: one input (mains diff), one target (appliance diff)
N_OUTPUTS = 1
N_INPUTS = 1
ds = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds.newSequence()
ds.setField('input', mains.values.reshape(-1, N_INPUTS))
ds.setField('target', appliances.values.reshape(-1, N_OUTPUTS))
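
setField writes the whole sequence in one go; the more common PyBrain idiom appends one sample at a time, which also makes it easy to split the data into several sequences. An equivalent sketch:

# Equivalent construction, one (input, target) pair at a time:
ds2 = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds2.newSequence()
for x, y in zip(mains.values, appliances.values):
    ds2.appendLinked([x], [y])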

In [13]:
ds.getSequence(0)


Out[13]:
[array([[  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07],
        ..., 
        [  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07]]), array([[ 0.],
        [ 0.],
        [ 0.],
        ..., 
        [ 0.],
        [ 0.],
        [ 0.]])]

In [14]:
# Build network
net = RecurrentNetwork()

def lstm_layer_name(i):
    return 'LSTM{:d}'.format(i)

# Add modules
net.addInputModule(LinearLayer(dim=ds.indim, name='in'))
net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
net.addModule(TanhLayer(10, name='tanh_input'))
net.addModule(TanhLayer(10, name='tanh_output'))  # NOTE: never connected below, so unused
for i, n_cells in enumerate(CONFIG['HIDDEN_LAYERS']):
    net.addModule(LSTMLayer(n_cells, name=lstm_layer_name(i+1), peepholes=CONFIG['PEEPHOLES']))   

# Bias
bias = BiasUnit()
net.addModule(bias)

#c_output_bias = FullConnection(bias, net['out'], name='c_output_bias')
#c_output_bias._setParameters(np.zeros(1))
#net.addConnection(c_output_bias)

c_tanh_input_bias = FullConnection(bias, net['tanh_input'], name='c_tanh_input_bias')
c_tanh_input_bias._params = np.random.uniform(-0.1, 0.1, size=c_tanh_input_bias.paramdim)
net.addConnection(c_tanh_input_bias)

forwards_connection = FullConnection(net['in'], net['tanh_input'], name='c_in_to_tanh')
forwards_connection._params = np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim)
net.addConnection(forwards_connection)

# Add other connections
n_hidden_layers = len(CONFIG['HIDDEN_LAYERS'])
prev_layer_name = 'tanh_input'
for i in range(n_hidden_layers):
    hidden_layer_i = i + 1
    layer_name = lstm_layer_name(hidden_layer_i)
    
    recurrent_connection = FullConnection(net[layer_name], net[layer_name], name='c_' + layer_name + '_to_' + layer_name)
    recurrent_connection._params = np.random.uniform(-0.05, 0.05, size=recurrent_connection.paramdim)
    net.addRecurrentConnection(recurrent_connection)
    
    #bias_connection = FullConnection(bias, net[layer_name], name='c_' + layer_name + '_bias')
    #bias_connection._params = np.zeros(bias_connection.paramdim)
    #net.addConnection(bias_connection)
    
    forwards_connection = FullConnection(net[prev_layer_name], net[layer_name], name='c_' + prev_layer_name + '_to_' + layer_name)
    forwards_connection._params = np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim)
    net.addConnection(forwards_connection)
    prev_layer_name = layer_name
    
# Connect the last LSTM layer to the output layer
layer_name = lstm_layer_name(n_hidden_layers)
connect_to_out = FullConnection(net[layer_name], net['out'], name='c_' + layer_name + '_to_out')
connect_to_out._params = np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim)
net.addConnection(connect_to_out)

net.sortModules()
print(net)


RecurrentNetwork-7
   Modules:
    [<BiasUnit 'BiasUnit-6'>, <LinearLayer 'in'>, <TanhLayer 'tanh_output'>, <TanhLayer 'tanh_input'>, <LSTMLayer 'LSTM1'>, <TanhLayer 'out'>]
   Connections:
    [<FullConnection 'c_LSTM1_to_out': 'LSTM1' -> 'out'>, <FullConnection 'c_in_to_tanh': 'in' -> 'tanh_input'>, <FullConnection 'c_tanh_input_bias': 'BiasUnit-6' -> 'tanh_input'>, <FullConnection 'c_tanh_input_to_LSTM1': 'tanh_input' -> 'LSTM1'>]
   Recurrent Connections:
    [<FullConnection 'c_LSTM1_to_LSTM1': 'LSTM1' -> 'LSTM1'>]
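
A quick sanity check on model size: after sortModules() the network exposes its flattened weight vector, so the number of trainable parameters is simply:

# Total number of trainable weights in the network:
print(len(net.params))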

In [15]:
# Create the trainer specified in CONFIG
trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True)
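
To try the backprop alternative mentioned in CONFIG, the trainer would be built like this (momentum and learningrate are starting points to tune, not recommendations):

# Hypothetical alternative trainer; BackpropTrainer lives in
# pybrain.supervised alongside RPropMinusTrainer.
from pybrain.supervised import BackpropTrainer
trainer = BackpropTrainer(net, dataset=ds, verbose=True,
                          momentum=0.9, learningrate=0.00001)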

In [16]:
# Carry out the training
net.reset()
t0 = time()
EPOCHS = CONFIG['EPOCHS_PER_CYCLE'] * CONFIG['CYCLES']
print("Starting training with", EPOCHS, "epochs...")
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
    # (per-cycle error / ETA reporting went here; see the sketch
    # after the training output below)
print("Finished training.  total seconds =", time() - t0)


Starting training with 30 epochs...
epoch      0  total error   0.00024166   avg weight       0.11841
epoch      1  total error      0.29964   avg weight       0.15537
epoch      2  total error     0.025191   avg weight       0.18291
epoch      3  total error    0.0071459   avg weight       0.19379
epoch      4  total error     0.021891   avg weight        0.2084
epoch      5  total error   0.00040316   avg weight       0.21999
epoch      6  total error   0.00018436   avg weight       0.22834
epoch      7  total error     0.015941   avg weight       0.23699
epoch      8  total error   0.00019764   avg weight       0.23694
epoch      9  total error   0.00052899   avg weight       0.24014
epoch     10  total error   0.00016052   avg weight       0.24552
epoch     11  total error   0.00031174   avg weight       0.25395
epoch     12  total error    0.0001567   avg weight       0.25308
epoch     13  total error   0.00017707   avg weight       0.25879
epoch     14  total error   0.00015053   avg weight       0.26418
epoch     15  total error   0.00014854   avg weight       0.27756
epoch     16  total error   0.00014338   avg weight       0.29252
epoch     17  total error   0.00014136   avg weight       0.31347
epoch     18  total error   0.00013672   avg weight       0.33662
epoch     19  total error    0.0001328   avg weight       0.36693
epoch     20  total error   0.00012918   avg weight       0.40273
epoch     21  total error   0.00012559   avg weight       0.44184
epoch     22  total error   0.00011917   avg weight        0.4545
epoch     23  total error   0.00011368   avg weight       0.50045
epoch     24  total error    0.0001209   avg weight       0.52414
epoch     25  total error   0.00011233   avg weight       0.53807
epoch     26  total error   0.00010504   avg weight       0.57231
epoch     27  total error   0.00010284   avg weight       0.62598
epoch     28  total error   0.00010121   avg weight       0.64652
epoch     29  total error   9.5332e-05   avg weight       0.70836
Finished training.  total seconds = 1808.96526408
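
The per-cycle reporting removed from the loop above can be reinstated in a lighter form; testOnData() (inherited from BackpropTrainer) returns the mean error over the dataset. A sketch:

# Report the error once per cycle instead of once per epoch:
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
    print('cycle', i + 1, 'of', CONFIG['CYCLES'],
          'error =', trainer.testOnData())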

In [17]:
# Disaggregate!
START = "2014-01-01"
END = "2014-01-03"
print("Starting disaggregation...")
net.reset()
estimates = pd.Series(index=appliances[START:END].index)
for date, mains_value in mains[START:END].iteritems():
    estimates[date] = net.activate([mains_value])[0]


Starting disaggregation...
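
Since ground truth for the same window is available, a simple fit metric can be computed directly from the two Series. A sketch (this is MSE on the diffed, scaled signals, not on watts):

# Mean squared error between estimates and the true appliance signal:
truth = appliances[START:END]
print('MSE =', ((estimates - truth) ** 2).mean())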

In [18]:
estimates.plot()
plt.show()



In [19]:
mains[START:END].plot()
plt.show()



In [20]:
appliances[START:END].plot()
plt.show()



In [21]:
ax = estimates[START:END].cumsum().plot(label='estimates')
ax = mains_same_scale_as_appliances[START:END].cumsum().plot(ax=ax, label='aggregate')
ax = appliances[START:END].cumsum().plot(ax=ax, label='appliance')
plt.legend()
plt.show()



In [22]:
estimates.cumsum().to_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')
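
To reload the saved estimates in a later session (the key 'df' matches the one used above):

# Read the cumulative estimates back from the HDF5 file:
loaded = pd.read_hdf('neuronilm_estimates_{:03d}.hdf'.format(
    CONFIG['EXPERIMENT_NUMBER']), 'df')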
