In [1]:
from __future__ import print_function, division
import matplotlib
matplotlib.use('nbagg') # interactive plots in iPython. New in matplotlib v1.4
# %matplotlib inline

In [2]:
import matplotlib.pyplot as plt
from nilmtk import DataSet, MeterGroup
import pandas as pd
import numpy as np
from time import time


Couldn't import dot_parser, loading of dot files will not be possible.
/usr/local/lib/python2.7/dist-packages/bottleneck/__init__.py:13: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility
  from .func import (nansum, nanmax, nanmin, nanmean, nanstd, nanvar, median,
/usr/local/lib/python2.7/dist-packages/bottleneck/__init__.py:19: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility
  from .move import (move_sum, move_nansum,

In [3]:
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.structure import RecurrentNetwork, FullConnection
from pybrain.structure.modules import LSTMLayer, BiasUnit, LinearLayer, TanhLayer, SigmoidLayer

In [4]:
CONFIG = dict(
    EPOCHS_PER_CYCLE = 5,
    CYCLES = 6,
    HIDDEN_LAYERS = [50, 50],
    PEEPHOLES = True,
    TRAINERCLASS = RPropMinusTrainer,
    # instead, you may also try
    # TRAINERCLASS = BackpropTrainer(net, dataset=trndata, verbose=True, 
    #                                momentum=0.9, learningrate=0.00001)
    INPUTS = ['fridge'], #, 'hour of day (int)', 'outside temperature', 'is business day (-1, 1)'
    EXPERIMENT_NUMBER = 14
)

In [5]:
# Load dataset
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-01-07")
elec = dataset.buildings[1].elec

In [6]:
# Select top-5 meters identified in UK-DALE paper
# APPLIANCES = ['kettle', 'dish washer', 'HTPC', 'washer dryer', 'fridge freezer']
APPLIANCES = ['fridge freezer']
selected_meters = [elec[appliance] for appliance in APPLIANCES]
selected_meters.append(elec.mains())
selected = MeterGroup(selected_meters)

In [7]:
df = selected.dataframe_of_meters()

In [8]:
# Use human-readable column names
df.columns = selected.get_labels(df.columns)

In [9]:
mains = df['Site meter'].diff().dropna()
appliances = df.iloc[:,:-1].fillna(0).diff().dropna()
del df

In [10]:
# standardise input
mains = (mains - mains.mean()) / mains.std()

# Constrain outputs to [-1,1] because we're using TanH
appliances /= appliances.abs().max()
# appliances -= 1

In [11]:
mains.plot()
plt.show()



In [12]:
appliances.plot()
plt.show()



In [13]:
# Build PyBrain dataset
N_OUTPUTS = appliances.shape[1]
N_INPUTS = 1
N = len(mains)
ds = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds.newSequence()
ds.setField('input', pd.DataFrame(mains).values)
ds.setField('target', appliances.values)

In [14]:
ds.getSequence(0)


/usr/local/lib/python2.7/dist-packages/PyBrain-0.3.3-py2.7.egg/pybrain/datasets/sequential.py:45: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  return self.getField(field)[seq[index]:]
Out[14]:
[array([[ 0.04330504],
        [-0.00298955],
        [-0.0732154 ],
        ..., 
        [ 0.00119256],
        [ 0.00240348],
        [-0.00412004]]), array([[ 0.],
        [ 0.],
        [ 0.],
        ..., 
        [ 0.],
        [ 0.],
        [ 0.]])]

In [15]:
# Build network
net = RecurrentNetwork()

def lstm_layer_name(i):
    return 'LSTM{:d}'.format(i)

# Add modules
net.addInputModule(LinearLayer(ds.indim, name='in'))
net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
for i, n_cells in enumerate(CONFIG['HIDDEN_LAYERS']):
    net.addModule(LSTMLayer(n_cells, name=lstm_layer_name(i+1), peepholes=CONFIG['PEEPHOLES']))   

# Bias
bias = BiasUnit()
net.addModule(bias)
c_output_bias = FullConnection(bias, net['out'], name='c_output_bias')
c_output_bias._setParameters(np.zeros(1))
net.addConnection(c_output_bias)
    
# Add other connections
n_hidden_layers = len(CONFIG['HIDDEN_LAYERS'])
prev_layer_name = 'in'
for i in range(n_hidden_layers):
    hidden_layer_i = i + 1
    layer_name = lstm_layer_name(hidden_layer_i)
    
    recurrent_connection = FullConnection(net[layer_name], net[layer_name], name='c_' + layer_name + '_to_' + layer_name)
    recurrent_connection._params = np.random.uniform(-0.05, 0.05, size=recurrent_connection.paramdim)
    net.addRecurrentConnection(recurrent_connection)
    
    #bias_connection = FullConnection(bias, net[layer_name], name='c_' + layer_name + '_bias')
    #bias_connection._params = np.zeros(bias_connection.paramdim)
    #net.addConnection(bias_connection)
    
    forwards_connection = FullConnection(net[prev_layer_name], net[layer_name], name='c_' + prev_layer_name + '_to_' + layer_name)
    forwards_connection._params = np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim)
    net.addConnection(forwards_connection)
    prev_layer_name = layer_name
    
layer_name = lstm_layer_name(n_hidden_layers)
connect_to_out = FullConnection(net[layer_name], net['out'], name='c_' + layer_name + '_to_out')
connect_to_out._params = np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim)
net.addConnection(connect_to_out)

net.sortModules()
print(net)


RecurrentNetwork-6
   Modules:
    [<BiasUnit 'BiasUnit-5'>, <LinearLayer 'in'>, <LSTMLayer 'LSTM1'>, <LSTMLayer 'LSTM2'>, <TanhLayer 'out'>]
   Connections:
    [<FullConnection 'c_LSTM1_to_LSTM2': 'LSTM1' -> 'LSTM2'>, <FullConnection 'c_LSTM2_to_out': 'LSTM2' -> 'out'>, <FullConnection 'c_in_to_LSTM1': 'in' -> 'LSTM1'>, <FullConnection 'c_output_bias': 'BiasUnit-5' -> 'out'>]
   Recurrent Connections:
    [<FullConnection 'c_LSTM1_to_LSTM1': 'LSTM1' -> 'LSTM1'>, <FullConnection 'c_LSTM2_to_LSTM2': 'LSTM2' -> 'LSTM2'>]

In [16]:
# define a training method
trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True)

In [17]:
# carry out the training
net.reset()
# train_errors = []
t0 = time()
EPOCHS = CONFIG['EPOCHS_PER_CYCLE'] * CONFIG['CYCLES']
# trainer.trainUntilConvergence(maxEpochs=EPOCHS, verbose=True)
# start_time = time()
print("Starting training with", EPOCHS, "epochs...")
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
#    train_errors.append(trainer.testOnData())
    # epoch = (i+1) * CONFIG['EPOCHS_PER_CYCLE']
    # seconds_elapsed = time() - start_time
    # seconds_per_epoch = seconds_elapsed / epoch
    # seconds_remaining = (EPOCHS - epoch) * seconds_per_epoch
    # td_elapsed = timedelta(seconds=seconds_elapsed)
    # td_elapsed_str = str(td_elapsed).split('.')[0]
    # eta = (datetime.now() + timedelta(seconds=seconds_remaining)).time()
    # eta = eta.strftime("%H:%M:%S")
    # print("\r epoch = {}/{}    error = {}  elapsed = {}   ETA = {}"
    #       .format(epoch, EPOCHS, train_errors[-1], td_elapsed_str, eta),
    #       end="")
    # stdout.flush()
print("Finished training.  total seconds =", time() - t0)


Starting training with 30 epochs...
epoch      0  total error   0.00015165   avg weight        0.1269
epoch      1  total error      0.48017   avg weight       0.16167
epoch      2  total error    0.0038635   avg weight       0.19355
epoch      3  total error      0.42717   avg weight       0.21829
epoch      4  total error      0.21575   avg weight       0.23091
epoch      5  total error     0.083344   avg weight       0.25099
epoch      6  total error     0.059342   avg weight        0.2642
epoch      7  total error     0.012813   avg weight       0.27428
epoch      8  total error     0.075982   avg weight       0.27588
epoch      9  total error    0.0097013   avg weight        0.2799
epoch     10  total error    0.0061116   avg weight        0.2848
epoch     11  total error    0.0012425   avg weight       0.28958
epoch     12  total error   0.00012541   avg weight       0.29252
epoch     13  total error   0.00011004   avg weight       0.29188
epoch     14  total error   0.00022429   avg weight       0.29366
epoch     15  total error   0.00011505   avg weight       0.29606
epoch     16  total error   8.3235e-05   avg weight       0.29748
epoch     17  total error   9.5161e-05   avg weight       0.29708
epoch     18  total error   7.6964e-05   avg weight        0.3007
epoch     19  total error   7.5329e-05   avg weight       0.30464
epoch     20  total error   7.4744e-05   avg weight       0.30984
epoch     21  total error   7.4059e-05   avg weight       0.31319
epoch     22  total error   7.3054e-05   avg weight       0.31847
epoch     23  total error    7.327e-05   avg weight       0.32724
epoch     24  total error   7.2624e-05   avg weight       0.33413
epoch     25  total error   7.2258e-05   avg weight       0.34621
epoch     26  total error   7.2276e-05   avg weight       0.36298
epoch     27  total error    7.194e-05   avg weight       0.38347
epoch     28  total error   7.1659e-05   avg weight        0.4126
epoch     29  total error   7.1468e-05   avg weight       0.44998
Finished training.  total seconds = 2113.27445507

In [18]:
# Disaggregate!
START = "2014-01-01"
END = "2014-01-03"
print("Starting disaggregation...")
net.reset()
estimates = pd.DataFrame(columns=appliances.columns, index=appliances[START:END].index)
for date, mains_value in mains[START:END].iteritems():
    estimates.loc[date] = net.activate(mains_value)


Starting disaggregation...

In [19]:
estimates.plot()
plt.show()



In [20]:
appliances[START:END].plot()
plt.show()



In [21]:
mains[START:END].plot()
plt.show()



In [22]:
estimates[START:END].cumsum().plot()
#mains[START:END].cumsum().plot()
appliances[START:END].cumsum().plot()
plt.show()



In [23]:
estimates.cumsum().to_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')


/usr/local/lib/python2.7/dist-packages/pandas/io/pytables.py:2558: PerformanceWarning: 
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->floating,key->block0_values] [items->['Fridge freezer']]

  warnings.warn(ws, PerformanceWarning)

In [23]: