In [1]:
from __future__ import print_function, division
import matplotlib
matplotlib.use('nbagg')  # interactive plots in IPython. New in matplotlib v1.4
# %matplotlib inline

In [2]:
import matplotlib.pyplot as plt
from nilmtk import DataSet, MeterGroup
import pandas as pd
import numpy as np
from time import time


In [3]:
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.structure import RecurrentNetwork, FullConnection
from pybrain.structure.modules import LSTMLayer, BiasUnit, LinearLayer, TanhLayer, SigmoidLayer

In [28]:
CONFIG = dict(
    EPOCHS_PER_CYCLE = 5,
    CYCLES = 10,
    HIDDEN_LAYERS = [15, 15],
    PEEPHOLES = True,
    TRAINERCLASS = RPropMinusTrainer,
    # Alternatively, set TRAINERCLASS = BackpropTrainer and pass the extra
    # arguments (e.g. momentum=0.9, learningrate=0.00001) when the trainer
    # is constructed below.
    INPUTS = ['fridge'],  # could also add: 'hour of day (int)', 'outside temperature', 'is business day (-1, 1)'
    EXPERIMENT_NUMBER = 1
)

In [5]:
# Load dataset
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-03-01")
elec = dataset.buildings[1].elec

In [6]:
# Top-5 meters identified in the UK-DALE paper; only the fridge freezer is
# used in this experiment.
# APPLIANCES = ['kettle', 'dish washer', 'HTPC', 'washer dryer', 'fridge freezer']
APPLIANCES = ['fridge freezer']
selected_meters = [elec[appliance] for appliance in APPLIANCES]
selected_meters.append(elec.mains())
selected = MeterGroup(selected_meters)
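
To reproduce the full top-5 experiment without hand-listing appliances, nilmtk can rank submeters by energy consumed. A minimal sketch, assuming select_top_k is available in the installed nilmtk version (this cell was not part of the recorded run):

In [ ]:
# Rank submeters by energy consumed and keep the five largest
top5 = elec.submeters().select_top_k(k=5)
print(top5)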

In [7]:
df = selected.dataframe_of_meters()

In [8]:
# Use human-readable column names
df.columns = selected.get_labels(df.columns)

In [9]:
# Work with first differences (changes in power) rather than raw power.
# The site meter was appended last, so iloc[:, :-1] keeps only the appliance columns.
mains = df['Site meter'].diff().dropna()
appliances = df.iloc[:, :-1].fillna(0).diff().dropna()
del df

In [10]:
# Standardise the input to zero mean and unit variance
mains = (mains - mains.mean()) / mains.std()

# Scale the outputs towards [-1, 1] because the output layer is TanH.
# Note: dividing by the (positive) max only guarantees the upper bound.
appliances /= appliances.max()
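
A stricter alternative bounds both sides of the TanH range by scaling with the largest absolute value. A sketch (not part of the recorded run; scale and appliances_scaled are names introduced here, and keeping scale would let the estimates be converted back to watts later):

In [ ]:
# Symmetric scaling: guarantees every diff lies in [-1, 1]
scale = appliances.abs().max()
appliances_scaled = appliances / scale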

In [11]:
mains.plot()
plt.show()

[plot: standardised mains diff time series]
In [12]:
appliances.plot()
plt.show()

[plot: scaled fridge freezer diff time series]
In [13]:
# Build the PyBrain dataset: a single long sequence mapping mains diffs to appliance diffs
N_OUTPUTS = appliances.shape[1]
N_INPUTS = 1
N = len(mains)
ds = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds.newSequence()
# setField expects 2-D arrays; wrapping the Series in a DataFrame gives shape (N, 1)
ds.setField('input', pd.DataFrame(mains).values)
ds.setField('target', appliances.values)
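
setField writes the whole arrays in one go and assumes a single sequence. The more conventional (if slower) way to populate a SequentialDataSet is one timestep at a time; a sketch over the same data (not part of the recorded run):

In [ ]:
ds2 = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds2.newSequence()
for x, y in zip(mains.values, appliances.values):
    # appendLinked adds one timestep to the linked input/target fields
    ds2.appendLinked([x], y)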

In [14]:
ds.getSequence(0)


Out[14]:
[array([[ 0.04871107],
        [-0.00340034],
        [-0.08244994],
        ..., 
        [ 0.02146469],
        [-0.0289024 ],
        [ 0.02211969]]), array([[ 0.],
        [ 0.],
        [ 0.],
        ..., 
        [ 0.],
        [ 0.],
        [ 0.]])]

In [15]:
# Build network
net = RecurrentNetwork()

def lstm_layer_name(i):
    return 'LSTM{:d}'.format(i)

# Add modules
net.addInputModule(LinearLayer(ds.indim, name='in'))
net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
for i, n_cells in enumerate(CONFIG['HIDDEN_LAYERS']):
    net.addModule(LSTMLayer(n_cells, name=lstm_layer_name(i+1), peepholes=CONFIG['PEEPHOLES']))   

# Bias
bias = BiasUnit()
net.addModule(bias)
net.addConnection(FullConnection(bias, net['out'], name='c_output_bias'))
    
# Add connections

n_hidden_layers = len(CONFIG['HIDDEN_LAYERS'])
prev_layer_name = 'in'
for i in range(n_hidden_layers):
    hidden_layer_i = i + 1
    layer_name = lstm_layer_name(hidden_layer_i)
    net.addRecurrentConnection(FullConnection(net[layer_name], net[layer_name], name='c_' + layer_name + '_to_' + layer_name))
    net.addConnection(FullConnection(bias, net[layer_name], name='c_' + layer_name + '_bias'))
    net.addConnection(FullConnection(net[prev_layer_name], net[layer_name], name='c_' + prev_layer_name + '_to_' + layer_name))
    prev_layer_name = layer_name
    
net.addConnection(FullConnection(net[prev_layer_name], net['out'], name='c_' + prev_layer_name + '_to_out'))

net.sortModules()
print(net)


RecurrentNetwork-6
   Modules:
    [<BiasUnit 'BiasUnit-5'>, <LinearLayer 'in'>, <LSTMLayer 'LSTM1'>, <LSTMLayer 'LSTM2'>, <TanhLayer 'out'>]
   Connections:
    [<FullConnection 'c_LSTM1_bias': 'BiasUnit-5' -> 'LSTM1'>, <FullConnection 'c_LSTM1_to_LSTM2': 'LSTM1' -> 'LSTM2'>, <FullConnection 'c_LSTM2_bias': 'BiasUnit-5' -> 'LSTM2'>, <FullConnection 'c_LSTM2_to_out': 'LSTM2' -> 'out'>, <FullConnection 'c_in_to_LSTM1': 'in' -> 'LSTM1'>, <FullConnection 'c_output_bias': 'BiasUnit-5' -> 'out'>]
   Recurrent Connections:
    [<FullConnection 'c_LSTM1_to_LSTM1': 'LSTM1' -> 'LSTM1'>, <FullConnection 'c_LSTM2_to_LSTM2': 'LSTM2' -> 'LSTM2'>]
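
A stack like this can also be produced by PyBrain's buildNetwork shortcut. A rough equivalent as a sketch (module and connection names will differ from the manual construction, and the recurrent/peepholes options are assumed to be supported by the installed PyBrain version):

In [ ]:
from pybrain.tools.shortcuts import buildNetwork
layers = [ds.indim] + CONFIG['HIDDEN_LAYERS'] + [ds.outdim]
net2 = buildNetwork(*layers, hiddenclass=LSTMLayer, outclass=TanhLayer,
                    recurrent=True, peepholes=CONFIG['PEEPHOLES'])
print(net2)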

In [16]:
# Initialise the weights and construct the trainer
net.randomize()
# overwrite the random initialisation with small uniform weights
net._setParameters(np.random.uniform(-0.1, 0.1, size=net.paramdim))
trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True)
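
If the BackpropTrainer alternative from the CONFIG cell were used instead, the extra arguments would be passed at this point. A sketch with the values from the original comment (not part of the recorded run):

In [ ]:
from pybrain.supervised import BackpropTrainer
trainer_alt = BackpropTrainer(net, dataset=ds, verbose=True,
                              momentum=0.9, learningrate=0.00001)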

In [17]:
# Carry out the training
net.reset()
t0 = time()
EPOCHS = CONFIG['EPOCHS_PER_CYCLE'] * CONFIG['CYCLES']
print("Starting training with", EPOCHS, "epochs...")
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
    # per-cycle error and ETA reporting: see the sketch after the training log
print("Finished training.  total seconds =", time() - t0)


Starting training with 50 epochs...
epoch      0  total error   0.00054755   avg weight      0.057434
epoch      1  total error      0.28383   avg weight       0.11496
epoch      2  total error    0.0053926   avg weight       0.14007
epoch      3  total error    0.0093579   avg weight       0.15203
epoch      4  total error    0.0020641   avg weight       0.14917
epoch      5  total error   0.00028052   avg weight       0.16367
epoch      6  total error    0.0011256   avg weight       0.17387
epoch      7  total error   0.00036619   avg weight       0.16836
epoch      8  total error   0.00011663   avg weight       0.17313
epoch      9  total error   0.00044515   avg weight       0.18028
epoch     10  total error   0.00014945   avg weight       0.17341
epoch     11  total error   0.00012037   avg weight       0.17679
epoch     12  total error   0.00017621   avg weight       0.17836
epoch     13  total error   9.8247e-05   avg weight       0.18097
epoch     14  total error   0.00012237   avg weight       0.18509
epoch     15  total error   0.00010607   avg weight       0.18905
epoch     16  total error   9.2811e-05   avg weight       0.19332
epoch     17  total error   0.00010552   avg weight       0.20044
epoch     18  total error     9.46e-05   avg weight       0.20117
epoch     19  total error   9.2435e-05   avg weight       0.20882
epoch     20  total error   9.5818e-05   avg weight        0.2212
epoch     21  total error   9.2214e-05   avg weight       0.23239
epoch     22  total error   9.2153e-05   avg weight       0.24938
epoch     23  total error   9.2364e-05   avg weight       0.26662
epoch     24  total error   9.1422e-05   avg weight       0.29518
epoch     25  total error   9.1699e-05   avg weight        0.3296
epoch     26  total error   9.1229e-05   avg weight       0.37184
epoch     27  total error   9.0955e-05   avg weight       0.42569
epoch     28  total error   9.0739e-05   avg weight       0.48882
epoch     29  total error   9.0333e-05   avg weight       0.56881
epoch     30  total error   8.9873e-05   avg weight       0.64662
epoch     31  total error   8.9062e-05   avg weight       0.72193
epoch     32  total error   8.9093e-05   avg weight        0.7673
epoch     33  total error   8.8281e-05   avg weight       0.83128
epoch     34  total error   8.8464e-05   avg weight       0.94252
epoch     35  total error   8.7778e-05   avg weight       0.94468
epoch     36  total error   8.6773e-05   avg weight        1.0555
epoch     37  total error   8.6273e-05   avg weight        1.1794
epoch     38  total error   8.5878e-05   avg weight        1.0887
epoch     39  total error   8.5031e-05   avg weight        1.1229
epoch     40  total error   8.4723e-05   avg weight        1.1507
epoch     41  total error   8.4401e-05   avg weight        1.1725
epoch     42  total error   8.4018e-05   avg weight        1.2203
epoch     43  total error    8.517e-05   avg weight        1.2806
epoch     44  total error   8.4047e-05   avg weight        1.2007
epoch     45  total error   8.3803e-05   avg weight        1.2296
epoch     46  total error   8.3704e-05   avg weight        1.2732
epoch     47  total error   8.3583e-05   avg weight         1.268
epoch     48  total error   8.3592e-05   avg weight        1.3084
epoch     49  total error   8.3502e-05   avg weight        1.3088
Finished training.  total seconds = 29975.1012259
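
The training loop above can additionally track the per-cycle error and estimate a finishing time. A sketch (not part of the recorded run; testOnData reports the error on the training data, and the datetime arithmetic is standard library):

In [ ]:
from datetime import datetime, timedelta
from sys import stdout

train_errors = []
start_time = time()
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
    train_errors.append(trainer.testOnData())
    epoch = (i + 1) * CONFIG['EPOCHS_PER_CYCLE']
    seconds_elapsed = time() - start_time
    seconds_per_epoch = seconds_elapsed / epoch
    seconds_remaining = (EPOCHS - epoch) * seconds_per_epoch
    td_elapsed_str = str(timedelta(seconds=seconds_elapsed)).split('.')[0]
    eta = (datetime.now() + timedelta(seconds=seconds_remaining)).strftime("%H:%M:%S")
    print("\r epoch = {}/{}    error = {}  elapsed = {}   ETA = {}"
          .format(epoch, EPOCHS, train_errors[-1], td_elapsed_str, eta), end="")
    stdout.flush()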

In [18]:
# Disaggregate: replay the mains sequence through the trained network one
# timestep at a time, collecting its appliance estimates.
START = "2014-01-01"
END = "2014-01-03"
print("Starting disaggregation...")
net.reset()  # clear the recurrent state before replaying the sequence
estimates = pd.DataFrame(columns=appliances.columns, index=appliances[START:END].index)
for date, mains_value in mains[START:END].iteritems():
    estimates.loc[date] = net.activate(mains_value)


Starting disaggregation...
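
A quick check of the fit over this window: the mean absolute error between estimated and true appliance diffs. A sketch (estimates is cast to float because row-wise .loc assignment leaves it with object dtype):

In [ ]:
mae = (estimates.astype(float) - appliances[START:END]).abs().mean()
print(mae)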

In [19]:
estimates.plot()
plt.show()

[plot: estimated fridge freezer diffs over the disaggregation window]
In [20]:
appliances[START:END].plot()
plt.show()

[plot: true fridge freezer diffs over the same window]
In [24]:
appliances[START:END].cumsum().plot()
estimates[START:END].cumsum().plot()
plt.show()

[plots: cumulative sums of true and estimated fridge freezer diffs]
In [25]:
mains[START:END].cumsum().plot()
plt.show()

[plot: cumulative sum of the standardised mains diff]
In [31]:
estimates.cumsum().to_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')


/usr/local/lib/python2.7/dist-packages/pandas/io/pytables.py:2558: PerformanceWarning: 
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->floating,key->block0_values] [items->['Fridge freezer']]

  warnings.warn(ws, PerformanceWarning)
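
The PerformanceWarning above arises because estimates was filled by row-wise .loc assignment and therefore holds object dtype, which PyTables has to pickle. Casting to float first lets it store native floats; a sketch:

In [ ]:
estimates.astype(float).cumsum().to_hdf(
    'neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')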

In [34]:
ls -alh


total 2.3M
drwxrwxr-x 3 dk3810 dk3810 4.0K Jan 23 11:12 ./
drwxrwxr-x 4 dk3810 dk3810 4.0K Jan 21 17:55 ../
-rw------- 1 dk3810 dk3810 556K Jan 23 11:11 experiment_001.ipynb
drwxrwxr-x 2 dk3810 dk3810 4.0K Jan 23 11:12 .ipynb_checkpoints/
-rw-rw-r-- 1 dk3810 dk3810 1.8M Jan 23 11:12 neuronilm_estimates_001.hdf

In [ ]: