In [1]:
from __future__ import print_function, division
import matplotlib
matplotlib.use('nbagg') # interactive plots in the IPython Notebook. New in matplotlib v1.4
# %matplotlib inline

In [2]:
import matplotlib.pyplot as plt
from nilmtk import DataSet, MeterGroup
import pandas as pd
import numpy as np
from time import time


In [3]:
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.structure import RecurrentNetwork, FullConnection, FeedForwardNetwork
from pybrain.structure.modules import LSTMLayer, BiasUnit, LinearLayer, TanhLayer, SigmoidLayer

In [4]:
CONFIG = dict(
    EPOCHS_PER_CYCLE = 5,
    CYCLES = 20,
    HIDDEN_LAYERS = [],
    PEEPHOLES = True,
    TRAINERCLASS = RPropMinusTrainer,
    # Alternatively, plain backpropagation can be used: set
    # TRAINERCLASS = BackpropTrainer and pass e.g. momentum=0.9,
    # learningrate=0.00001 when the trainer is constructed below.
    INPUTS = [],  # candidate extra inputs: 'hour of day (int)', 'outside temperature', 'is business day (-1, 1)'
    EXPERIMENT_NUMBER = 26
)
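
A sketch of how the alternative trainer mentioned in the config comment could be wired up, keeping TRAINERCLASS as a class and passing the keyword arguments at construction time (BackpropTrainer is a real PyBrain class; the TRAINER_KWARGS key is introduced here for illustration and is not used elsewhere in this notebook):

In [ ]:
# Sketch only -- not part of the experiment as run
from pybrain.supervised import BackpropTrainer
CONFIG['TRAINERCLASS'] = BackpropTrainer
CONFIG['TRAINER_KWARGS'] = dict(momentum=0.9, learningrate=0.00001)
# later: trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True,
#                                         **CONFIG['TRAINER_KWARGS'])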

In [5]:
# Load UK-DALE and restrict all subsequent loads to January 2014
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-02-01")
elec = dataset.buildings[1].elec
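
set_window restricts every subsequent load from this DataSet to the given date range. A quick sanity check that the window took effect (a sketch; get_timeframe is nilmtk's method for reporting a meter's effective time span):

In [ ]:
# Sketch: inspect the effective timeframe after windowing
print(elec.mains().get_timeframe())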

In [6]:
# The UK-DALE paper identifies a top-5 of target appliances (commented out
# below); this experiment uses a simpler two-appliance subset.
# APPLIANCES = ['kettle', 'dish washer', 'HTPC', 'washer dryer', 'fridge freezer']
APPLIANCES = ['kettle', 'toaster']
selected_meters = [elec[appliance] for appliance in APPLIANCES]
selected_meters.append(elec.mains())
selected = MeterGroup(selected_meters)

In [7]:
df = selected.dataframe_of_meters()

In [8]:
# Use human-readable column names
df.columns = selected.get_labels(df.columns)

In [9]:
# Build a synthetic 'mains' signal from just kettle + toaster, so the target
# appliance (toaster) is always a component of the input.  Both series are
# reduced to first differences: step changes in power become single spikes.
mains = (df['Toaster'] + df['Kettle']).fillna(0).diff().dropna()
appliances = df['Toaster'].fillna(0).diff().dropna()
del df
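
A toy illustration (not from the experiment) of what the first difference does: a step change in an appliance's power draw becomes a single spike, giving the network a sparse, localised event to detect.

In [ ]:
# Toy example: a kettle-like 2 kW load switching on and then off
toy = pd.Series([0., 0., 2000., 2000., 0.])
print(toy.diff().dropna().values)   # [    0.  2000.     0. -2000.]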

In [10]:
# Scale the target to [-1, 1] because the output layer is tanh
maximum = appliances.abs().max()
appliances /= maximum
mains_same_scale_as_appliances = mains / maximum

# Standardise the input to zero mean and unit variance
mains = (mains - mains.mean()) / mains.std()
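
A quick sanity check on the scaling (a sketch, not in the original run): the target should now span [-1, 1] and the standardised input should have mean ~0 and std ~1.

In [ ]:
# Sketch: verify the scaling
print(appliances.abs().max())     # 1.0
print(mains.mean(), mains.std())  # ~0.0, ~1.0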

In [11]:
ax = mains.plot()
ax = appliances.plot(ax=ax)
plt.show()

[Figure: the standardised mains diff and the scaled appliance diff plotted together]

In [12]:
# Build the PyBrain dataset: a single long sequence of (mains, target) pairs
N_OUTPUTS = 1
N_INPUTS = 1
ds = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds.newSequence()
# setField fills a whole field in one go, which is far faster than calling
# appendLinked() once per sample
ds.setField('input', pd.DataFrame(mains).values)
ds.setField('target', pd.DataFrame(appliances).values)
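
For reference, the idiomatic (but much slower) way to build the same dataset is to append one linked input/target pair at a time; setField above is a bulk shortcut. A sketch, with ds2 introduced here for illustration:

In [ ]:
# Sketch: equivalent sample-by-sample construction
ds2 = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds2.newSequence()
for x, y in zip(mains.values, appliances.values):
    ds2.appendLinked([x], [y])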

In [13]:
ds.getSequence(0)


Out[13]:
[array([[  3.85928275e-08],
        [  3.85928275e-08],
        [  3.85928275e-08],
        ..., 
        [  3.85928275e-08],
        [  3.85928275e-08],
        [  3.85928275e-08]]), array([[ 0.],
        [ 0.],
        [ 0.],
        ..., 
        [ 0.],
        [ 0.],
        [ 0.]])]
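
getSequence(0) returns the [input, target] arrays for the first (and, here, only) sequence. A quick sanity check (a sketch, not in the original run):

In [ ]:
print(ds.getNumSequences())  # 1
print(len(ds))               # number of samples in the dataset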

In [14]:
# Build network.  Note: although we instantiate a RecurrentNetwork, this
# experiment adds no recurrent connections (see the printout below), so the
# net is effectively feed-forward: in -> tanh_input -> tanh_output -> out.
net = RecurrentNetwork()

def lstm_layer_name(i):
    # Unused in this experiment; kept from the LSTM variant
    return 'LSTM{:d}'.format(i)

# Add modules
net.addInputModule(LinearLayer(dim=ds.indim, name='in'))
net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
net.addModule(TanhLayer(10, name='tanh_input'))
net.addModule(TanhLayer(10, name='tanh_output'))

# Bias
bias = BiasUnit()
net.addModule(bias)

#c_output_bias = FullConnection(bias, net['out'], name='c_output_bias')
#c_output_bias._setParameters(np.zeros(1))
#net.addConnection(c_output_bias)

# Initialise biases to zero and forward weights uniformly in [-0.2, 0.2]
c_tanh_input_bias = FullConnection(bias, net['tanh_input'], name='c_tanh_input_bias')
c_tanh_input_bias._setParameters(np.zeros(c_tanh_input_bias.paramdim))
net.addConnection(c_tanh_input_bias)

c_tanh_output_bias = FullConnection(bias, net['tanh_output'], name='c_tanh_output_bias')
c_tanh_output_bias._setParameters(np.zeros(c_tanh_output_bias.paramdim))
net.addConnection(c_tanh_output_bias)

forwards_connection = FullConnection(net['in'], net['tanh_input'], name='c_in_to_tanh')
forwards_connection._setParameters(np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim))
net.addConnection(forwards_connection)

forwards_connection = FullConnection(net['tanh_input'], net['tanh_output'], name='c_tanh_input_to_tanh_output')
forwards_connection._setParameters(np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim))
net.addConnection(forwards_connection)

connect_to_out = FullConnection(net['tanh_output'], net['out'], name='c_tanh_to_out')
connect_to_out._setParameters(np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim))
net.addConnection(connect_to_out)

net.sortModules()
print(net)
print(net)


RecurrentNetwork-6
   Modules:
    [<BiasUnit 'BiasUnit-5'>, <LinearLayer 'in'>, <TanhLayer 'tanh_input'>, <TanhLayer 'tanh_output'>, <TanhLayer 'out'>]
   Connections:
    [<FullConnection 'c_in_to_tanh': 'in' -> 'tanh_input'>, <FullConnection 'c_tanh_input_bias': 'BiasUnit-5' -> 'tanh_input'>, <FullConnection 'c_tanh_input_to_tanh_output': 'tanh_input' -> 'tanh_output'>, <FullConnection 'c_tanh_output_bias': 'BiasUnit-5' -> 'tanh_output'>, <FullConnection 'c_tanh_to_out': 'tanh_output' -> 'out'>]
   Recurrent Connections:
    []
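
The printout confirms there are no recurrent connections, so despite the RecurrentNetwork class this model is feed-forward. For reference, a sketch of the LSTM variant hinted at by the LSTMLayer import and CONFIG['PEEPHOLES'] (peepholes is a real LSTMLayer keyword; addRecurrentConnection creates a time-delayed loop):

In [ ]:
# Sketch only: an LSTM hidden layer with a recurrent self-connection
lstm = LSTMLayer(10, peepholes=CONFIG['PEEPHOLES'], name='LSTM0')
net.addModule(lstm)
net.addConnection(FullConnection(net['in'], lstm, name='c_in_to_lstm'))
net.addConnection(FullConnection(lstm, net['out'], name='c_lstm_to_out'))
net.addRecurrentConnection(FullConnection(lstm, lstm, name='c_lstm_to_lstm'))
net.sortModules()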

In [15]:
# Define the training method.  delta0 is RProp's initial step size.
trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True, delta0=0.001)

In [23]:
# Carry out the training
net.reset()
t0 = time()
EPOCHS = CONFIG['EPOCHS_PER_CYCLE'] * CONFIG['CYCLES']
# trainer.trainUntilConvergence(maxEpochs=EPOCHS, verbose=True)  # alternative
print("Starting training with", EPOCHS, "epochs...")
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
    # (Per-cycle train_errors collection and elapsed/ETA reporting were
    #  disabled for this run; the trainer's verbose output below already
    #  reports the error after every epoch.)
print("Finished training.  total seconds =", time() - t0)


Starting training with 100 epochs...
epoch    100  total error   2.4873e-05   avg weight        1.0007
epoch    101  total error   2.4745e-05   avg weight        1.0491
epoch    102  total error   2.4734e-05   avg weight        1.1076
epoch    103  total error   2.4739e-05   avg weight        1.0374
epoch    104  total error   2.4693e-05   avg weight        1.0724
epoch    105  total error   2.4721e-05   avg weight        1.0374
epoch    106  total error   2.4669e-05   avg weight        1.0548
epoch    107  total error    2.462e-05   avg weight        1.0723
epoch    108  total error   2.4571e-05   avg weight        1.0934
epoch    109  total error   2.4518e-05   avg weight        1.1187
epoch    110  total error   2.4526e-05   avg weight        1.0882
epoch    111  total error   2.4503e-05   avg weight        1.1034
epoch    112  total error    2.446e-05   avg weight        1.1186
epoch    113  total error   2.4415e-05   avg weight         1.137
epoch    114  total error   2.4384e-05   avg weight        1.1151
epoch    115  total error   2.4359e-05   avg weight        1.1263
epoch    116  total error   2.4299e-05   avg weight        1.1376
epoch    117  total error   2.4243e-05   avg weight        1.1514
epoch    118  total error   2.4188e-05   avg weight        1.1681
epoch    119  total error     2.42e-05   avg weight        1.1861
epoch    120  total error    2.416e-05   avg weight        1.2095
epoch    121  total error   2.4107e-05   avg weight        1.2375
epoch    122  total error   2.4092e-05   avg weight        1.2053
epoch    123  total error   2.4089e-05   avg weight        1.2208
epoch    124  total error   2.4044e-05   avg weight        1.2378
epoch    125  total error   2.4019e-05   avg weight         1.258
epoch    126  total error   2.3972e-05   avg weight        1.2824
epoch    127  total error   2.3926e-05   avg weight        1.3118
epoch    128  total error   2.3922e-05   avg weight        1.2785
epoch    129  total error   2.3935e-05   avg weight        1.2971
epoch    130  total error   2.3884e-05   avg weight        1.3126
epoch    131  total error   2.3844e-05   avg weight         1.334
epoch    132  total error   2.3804e-05   avg weight        1.3595
epoch    133  total error   2.3758e-05   avg weight        1.3903
epoch    134  total error   2.3803e-05   avg weight        1.4274
epoch    135  total error   2.3771e-05   avg weight        1.3829
epoch    136  total error    2.374e-05   avg weight        1.4051
epoch    137  total error   2.3729e-05   avg weight        1.3845
epoch    138  total error   2.3732e-05   avg weight        1.3963
epoch    139  total error   2.3722e-05   avg weight        1.4057
epoch    140  total error   2.3701e-05   avg weight        1.4192
epoch    141  total error   2.3682e-05   avg weight        1.4352
epoch    142  total error   2.3684e-05   avg weight        1.4545
epoch    143  total error    2.367e-05   avg weight        1.4332
epoch    144  total error   2.3666e-05   avg weight        1.4432
epoch    145  total error   2.3648e-05   avg weight        1.4549
epoch    146  total error   2.3631e-05   avg weight        1.4688
epoch    147  total error   2.3626e-05   avg weight        1.4535
epoch    148  total error   2.3616e-05   avg weight        1.4623
epoch    149  total error   2.3605e-05   avg weight        1.4714
epoch    150  total error   2.3597e-05   avg weight        1.4797
epoch    151  total error    2.358e-05   avg weight        1.4919
epoch    152  total error   2.3578e-05   avg weight        1.5064
epoch    153  total error   2.3573e-05   avg weight        1.4906
epoch    154  total error    2.357e-05   avg weight        1.4979
epoch    155  total error   2.3556e-05   avg weight        1.5067
epoch    156  total error   2.3544e-05   avg weight        1.5171
epoch    157  total error    2.354e-05   avg weight        1.5296
epoch    158  total error   2.3528e-05   avg weight        1.5446
epoch    159  total error   2.3526e-05   avg weight        1.5283
epoch    160  total error   2.3519e-05   avg weight        1.5358
epoch    161  total error   2.3505e-05   avg weight         1.545
epoch    162  total error   2.3493e-05   avg weight        1.5558
epoch    163  total error   2.3493e-05   avg weight        1.5688
epoch    164  total error   2.3484e-05   avg weight        1.5845
epoch    165  total error   2.3475e-05   avg weight        1.5676
epoch    166  total error   2.3475e-05   avg weight        1.5754
epoch    167  total error   2.3462e-05   avg weight        1.5849
epoch    168  total error   2.3464e-05   avg weight        1.5961
epoch    169  total error   2.3455e-05   avg weight         1.584
epoch    170  total error   2.3451e-05   avg weight        1.5912
epoch    171  total error   2.3446e-05   avg weight        1.5967
epoch    172  total error   2.3439e-05   avg weight        1.6049
epoch    173  total error   2.3436e-05   avg weight        1.6146
epoch    174  total error   2.3431e-05   avg weight        1.6041
epoch    175  total error   2.3422e-05   avg weight        1.6104
epoch    176  total error   2.3419e-05   avg weight        1.6151
epoch    177  total error   2.3407e-05   avg weight        1.6222
epoch    178  total error   2.3398e-05   avg weight        1.6306
epoch    179  total error   2.3383e-05   avg weight        1.6408
epoch    180  total error   2.3386e-05   avg weight        1.6529
epoch    181  total error   2.3381e-05   avg weight        1.6675
epoch    182  total error   2.3373e-05   avg weight        1.6519
epoch    183  total error   2.3364e-05   avg weight        1.6591
epoch    184  total error   2.3355e-05   avg weight        1.6679
epoch    185  total error   2.3341e-05   avg weight        1.6785
epoch    186  total error   2.3343e-05   avg weight        1.6911
epoch    187  total error   2.3337e-05   avg weight        1.7063
epoch    188  total error    2.333e-05   avg weight        1.6901
epoch    189  total error   2.3322e-05   avg weight        1.6975
epoch    190  total error   2.3313e-05   avg weight        1.7067
epoch    191  total error   2.3312e-05   avg weight        1.7176
epoch    192  total error    2.331e-05   avg weight        1.7308
epoch    193  total error   2.3301e-05   avg weight        1.7168
epoch    194  total error   2.3298e-05   avg weight        1.7232
epoch    195  total error   2.3287e-05   avg weight        1.7311
epoch    196  total error    2.329e-05   avg weight        1.7406
epoch    197  total error   2.3284e-05   avg weight        1.7519
epoch    198  total error   2.3278e-05   avg weight        1.7399
epoch    199  total error   2.3275e-05   avg weight        1.7454
Finished training.  total seconds = 15306.0030451
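
Note that the epoch counter in the log starts at 100: this cell had already been run once, and PyBrain's trainer keeps its epoch count across calls. A sketch of how to check it:

In [ ]:
print(trainer.epoch)  # total epochs trained so far on this trainer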

In [24]:
# Disaggregate: feed the network one mains sample at a time.  net.reset() is
# called once before the loop, so state persists between activate() calls.
START = "2014-01-01"
END = "2014-01-03"
print("Starting disaggregation...")
net.reset()
estimates = pd.Series(index=appliances[START:END].index)
for date, mains_value in mains[START:END].iteritems():
    estimates[date] = net.activate(mains_value)


Starting disaggregation...
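
The estimates are on the scaled target's units. To express them in watts (of first difference), undo the scaling applied earlier; estimates_watts is a name introduced here for illustration:

In [ ]:
# Sketch: convert the estimates back to watts
estimates_watts = estimates * maximum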

In [25]:
estimates.plot()
plt.show()

[Figure: the network's estimated appliance signal]

In [26]:
mains[START:END].plot()
plt.show()

[Figure: the mains input over the disaggregation window]

In [27]:
appliances[START:END].plot()
plt.show()

[Figure: the appliance (toaster) ground truth over the window]

In [28]:
ax = estimates[START:END].cumsum().plot(label='estimates')
ax = mains_same_scale_as_appliances[START:END].cumsum().plot(ax=ax, label='aggregate')
ax = appliances[START:END].cumsum().plot(ax=ax, label='appliance (toaster)')
plt.legend()
plt.show()

[Figure: cumulative sums of the estimates, aggregate and appliance ground truth]

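One simple way to quantify the fit shown above is the mean absolute error between the cumulative estimate and the cumulative ground truth (a sketch, not part of the original experiment):

In [ ]:
# Sketch: MAE between cumulative estimate and cumulative ground truth
err = (estimates[START:END].cumsum() - appliances[START:END].cumsum()).abs().mean()
print(err)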
In [29]:
estimates.cumsum().to_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')
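
The cumulative estimates are saved for comparison across experiments. They can be loaded back with pandas (a sketch; with EXPERIMENT_NUMBER = 26 the filename is neuronilm_estimates_026.hdf):

In [ ]:
# Sketch: reload the saved series
loaded = pd.read_hdf('neuronilm_estimates_026.hdf', 'df')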
