In [2]:
from __future__ import print_function, division
import matplotlib
matplotlib.use('nbagg') # interactive plots in IPython. New in matplotlib v1.4
# %matplotlib inline

In [3]:
import matplotlib.pyplot as plt
from nilmtk import DataSet, MeterGroup
import pandas as pd
import numpy as np
from time import time



In [4]:
from pybrain.supervised import RPropMinusTrainer
from pybrain.datasets import SequentialDataSet
from pybrain.structure import RecurrentNetwork, FullConnection, FeedForwardNetwork
from pybrain.structure.modules import LSTMLayer, BiasUnit, LinearLayer, TanhLayer, SigmoidLayer

In [5]:
CONFIG = dict(
    EPOCHS_PER_CYCLE = 5,
    CYCLES = 6,
    HIDDEN_LAYERS = [],
    PEEPHOLES = True,
    TRAINERCLASS = RPropMinusTrainer,
    # Alternatively, try BackpropTrainer; note it needs extra kwargs when
    # instantiated, e.g. momentum=0.9, learningrate=0.00001
    # (see the sketch after this cell).
    INPUTS = [],  # could also include 'hour of day (int)', 'outside temperature', 'is business day (-1, 1)'
    EXPERIMENT_NUMBER = 24
)
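
If you swap in BackpropTrainer, note that it takes extra keyword arguments at construction time, whereas the trainer below is built as CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True). A minimal sketch of the swap (the momentum and learning-rate values are illustrative, not tuned):

In [ ]:
# Sketch only: build whichever trainer CONFIG names, passing the extra
# kwargs that BackpropTrainer needs.  Hyperparameters are illustrative.
from pybrain.supervised import BackpropTrainer

def make_trainer(net, ds, config):
    if config['TRAINERCLASS'] is BackpropTrainer:
        return BackpropTrainer(net, dataset=ds, verbose=True,
                               momentum=0.9, learningrate=0.00001)
    return config['TRAINERCLASS'](net, dataset=ds, verbose=True)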

In [6]:
# Load dataset
dataset = DataSet('/data/mine/vadeec/merged/ukdale.h5')
dataset.set_window("2014-01-01", "2014-01-07")
elec = dataset.buildings[1].elec
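
A quick sanity check that the window was applied and building 1 contains the meters we expect; printing a nilmtk MeterGroup lists its meters:

In [ ]:
# Sanity check: list the meters available in building 1.
print(elec)
print(elec.mains())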

In [7]:
# The UK-DALE paper identifies a top-5 of meters; this quick experiment
# uses just two of them (see the select_top_k sketch after this cell).
# APPLIANCES = ['kettle', 'dish washer', 'HTPC', 'washer dryer', 'fridge freezer']
APPLIANCES = ['kettle', 'toaster']
selected_meters = [elec[appliance] for appliance in APPLIANCES]
selected_meters.append(elec.mains())
selected = MeterGroup(selected_meters)
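
For reference, nilmtk can also rank submeters by energy automatically. A sketch (this scans the data, so it can be slow on a long window):

In [ ]:
# Alternative sketch: let nilmtk pick the top-k submeters by energy.
top_5 = elec.submeters().select_top_k(k=5)
print(top_5)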

In [8]:
df = selected.dataframe_of_meters()

In [9]:
# Use human-readable column names
df.columns = selected.get_labels(df.columns)

In [10]:
# Build a synthetic aggregate (kettle + toaster) rather than real mains,
# then take first differences so the network sees switch events rather
# than absolute power levels.
mains = (df['Toaster'] + df['Kettle']).fillna(0).diff().dropna()
appliances = df['Toaster'].fillna(0).diff().dropna()
del df
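
Both series come from the same DataFrame and pass through the same fillna/diff/dropna pipeline, so they should remain aligned; a cheap check:

In [ ]:
# Sanity check: both series should share an identical index.
assert mains.index.equals(appliances.index)
print(len(mains), 'samples from', mains.index[0], 'to', mains.index[-1])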

In [11]:
# Constrain outputs to [-1, 1] because the output layer is tanh
maximum = appliances.abs().max()
appliances /= maximum
mains_same_scale_as_appliances = mains / maximum

# Standardise the input to zero mean, unit variance
mains = (mains - mains.mean()) / mains.std()
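
The network's outputs will live in this scaled-difference space. A hypothetical helper (not part of the original run) to map estimates back to watt-differences by undoing the division by maximum:

In [ ]:
# Hypothetical helper: undo the output scaling applied above.
def to_watt_diffs(scaled_series):
    return scaled_series * maximum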

In [12]:
ax = mains.plot()
ax = appliances.plot(ax=ax)
plt.show()

[plot: standardised mains diff overlaid with the scaled appliance diff]
In [13]:
# Build the PyBrain dataset: the entire week goes in as one long sequence
N_OUTPUTS = 1
N_INPUTS = 1
N = len(mains)
ds = SequentialDataSet(N_INPUTS, N_OUTPUTS)
ds.newSequence()
ds.setField('input', pd.DataFrame(mains).values)
ds.setField('target', pd.DataFrame(appliances).values)
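
The whole week goes in as a single sequence here. An alternative sketch, assuming you would rather have recurrent state reset at day boundaries, builds one sequence per day with appendLinked:

In [ ]:
# Alternative sketch: one sequence per day, so the trainer resets
# recurrent state at each day boundary.
ds_daily = SequentialDataSet(N_INPUTS, N_OUTPUTS)
for (day, mains_day), (_, target_day) in zip(
        mains.groupby(mains.index.date),
        appliances.groupby(appliances.index.date)):
    ds_daily.newSequence()
    for x, y in zip(mains_day.values, target_day.values):
        ds_daily.appendLinked([x], [y])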

In [14]:
ds.getSequence(0)


Out[14]:
[array([[  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07],
        ..., 
        [  1.63132217e-07],
        [  1.63132217e-07],
        [  1.63132217e-07]]), array([[ 0.],
        [ 0.],
        [ 0.],
        ..., 
        [ 0.],
        [ 0.],
        [ 0.]])]

In [19]:
# Build network
net = RecurrentNetwork()

def lstm_layer_name(i):
    # Helper for naming LSTM layers; unused in this feed-forward variant.
    return 'LSTM{:d}'.format(i)

# Add modules
net.addInputModule(LinearLayer(dim=ds.indim, name='in'))
net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
net.addModule(TanhLayer(10, name='tanh_input')) 

# Bias
bias = BiasUnit()
net.addModule(bias)

#c_output_bias = FullConnection(bias, net['out'], name='c_output_bias')
#c_output_bias._setParameters(np.zeros(1))
#net.addConnection(c_output_bias)

c_tanh_input_bias = FullConnection(bias, net['tanh_input'], name='c_tanh_input_bias')
c_tanh_input_bias._setParameters(np.zeros(c_tanh_input_bias.paramdim))
net.addConnection(c_tanh_input_bias)

forwards_connection = FullConnection(net['in'], net['tanh_input'], name='c_in_to_tanh')
forwards_connection._setParameters(np.random.uniform(-0.2, 0.2, size=forwards_connection.paramdim))
net.addConnection(forwards_connection)

connect_to_out = FullConnection(net['tanh_input'], net['out'], name='c_tanh_to_out')
connect_to_out._setParameters(np.random.uniform(-0.2, 0.2, size=connect_to_out.paramdim))
net.addConnection(connect_to_out)

net.sortModules()
print(net)


RecurrentNetwork-16
   Modules:
    [<BiasUnit 'BiasUnit-15'>, <LinearLayer 'in'>, <TanhLayer 'tanh_input'>, <TanhLayer 'out'>]
   Connections:
    [<FullConnection 'c_in_to_tanh': 'in' -> 'tanh_input'>, <FullConnection 'c_tanh_input_bias': 'BiasUnit-15' -> 'tanh_input'>, <FullConnection 'c_tanh_to_out': 'tanh_input' -> 'out'>]
   Recurrent Connections:
    []
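
As the printout shows, there are no recurrent connections, so this is effectively a feed-forward net despite the RecurrentNetwork container. A sketch of the LSTM variant that the imports and CONFIG['PEEPHOLES'] hint at (the layer size of 10 is illustrative):

In [ ]:
# Sketch: an LSTM variant of the network above.
lstm_net = RecurrentNetwork()
lstm_net.addInputModule(LinearLayer(dim=ds.indim, name='in'))
lstm_net.addModule(LSTMLayer(10, peepholes=CONFIG['PEEPHOLES'], name='lstm'))
lstm_net.addOutputModule(TanhLayer(dim=ds.outdim, name='out'))
lstm_net.addConnection(FullConnection(lstm_net['in'], lstm_net['lstm']))
lstm_net.addConnection(FullConnection(lstm_net['lstm'], lstm_net['out']))
lstm_net.addRecurrentConnection(FullConnection(lstm_net['lstm'], lstm_net['lstm']))
lstm_net.sortModules()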

In [20]:
# Instantiate the trainer specified in CONFIG
trainer = CONFIG['TRAINERCLASS'](net, dataset=ds, verbose=True)

In [21]:
# Carry out the training
net.reset()
# train_errors = []
t0 = time()
EPOCHS = CONFIG['EPOCHS_PER_CYCLE'] * CONFIG['CYCLES']
# trainer.trainUntilConvergence(maxEpochs=EPOCHS, verbose=True)
# start_time = time()
print("Starting training with", EPOCHS, "epochs...")
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
#    train_errors.append(trainer.testOnData())
    # epoch = (i+1) * CONFIG['EPOCHS_PER_CYCLE']
    # seconds_elapsed = time() - start_time
    # seconds_per_epoch = seconds_elapsed / epoch
    # seconds_remaining = (EPOCHS - epoch) * seconds_per_epoch
    # td_elapsed = timedelta(seconds=seconds_elapsed)
    # td_elapsed_str = str(td_elapsed).split('.')[0]
    # eta = (datetime.now() + timedelta(seconds=seconds_remaining)).time()
    # eta = eta.strftime("%H:%M:%S")
    # print("\r epoch = {}/{}    error = {}  elapsed = {}   ETA = {}"
    #       .format(epoch, EPOCHS, train_errors[-1], td_elapsed_str, eta),
    #       end="")
    # stdout.flush()
print("Finished training.  total seconds =", time() - t0)


Starting training with 30 epochs...
epoch      0  total error   0.00026719   avg weight      0.088578
epoch      1  total error    0.0042776   avg weight       0.12619
epoch      2  total error    0.0040618   avg weight       0.11283
epoch      3  total error    0.0012811   avg weight       0.11392
epoch      4  total error   0.00084936   avg weight      0.090627
epoch      5  total error   0.00021036   avg weight      0.082968
epoch      6  total error   0.00037266   avg weight      0.076357
epoch      7  total error   0.00016642   avg weight      0.078732
epoch      8  total error   0.00016555   avg weight      0.068608
epoch      9  total error    0.0001898   avg weight      0.068807
epoch     10  total error   0.00015215   avg weight      0.069771
epoch     11  total error   0.00016071   avg weight      0.069725
epoch     12  total error   0.00015071   avg weight      0.070775
epoch     13  total error   0.00015529   avg weight      0.070229
epoch     14  total error    0.0001496   avg weight      0.070539
epoch     15  total error   0.00014922   avg weight      0.071817
epoch     16  total error   0.00014971   avg weight      0.072686
epoch     17  total error   0.00014866   avg weight      0.070897
epoch     18  total error   0.00014877   avg weight      0.071682
epoch     19  total error   0.00014863   avg weight      0.070521
epoch     20  total error   0.00014832   avg weight      0.071394
epoch     21  total error   0.00014814   avg weight      0.071562
epoch     22  total error    0.0001479   avg weight      0.070967
epoch     23  total error   0.00014761   avg weight      0.071654
epoch     24  total error    0.0001473   avg weight      0.071777
epoch     25  total error   0.00014693   avg weight      0.071904
epoch     26  total error   0.00014652   avg weight      0.072413
epoch     27  total error   0.00014605   avg weight      0.072934
epoch     28  total error   0.00014552   avg weight      0.073345
epoch     29  total error   0.00014553   avg weight       0.07359
Finished training.  total seconds = 1400.25071597
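
To restore the commented-out train_errors bookkeeping in the training cell, a sketch that replaces the loop above and plots the learning curve (testOnData returns the average error over the training set):

In [ ]:
# Sketch: training loop with per-cycle error tracking, replacing the
# loop above.
train_errors = []
for i in xrange(CONFIG['CYCLES']):
    trainer.trainEpochs(CONFIG['EPOCHS_PER_CYCLE'])
    train_errors.append(trainer.testOnData())
plt.plot(train_errors)
plt.xlabel('cycle')
plt.ylabel('average error on training data')
plt.show()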

In [22]:
# Disaggregate!
START = "2014-01-01"
END = "2014-01-03"
print("Starting disaggregation...")
net.reset()
estimates = pd.Series(index=appliances[START:END].index)
for date, mains_value in mains[START:END].iteritems():
    estimates[date] = net.activate(mains_value)


Starting disaggregation...
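
The estimates are first differences in the tanh-scaled space. A sketch that integrates them back into an approximate power signal (assumes maximum from the scaling cell; the reconstruction is only defined up to a constant offset, since diff() discarded it):

In [ ]:
# Sketch: reconstruct an approximate power signal from the diffed,
# scaled estimates.
estimated_power = estimates.cumsum() * maximum
estimated_power.plot()
plt.ylabel('reconstructed appliance power')
plt.show()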

In [23]:
estimates.plot()
plt.show()

[plot: network's appliance estimates over the disaggregation window]
In [24]:
mains[START:END].plot()
plt.show()

[plot: standardised mains over the disaggregation window]
In [25]:
appliances[START:END].plot()
plt.show()

[plot: scaled appliance signal over the disaggregation window]
In [26]:
ax = estimates[START:END].cumsum().plot(label='estimates')
ax = mains_same_scale_as_appliances[START:END].cumsum().plot(ax=ax, label='aggregate')
ax = appliances[START:END].cumsum().plot(ax=ax, label='appliance')
plt.legend()
plt.show()

[plot: cumulative sums of estimates, aggregate and appliance]
In [27]:
estimates.cumsum().to_hdf('neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']), 'df')
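
And reading the saved estimates back for later comparison across experiments:

In [ ]:
# Read the saved cumulative estimates back.
saved = pd.read_hdf(
    'neuronilm_estimates_{:03d}.hdf'.format(CONFIG['EXPERIMENT_NUMBER']),
    'df')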

In [ ]: