In [1]:
from __future__ import print_function
import seaborn as sns
from IPython.core.display import HTML
from IPython.display import Image
import pandas as pd
import calendar
from nilmtk.disaggregate.co_1d import CO_1d
from nilmtk.cross_validation import train_test_split
import nilmtk.preprocessing.electricity.building as prepb
import nilmtk.stats.electricity.building as bstats
import nilmtk.stats.electricity.single as sstats
from nilmtk.dataset import DataSet
from nilmtk.dataset import REDD
from nilmtk.plots import plot_series
from nilmtk.sensors.electricity import Measurement
from nilmtk.metrics import (mean_normalized_error_power,
fraction_energy_assigned_correctly,
f_score,
rms_error_power)
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings -- consider narrowing it.
warnings.filterwarnings("ignore")
# Use a serif font for the figures drawn below.
sns.set(font="serif")
def pretty_print_dict(dictionary):
    """Render a dictionary as a nested HTML bullet list in the notebook.

    Each key becomes a bold bullet. A value that is a ``list`` is rendered
    as a nested bullet list with one bullet per item; any other value is
    rendered inline after its key.

    Parameters
    ----------
    dictionary : dict
        Mapping to display.

    Notes
    -----
    Keys, values and list items are formatted with ``str.format`` and
    inserted into the markup unescaped.
    """
    parts = ['<ul>']
    # `.items()` works on both Python 2 and Python 3, unlike `.iteritems()`.
    for key, value in dictionary.items():
        parts.append('<li><strong>{}</strong>: '.format(key))
        if isinstance(value, list):
            parts.append('<ul>')
            parts.extend('<li>{}</li>'.format(item) for item in value)
            parts.append('</ul></li>')
        else:
            parts.append('{}</li>'.format(value))
    parts.append('</ul>')
    display(HTML(''.join(parts)))
In [2]:
from nilmtk.dataset import REDD
# Create the REDD dataset object and load the low-frequency data from disk.
# NOTE(review): the path is absolute and machine-specific -- adjust it for your setup.
redd = REDD()
redd.load('/data/REDD/low_freq/')
# Note the increase in system memory usage after this call.
# NILMTK v0.2 does not eagerly load data like v0.1 does.
In [3]:
# Render the dataset-level metadata dictionary as an HTML bullet list.
pretty_print_dict(redd.metadata)
In [4]:
# Print the raw labels file for house 1 via the shell (requires a Unix-like `cat`).
!cat '/data/REDD/low_freq/house_1/labels.dat'
In [5]:
# Shortcut to the electricity data of building 1.
electric = redd.buildings[1].utility.electric
# List the keys under which this building's appliances are stored.
electric.appliances.keys()
# Note that the washer dryer has been converted to a single DualSupply appliance
Out[5]:
In [6]:
# Look up the appliance stored under the key ('fridge', 1).
fridge = electric.appliances['fridge', 1]
# fridge is a 2-column matrix (a Python Pandas DataFrame object)
# Preview the first few rows.
fridge.head()
Out[6]:
In [7]:
# Plot the fridge readings between 2011-04-18 and 2011-04-19 (label-based slicing on the index).
fridge["2011-04-18":"2011-04-19"].plot()
Out[7]:
All datasets have imperfections. We need to understand the imperfections of each dataset before proceeding with data analysis. We'll continue working with REDD but please note that we're not picking on REDD! All datasets have issues!
import nilmtk.stats.electricity.building as bstats
There are two reasons why data might not be recorded:
In the plots below, a dark rectangle shows the presence of a gap. A gap is defined as any pair of consecutive samples further apart than 4 times the sample period for that meter.
In [8]:
# Plot the gaps in this building's electricity data; each dark rectangle marks a gap.
bstats.plot_missing_samples_using_rectangles(electric)
The advantages of plot_missing_samples_using_rectangles are:
The disadvantages are:
max_sample_period is somewhat subjective.

To overcome both of these disadvantages, we have a sister function:
In [9]:
# Sister function to the rectangle plot: shows the missing samples as a bitmap.
bstats.plot_missing_samples_using_bitmap(electric)
Out[9]:
In [10]:
# NOTE(review): presumably the fraction of mains energy captured by the appliance-level meters -- confirm against the nilmtk docs.
bstats.proportion_of_energy_submetered(electric)
Out[10]:
In [11]:
# NOTE(review): presumably each appliance's share of the energy use -- confirm against the nilmtk docs.
bstats.proportion_per_appliance(electric)
Out[11]:
import nilmtk.stats.electricity.single as sstats
In [12]:
# Appliance analysed by the single-appliance statistics cells that follow.
APPLIANCE_TYPE = 'oven'
# Fetch the entry keyed (APPLIANCE_TYPE, 1) from building 1's appliances.
appliance = electric.appliances[(APPLIANCE_TYPE, 1)]
In [13]:
# Sample period of the selected appliance's data; the message below reports it in seconds.
sample_period = sstats.get_sample_period(appliance)
print('average sample period = {:.1f} seconds'.format(sample_period))
In [14]:
# Dropout rate of the selected appliance's data, printed as a percentage.
dropout_rate = sstats.get_dropout_rate(data=appliance)
print('average dropout rate = {:.1%}'.format(dropout_rate))
In [15]:
# Dropout rate computed with rule='D' (one value per day, per the title below) and plotted.
sstats.dropout_rate_per_period(data=appliance, rule='D').plot()
# NOTE(review): ylabel/title are not imported in this notebook -- presumably provided by %pylab; confirm.
ylabel('dropout rate')
title('Dropout rate per day for ' + APPLIANCE_TYPE)
Out[15]:
In [16]:
# Readings at or below this power level are excluded from the histogram.
THRESHOLD = 1
# Mask out sub-threshold readings (they become NaN), take the first column
# and drop the NaNs. `.iloc[:, 0]` replaces the deprecated `.icol(0)`.
appliance_filtered = appliance[appliance > THRESHOLD].iloc[:, 0].dropna()
xlabel('power (watts)')
ylabel('frequency')
h = hist(appliance_filtered.values, bins=100)
In [17]:
# Threshold passed as `on_power_threshold` to the sstats calls below.
# NOTE(review): presumably in watts -- confirm.
ON_POWER_THRESHOLD = 1000
In [18]:
# Hours the appliance was on, using ON_POWER_THRESHOLD as the on/off cut-off.
hours_on = sstats.hours_on(appliance, on_power_threshold=ON_POWER_THRESHOLD)
print(APPLIANCE_TYPE + ' was on for {:.1f} hours.'.format(hours_on))
In [19]:
# Energy used by the appliance; the message below reports it as kWh.
kwh = sstats.energy(appliance)
print(APPLIANCE_TYPE + ' consumed {:.1f} kWh.'.format(kwh))
In [20]:
# Usage per period with freq='D', using the same on-power threshold as above.
# NOTE(review): 'D' is presumably the pandas daily offset alias -- confirm.
usage = sstats.usage_per_period(appliance,
freq='D',
on_power_threshold=ON_POWER_THRESHOLD)
# Show the first 7 rows.
usage.head(n=7)
Out[20]:
In [21]:
# Plot the appliance's data for everything up to 2011-04-24.
plot_series(appliance[:"2011-04-24"],
date_format='%Y-%m-%d',
tz_localize=False)
title(APPLIANCE_TYPE + ' power demand')
Out[21]:
In [22]:
# Activity distribution with hourly bins ('H') over a daily timespan ('D').
dist = sstats.activity_distribution(appliance,
                                    bin_size='H',
                                    timespan='D',
                                    on_power_threshold=ON_POWER_THRESHOLD)
# Graph formatting
x = np.arange(dist.size)
ylabel('frequency')
xlabel('hour of day')
# Build the title from APPLIANCE_TYPE so it stays correct when another
# appliance is selected.
title('Usage of the ' + APPLIANCE_TYPE + ', hour-by-hour, over an average day')
xlim([0, 24])
xticks(range(0, 25, 6))
bar(x, dist.values)
Out[22]:
In [23]:
# Activity distribution with daily bins ('D') over a weekly timespan ('W').
dist = sstats.activity_distribution(appliance,
                                    bin_size='D',
                                    timespan='W',
                                    on_power_threshold=ON_POWER_THRESHOLD)
x = np.arange(dist.size)
ylabel('frequency')
xlabel('day of week')
# Give the figure a real title (it was empty), parallel to the hourly plot.
title('Usage of the ' + APPLIANCE_TYPE + ', day-by-day, over an average week')
# NOTE(review): labels assume the bins run Monday..Sunday -- confirm.
xticks(np.arange(7)+0.5, calendar.day_name[0:7])
bar(x, dist.values)
Out[23]:
In [24]:
# Get a Series of booleans indicating when the appliance is on:
on_series = sstats.on(appliance, on_power_threshold=ON_POWER_THRESHOLD)
# Now get the length of every on-duration
on_durations = sstats.durations(on_series,
                                on_or_off='on',
                                ignore_n_off_samples=10)
xlabel('minutes on')
ylabel('frequency')
# Build the title from APPLIANCE_TYPE rather than hard-coding the appliance.
title('Distribution of on-durations for ' + APPLIANCE_TYPE)
# Dividing by 60 converts to minutes (assumes durations are in seconds -- TODO confirm).
h = hist(on_durations/60, bins=10)
# Print summary statistics for the whole dataset (the text that follows is this call's output).
redd.describe()
NUMBER OF BUILDINGS: 6
NUMBER OF APPLIANCES PER BUILDING:
min = 9.00
mean = 16.50
mode = 15.00
max = 23.00
std = 4.31
PROPORTION OF ENERGY SUBMETERED PER BUILDLING:
min = 0.58
mean = 0.70
mode = 0.58
max = 0.89
std = 0.11
DROPOUT RATE PER CHANNEL, INCLUDING LARGE GAPS:
min = 0.13
mean = 0.58
mode = 0.92
max = 0.92
std = 0.24
DROPOUT RATE PER CHANNEL, IGNORING LARGE GAPS:
min = 0.00
mean = 0.10
mode = 0.09
max = 0.17
std = 0.04
MAINS UPTIME PER BUILDING (DAYS):
min = 3.60
mean = 15.12
mode = 3.60
max = 19.44
std = 5.43
PROPORTION OF TIME SLICES WHERE > 70% ENERGY IS SUBMETERED:
min = 0.23
mean = 0.64
mode = 0.23
max = 0.98
std = 0.26
First, for reference, here's the plot from above showing missing samples:
In [25]:
# Work on building 1 from here on, and re-plot its missing samples for reference.
building = redd.buildings[1]
bstats.plot_missing_samples_using_rectangles(building.utility.electric)
import nilmtk.preprocessing.electricity.building as prepb
In [26]:
# Downsample the building's data with rule '1T' (the pandas offset alias for one minute).
# NOTE(review): `building` is rebound here, so re-running this cell operates on already-downsampled data.
building = prepb.downsample(building, rule='1T')
# Re-fetch the electricity data from the returned building and plot the gaps again.
electric = building.utility.electric
bstats.plot_missing_samples_using_rectangles(electric)
In [27]:
# NOTE(review): presumably fills short gaps in the appliance channels -- confirm against the nilmtk docs.
building = prepb.fill_appliance_gaps(building)
# Now plot missing samples again:
electric = building.utility.electric
bstats.plot_missing_samples_using_rectangles(electric)
In [28]:
# NOTE(review): presumably removes periods without mains data, then aligns all channels on a shared index -- confirm.
building = prepb.drop_missing_mains(building)
building = prepb.make_common_index(building)
# Plot the remaining gaps after both steps.
electric = building.utility.electric
bstats.plot_missing_samples_using_rectangles(electric)
In [29]:
# Replace the building's electricity object with the result of sum_split_supplies().
# NOTE(review): presumably merges split-supply channels into single ones -- confirm.
building.utility.electric = building.utility.electric.sum_split_supplies()
In [30]:
# Hack to trick NILMTK into thinking that REDD mains measures
# active power, not apparent power. This will be handled much better
# in NILMTK v0.2.
mains = building.utility.electric.mains[(1,1)]
# Relabel the column in place: this mutates the object fetched above rather than creating a copy.
mains.rename(columns={Measurement('power','apparent'):
Measurement('power','active')},
inplace=True)
# NOTE(review): presumably drops appliances that contribute less than x (percent?) of the energy -- confirm units of x.
building = prepb.filter_contribution_less_than_x(building, x=5)
In [31]:
# Plot the missing samples once more, now on the fully preprocessed building.
electric = building.utility.electric
bstats.plot_missing_samples_using_rectangles(electric)
from nilmtk.cross_validation import train_test_split
In [32]:
# Split the building into a training part and a test part, with train_size=0.5.
train, test = train_test_split(building, train_size = 0.5)
# train and test are now both Building objects
from nilmtk.disaggregate.co_1d import CO_1d
In [33]:
# Measurement used as the disaggregation feature: active power.
DISAGG_FEATURE = Measurement('power', 'active')
# Create the CO_1d disaggregator and train it on the training building.
disaggregator = CO_1d()
disaggregator.train(train, disagg_features=[DISAGG_FEATURE])
# Below is the learnt model
Out[33]:
In [34]:
# Write the trained model to model.json in the working directory, then print the file via the shell.
disaggregator.export_model('model.json')
!cat model.json
In [35]:
# Load the model back from model.json, delete the temporary file, then display the in-memory model.
disaggregator.import_model('model.json')
!rm model.json
disaggregator.model
Out[35]:
In [36]:
# Run disaggregation on the test building; the results are read from the `predictions` attribute.
disaggregator.disaggregate(test)
predicted_power = disaggregator.predictions
In [37]:
# Preview the first rows of the predictions.
predicted_power.head()
Out[37]:
In [38]:
# Plot mains channel (1, 1) of the test set from 20:00 to 23:00 on 2011-04-27.
ax = test.utility.electric.mains[1,1]["2011-04-27 20:00":"2011-04-27 23:00:00"].plot()
# Fix the y-axis range to 150-450.
ax.set_ylim([150,450])
title('aggregate power demand')
Out[38]:
In [39]:
# Plot the predictions over the same 20:00-23:00 window on 2011-04-27.
ax = predicted_power["2011-04-27 20:00":"2011-04-27 23:00:00"].plot()
# Fix the y-axis range to 0-200.
ax.set_ylim([0,200])
title('predicted appliance power demand')
Out[39]:
In [40]:
# Collect the DISAGG_FEATURE column of every test-set appliance into one
# DataFrame, with one column per appliance key.
app_ground = test.utility.electric.appliances
ground_truth_power = pd.DataFrame(dict(
    (app_key, app_ground[app_key][DISAGG_FEATURE])
    for app_key in app_ground))
# Plot the same 20:00-23:00 window on 2011-04-27 as the plots above.
ax = ground_truth_power["2011-04-27 20:00":"2011-04-27 23:00:00"].plot()
ax.set_ylim([0, 250])
title('ground truth appliance power demand')
Out[40]:
from nilmtk.metrics import (mean_normalized_error_power,
fraction_energy_assigned_correctly,
f_score,
rms_error_power)
In [41]:
# Map a human-readable metric name to the nilmtk function that computes it.
metrics = {
'mean normalized error power':
mean_normalized_error_power,
'fraction energy assigned correctly':
fraction_energy_assigned_correctly,
'f score': f_score,
'RMS Error Power': rms_error_power
}
In [42]:
# Evaluate every metric on the predictions against the ground truth and
# display each result under its own heading.
# `.items()` works on both Python 2 and Python 3, unlike `.iteritems()`.
for metric_name, metric_func in metrics.items():
    result = metric_func(predicted_power, ground_truth_power)
    display(HTML('<h5>' + metric_name + '</h5>'))
    # Dict results get the bullet-list rendering; anything else is printed as text.
    if isinstance(result, dict):
        pretty_print_dict(result)
    else:
        print(result)
    print()
nilmtk website: nilmtk.github.io
Contact: jack.kelly@imperial.ac.uk
In [1]:
# CSS styling
from IPython.core.display import display, HTML
# Read the stylesheet inside a `with` block so the file handle is closed
# instead of being left open until garbage collection.
with open('static/styles.css', 'r') as css_file:
    display(HTML(css_file.read()))