In [9]:
from __future__ import print_function 
import seaborn as sns
from IPython.core.display import HTML
from IPython.display import Image
import pandas as pd

from nilmtk.disaggregate.co_1d import CO_1d
from nilmtk.cross_validation import train_test_split
import nilmtk.preprocessing.electricity.building as prepb
import nilmtk.stats.electricity.building as bstats
import nilmtk.stats.electricity.single as sstats
from nilmtk.dataset import DataSet
from nilmtk.dataset import REDD
from nilmtk.building import Building
from nilmtk.plots import plot_series
from nilmtk.sensors.electricity import Measurement
from nilmtk.utils import get_immediate_subdirectories
from nilmtk.sensors.electricity import Measurement
from nilmtk.sensors.electricity import ApplianceName
from nilmtk.sensors.electricity import MainsName
from nilmtk.metrics import (mean_normalized_error_power, 
                            fraction_energy_assigned_correctly, 
                            f_score,
                            rms_error_power)

import calendar
import os
from collections import defaultdict

import warnings
warnings.filterwarnings("ignore")

sns.set(font="serif")

def pretty_print_dict(dictionary):
    html = '<ul>'
    for key, value in dictionary.iteritems():
        html += '<li><strong>{}</strong>: '.format(key)
        if isinstance(value, list):
            html += '<ul>'
            for item in value:
                html += '<li>{}</li>'.format(item)
            html += '</ul></li>'
        else:
            html += '{}</li>'.format(value)
    html += '</ul>'
    display(HTML(html))
    
# Mapping between appliances actual name
appliance_name_mapping = {
    'ksac': 'kitchen outlets',
    'tvroom': 'misc',
    'garage/refrigerator': 'fridge',
    'office': 'desktop computer',
    'dryg': 'washer dryer',
    'bath': 'bathroom misc',
    'genlight': 'lighting',
    'oven': 'oven',
    'bedroom': 'bedroom misc',
    'subl': 'subpanel',
    'masterbed': 'bedroom misc',
    'bathroom': 'bathroom misc',
    'livingroom': 'misc',
    'sprinkler': 'sprinkler',
    'disposal': 'disposal',
    'masterbath': 'bathroom misc',
    'microwave': 'microwave',
    'drye': 'washer dryer',
    'smallappliance': 'misc',
    'washer': 'washing machine',
    'furnace': 'central heater boiler',
    'gri': 'grid',
    'lighting&plugs': 'plugs',
    'famroom': 'misc',
    'dryer': 'washer dryer',
    'diningroom': 'misc',
    'ove': 'oven',
    'backyard': 'misc',
    'cooktop': 'cooker',
    'refrigerator': 'fridge',
    'kitchen': 'kitchen misc',
    'dishwasher': 'dishwasher',
    'theater': 'entertainment',
    'washingmachine': 'washing machine',
    'car': 'electric vehicle',
    'air': 'air conditioner',
    'garage': 'misc',
    'range': 'cooker',
    'waterheater': 'water heater',
    'security': 'security',
    'ai': 'air conditioner'
}

Testing the Nilmtk toolkit


In [10]:
class Pecan(DataSet):

    def __init__(self):
        super(Pecan, self).__init__()
        self.metadata = {
            'name': 'Pecan Street',
            'urls': ['http://www.pecanstreet.org/',
                     'http://www.pecanstreet.org/2013/04/with-free-sample-data'
                     '-set-pecan-street-research-institute-expands-access-to'
                     '-world-class-energy-use-data-to-global'
                     '-university-researchers/']
        }

    def load(self, root_directory):
        """Load entire dataset into memory"""
        building_names = self.load_building_names(root_directory)
        print (building_names)
        for building_name in building_names:
            self.load_building(root_directory, building_name)

    def add_mains(self, building, df):
        # Find columns containing mains in them
        mains_column_names = [x for x in df.columns if x]

        # Adding mains
        building.utility.electric.mains = {}
        building.utility.electric.mains[
            MainsName(1, 1)] = df[mains_column_names]
        return building

    def add_appliances(self, building, df):
        # Getting a list of appliance names
        appliance_names = list(set([a.split("_")[0] for a in df.columns
                                    if "mains" not in a]))

        # Adding appliances
        building.utility.electric.appliances = {}
        for appliance in appliance_names:
            # Finding headers corresponding to the appliance
            names = [x for x in df.columns if x.split("_")[0] == appliance]

            # TODO: Replace column names and remove the appliance name from
            # them
            building.utility.electric.appliances[appliance] = df[names]
        return building

    def standardize(self, df, building):

        # Converting power from kW to W
        # Note some homes contain Voltage as well, need to multiply that
        # back with 1e3
        df = df * 1e3

        # Convert to standard appliance names
        # 1. Mains is use [kW]; replace space with mains_0_active
        # 2. If voltage is present, rename the column and divide it by 1e3
        # 3. If 'gen' is present, delete the column; TODO think about where
        # to put this column
        # 4. Delete 'Grid' column; TODO same as #3
        # 5. Lower case all appliance names
        # 6 Replace " " with "_" in appliance name
        # 7. Appliance names should have separate active and apparent fields
        # (have a *)

        # 1
        df = df.rename(columns={'use [kW]': Measurement('power', 'active')})
        #print df.columns

        # Adding Mains Power
        building.utility.electric.mains = {}
        building.utility.electric.mains[
            MainsName(1, 1)] = df[[Measurement('power', 'active')]]
        df = df.drop(Measurement('power', 'active'), 1)

        # 2
        if "LEG1V [V]" in df.columns:
            df = df.rename(columns={"LEG1V [V]": Measurement('voltage', '')})

            # Adding voltage if it exists
            building.utility.electric.mains[
                MainsName(1, 1)][Measurement('voltage', '')] = df[Measurement('voltage', '')] / 1e3
            df = df.drop(Measurement('voltage', ''), 1)
            '''For now delete leg2
            df = df.rename(columns=lambda x: x.replace("LEG2V [V]",
                                                       "mains_2_voltage"))            
            df['mains_2_voltage'] = df['mains_2_voltage'] / 1e3'''

            # TODO: See what to do with this bit of information
            df = df.drop('LEG2V [V]', 1)

        # 3
        if "gen [kW]" in df.columns:
            df = df.drop('gen [kW]', 1)

        # 4
        if 'Grid [kW]' in df.columns:
            df = df.drop('Grid [kW]', 1)
        if "Grid* [kVA]" in df.columns:
            df = df.drop('Grid* [kVA]', 1)

        # 5
        df = df.rename(columns=lambda x: x.lower())

        # 6
        df = df.rename(columns=lambda x: x.replace(" ", "_"))

        # 7
        df = df.rename(columns=lambda x: x.replace("[kw]", "active"))
        df = df.rename(columns=lambda x: x.replace("[kva]", "apparent"))
        df = df.rename(columns=lambda x: x.replace("*", ""))

        # List of appliance names
        appliance_names = list(set([a.split("_")[0] for a in df.columns
                                    if type(a) != type(Measurement('power', 'active'))]))

        # Adding appliances
        building.utility.electric.appliances = {}
        building_appliance_count = defaultdict(int)
        for appliance in appliance_names:
            # Finding headers corresponding to the appliance
            names = [x for x in df.columns if x.split("_")[0] == appliance]

            names_modified = [Measurement('power', x.split("_")[1])
                              for x in names]
            name_modification = {names[i]: names_modified[i] for i in range(len(names))}

            # TODO: Replace column names and remove the appliance name from
            # them
            if appliance[:-1] in appliance_name_mapping.keys():
                appliance_name = appliance_name_mapping[appliance[:-1]]
                building_appliance_count[appliance_name] += 1
                appliance_instance = building_appliance_count[appliance_name]
                building.utility.electric.appliances[
                    ApplianceName(appliance_name, appliance_instance)] = df[names]
                building.utility.electric.appliances[
                    ApplianceName(appliance_name, appliance_instance)] = building.utility.electric.appliances[
                    ApplianceName(appliance_name, appliance_instance)].rename(columns=name_modification)

                building.utility.electric.appliances[
                    ApplianceName(appliance_name, appliance_instance)] = building.utility.electric.appliances[
                    ApplianceName(appliance_name, appliance_instance)].astype('float32')
            building.utility.electric.mains[
                MainsName(1, 1)] = building.utility.electric.mains[
                MainsName(1, 1)].astype('float32')

        return building

In [30]:
class Pecan_15min(Pecan):

    def __init__(self):
        super(Pecan_15min, self).__init__()

    def load_building(self, root_directory, data_file, sheet_name):
        spreadsheet = pd.ExcelFile(os.path.join(root_directory, data_file))
        df = spreadsheet.parse(sheet_name, index_col=0,
                               date_parser=True)
        # Create a new building
        building = Building()

        building = self.standardize(df, building)

        # Adding this building to dict of buildings
        #building_name = building_name.replace(" ", "_")
        #building_number = int(building_name[-2:])
        #self.buildings[building_number] = building

    def load_building_names(self, root_directory, data_file):
        spreadsheet = pd.ExcelFile(os.path.join(root_directory,
                                                data_file))
        return spreadsheet.sheet_names

In [31]:
from nilmtk.dataset import pecan

dataset = Pecan_15min()
dataset.load_building("../../data","15_min/test_data.xlsx","pecan_street_raw_2013_egauge15_")


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-31-56339751e247> in <module>()
      2 
      3 dataset = Pecan_15min()
----> 4 dataset.load_building("../../data","15_min/test_data.xlsx","pecan_street_raw_2013_egauge15_")

<ipython-input-30-fada4c49e83f> in load_building(self, root_directory, data_file, sheet_name)
     11         building = Building()
     12 
---> 13         building = self.standardize(df, building)
     14 
     15         # Adding this building to dict of buildings

<ipython-input-10-d356a2bd0a75> in standardize(self, df, building)
     50         # Note some homes contain Voltage as well, need to multiply that
     51         # back with 1e3
---> 52         df = df * 1e3
     53 
     54         # Convert to standard appliance names

/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in f(self, other, axis, level, fill_value)
    240                 raise ValueError("Bad argument shape")
    241         else:
--> 242             return self._combine_const(other, na_op)
    243 
    244     f.__name__ = name

/usr/local/lib/python2.7/site-packages/pandas/core/frame.pyc in _combine_const(self, other, func, raise_on_error)
   3902             return self
   3903 
-> 3904         new_data = self._data.eval(func, other, raise_on_error=raise_on_error)
   3905         return self._constructor(new_data)
   3906 

/usr/local/lib/python2.7/site-packages/pandas/core/internals.pyc in eval(self, *args, **kwargs)
   1281 
   1282     def eval(self, *args, **kwargs):
-> 1283         return self.apply('eval', *args, **kwargs)
   1284 
   1285     def putmask(self, *args, **kwargs):

/usr/local/lib/python2.7/site-packages/pandas/core/internals.pyc in apply(self, f, *args, **kwargs)
   1267                 applied = f(blk, *args, **kwargs)
   1268             else:
-> 1269                 applied = getattr(blk,f)(*args, **kwargs)
   1270 
   1271             if isinstance(applied,list):

/usr/local/lib/python2.7/site-packages/pandas/core/internals.pyc in eval(self, func, other, raise_on_error, try_cast)
    468                 is_transposed = True
    469 
--> 470         values, other = self._try_coerce_args(values, other)
    471         args = [ values, other ]
    472         try:

/usr/local/lib/python2.7/site-packages/pandas/core/internals.pyc in _try_coerce_args(self, values, other)
    873             other = tslib.iNaT
    874         else:
--> 875             other = other.view('i8')
    876 
    877         return values, other

AttributeError: 'float' object has no attribute 'view'

In [ ]: