In [1]:
import matplotlib.pyplot as plt
#import seaborn as sns
#plt.style.use('ggplot')
%matplotlib inline
from nilmtk import DataSet
import nilmtk
import warnings
import pandas as pd
import numpy as np
import sys
warnings.filterwarnings("ignore")

In [2]:
ds = DataSet("/Users/nipunbatra/Downloads/2months_100homes.hdf5")

In [3]:
num_appliances_per_home = {}
seen = 0
for building in ds.buildings:
    seen+=1
    sys.stdout.write("\r "+str(seen*100.0/100)+"% done")
    sys.stdout.flush()
    e = ds.buildings[building].elec
    if e is not None:
        num_appliances_per_home[building] = len(e.appliances)
    else:
        print building


 100.0% done

In [4]:
df_num_appliances = pd.DataFrame({"num_appliances":num_appliances_per_home})

In [5]:
zero_appliances = df_num_appliances[df_num_appliances["num_appliances"]==0]
zero_appliances.head()


Out[5]:
num_appliances
4 0
24 0
33 0
37 0
38 0

In [7]:
len(zero_appliances)


Out[7]:
15

In [8]:
categories = set()
appliance_types = set()
for appliance in nilmtk.global_meter_group.appliances:
    categories.update(appliance.categories())
    appliance_types.add(appliance.type['type'])

In [9]:
appliance_types


Out[9]:
{'air conditioner',
 'appliance',
 'dish washer',
 'electric furnace',
 'electric hot tub heater',
 'electric space heater',
 'electric swimming pool heater',
 'electric vehicle',
 'electric water heating appliance',
 'freezer',
 'fridge',
 'light',
 'microwave',
 'oven',
 'security alarm',
 'sockets',
 'spin dryer',
 'stove',
 'unknown',
 'washer dryer',
 'washing machine',
 'waste disposal unit'}

In [229]:
ALL_APPLIANCES = list(appliance_types)
ALL_APPLIANCES


Out[229]:
['stove',
 'electric vehicle',
 'oven',
 'sockets',
 'microwave',
 'electric space heater',
 'unknown',
 'security alarm',
 'freezer',
 'electric furnace',
 'dish washer',
 'electric hot tub heater',
 'electric water heating appliance',
 'fridge',
 'waste disposal unit',
 'washer dryer',
 'electric swimming pool heater',
 'light',
 'washing machine',
 'appliance',
 'spin dryer',
 'air conditioner']

In [ ]:
out = {}
for building in ds.buildings:
#for building in [1]:


    try:
        print building
        e = ds.buildings[building].elec
        out[building] = {}
        out[building]["total"] = e.mains().total_energy()["active"]
        out[building]["submetered"] = 100*e.proportion_of_energy_submetered()
        for appliance in ALL_APPLIANCES:
            print appliance
            m = e.select_using_appliances(type = appliance)
            if len(m.meters)>0:
                out[building][appliance] = m.total_energy()["active"]
            else:
                out[building][appliance] = np.NAN
    except Exception as e:
        pass

In [174]:
df_total = pd.DataFrame(out).T

In [176]:
df_total = df_total[df_total>0]

In [177]:
len(df_total)


Out[177]:
100

In [102]:
df_total = df_total.drop('submetered', 1)

In [103]:
len(df_total)


Out[103]:
100

In [104]:
#df_total = df_total.dropna()

In [105]:
df_total = df_total[df_total["total"]>0]
df_total = df_total[df_total["Washing"]>0]

In [106]:
df_total = df_total[df_total["HVAC"]>0]
df_total = df_total[df_total["Unmetered"]>0]

In [108]:
df_total_percentage=100*df_total[[col for col in df_total.columns if col not in ['total']]]/df_total[['total']]

In [109]:
df_reduced=df_total[[col for col in df_total.columns if col not in ['total']]]

In [110]:
for col in df_reduced.columns:
    df_reduced[col] = 100*df_reduced[col]/df_total['total']

In [111]:
df_reduced.head()


Out[111]:
Electric vehicle Fridge HVAC Kitchen Lighting Others Sockets Washing Water heating Unmetered
1 NaN NaN 0.026188 1.775346 NaN NaN NaN 2.445574 NaN 99.929510
3 NaN 5.016922 33.767455 3.259490 30.538475 0.023428 11.719942 4.116258 NaN 98.525967
7 NaN 2.629206 1.572423 0.556408 NaN NaN NaN 1.196294 NaN 99.900815
8 NaN NaN 0.400722 NaN NaN NaN NaN 18.211417 NaN 99.689798
9 NaN NaN 71.444723 NaN NaN NaN 22.094554 4.014734 NaN 98.374100

In [189]:
dft = df_total.copy()
dft = dft/dft.total
dft = dft[[x for x in df_total.columns if x!="total"]]

In [197]:
abc = df_total.copy()

In [198]:
for col in abc.columns:
    abc[col] = abc[col]/abc['total']

In [200]:
abc.head()


Out[200]:
air conditioner appliance dish washer electric furnace electric hot tub heater electric space heater electric swimming pool heater electric vehicle electric water heating appliance freezer ... security alarm sockets spin dryer stove submetered total unknown washer dryer washing machine waste disposal unit
1 0.000262 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN 0.024456 NaN 0.004806 1 NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 0.142502 NaN 0.028147 0.195173 NaN NaN NaN NaN NaN NaN ... NaN 0.117199 0.039529 NaN 0.060138 1 NaN NaN 2.402583 0.34455
4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 1 NaN NaN NaN NaN
5 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 24 columns


In [207]:
d = abc[[x for x in abc.columns if x not in ["total", "submetered"]]]

In [211]:
d = d[d<=1]

In [225]:
# %load common.py
"""Contains helper functions"""
import pandas as pd
import numpy as np
import requests
from StringIO import StringIO
import matplotlib as mpl
import matplotlib.pyplot as plt


uuid_path = "http://energy.iiitd.edu.in:9102/backend/api"
data_path = "http://energy.iiitd.edu.in:9102/backend/api/data/"


import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from math import sqrt

SPINE_COLOR = 'gray'

_to_ordinalf_np_vectorized = np.vectorize(mdates._to_ordinalf)

def plot_series(series, **kwargs):
    """Plot function for series which is about 5 times faster than
    pd.Series.plot().

    Parameters
    ----------
    series : pd.Series
    ax : matplotlib Axes, optional
        If not provided then will generate our own axes.
    fig : matplotlib Figure
    date_format : str, optional, default='%d/%m/%y %H:%M:%S'
    tz_localize : boolean, optional, default is True
        if False then display UTC times.

    Can also use all **kwargs expected by `ax.plot`
    """
    ax = kwargs.pop('ax', None)
    fig = kwargs.pop('fig', None)
    date_format = kwargs.pop('date_format', '%d/%m/%y %H:%M:%S')
    tz_localize = kwargs.pop('tz_localize', True)

    if ax is None:
        ax = plt.gca()

    if fig is None:
        fig = plt.gcf()

    x = _to_ordinalf_np_vectorized(series.index.to_pydatetime())
    ax.plot(x, series, **kwargs)
    tz = series.index.tzinfo if tz_localize else None
    ax.xaxis.set_major_formatter(mdates.DateFormatter(date_format, 
                                                      tz=tz))
    ax.set_ylabel('watts')
    fig.autofmt_xdate()
    return ax


def latexify(fig_width=None, fig_height=None, columns=1):
    """Set up matplotlib's RC params for LaTeX plotting.
    Call this before plotting a figure.

    Parameters
    ----------
    fig_width : float, optional, inches
    fig_height : float,  optional, inches
    columns : {1, 2}
    """

    # code adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples

    # Width and max height in inches for IEEE journals taken from
    # computer.org/cms/Computer.org/Journal%20templates/transactions_art_guide.pdf

    assert(columns in [1,2])

    if fig_width is None:
        fig_width = 3.39 if columns==1 else 6.9 # width in inches

    if fig_height is None:
        golden_mean = (sqrt(5)-1.0)/2.0    # Aesthetic ratio
        fig_height = fig_width*golden_mean # height in inches

    MAX_HEIGHT_INCHES = 8.0
    if fig_height > MAX_HEIGHT_INCHES:
        print("WARNING: fig_height too large:" + fig_height + 
              "so will reduce to" + MAX_HEIGHT_INCHES + "inches.")
        fig_height = MAX_HEIGHT_INCHES

    params = {'backend': 'ps',
              'text.latex.preamble': ['\usepackage{gensymb}'],
              'axes.labelsize': 6, # fontsize for x and y labels (was 10)
              'axes.titlesize': 6,
              'text.fontsize': 6, # was 10
              'legend.fontsize': 6, # was 10
              'xtick.labelsize': 6,
              'ytick.labelsize': 6,
              'text.usetex': True,
              'figure.figsize': [fig_width,fig_height],
              'font.family': 'serif'
    }

    matplotlib.rcParams.update(params)


def format_axes(ax):

    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)

    for spine in ['left', 'bottom']:
        ax.spines[spine].set_color(SPINE_COLOR)
        ax.spines[spine].set_linewidth(0.5)

    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    for axis in [ax.xaxis, ax.yaxis]:
        axis.set_tick_params(direction='out', color=SPINE_COLOR)

#    matplotlib.pyplot.tight_layout()

    return ax


def get_readings_csv(uuid, start_time, end_time):
        """Get readings from sMAP server for a `uuid` between 
        start_time and end_time

        Parameters
        ---------
        uuid : string
            Unique identifier for the sMAP channel
        start_time : long int
            Timestamp in milliseconds
        end_time : long int
            Timestamp in milliseconds

        Returns
        -------
        smap_data : List of Lists
            Contains data in sMAP format as follows: [[timestamp (ms), power]]
        """
        query = "{}uuid/{}?starttime={}&endtime={}&format=csv".format(data_path,
                                                           uuid, start_time, end_time)
        response = requests.get(query)
        return response


def pd_to_epoch(pd_time):
    temp = pd.DatetimeIndex([pd_time]).astype(np.int64) //10**6
    return temp[0]

def create_df(query, start_time, end_time, uuid=None):
    start_time = pd_to_epoch(start_time)
    end_time = pd_to_epoch(end_time)
    if uuid is None:
        uuid = find_uuid(query)
    temp = get_readings_csv(uuid,start_time,end_time)
    df = pd.read_csv(StringIO(temp.content), names=['timestamp','value'])
    df.index = pd.to_datetime(df.timestamp, unit='ms')
    df = df.drop('timestamp',1)
    df = df.tz_localize("UTC").tz_convert("Asia/Kolkata")
    return df  

def find_uuid(query = None):
    """Returns the UUID corresponding to a home"""
    response = requests.post(uuid_path, query)
    
    uuid = response.json()[0]["uuid"]
    return uuid

def heatmap(df,
            edgecolors='w',
            cmap=mpl.cm.RdYlBu_r,
            log=False):    
    width = len(df.columns)/4
    height = len(df.index)/4
    
    fig, ax = plt.subplots(figsize=(width,height))
      
    heatmap = ax.pcolor(df,
                        edgecolors=edgecolors,  # put white lines between squares in heatmap
                        cmap=cmap,
                        norm=mpl.colors.LogNorm() if log else None)
    
    ax.autoscale(tight=True)  # get rid of whitespace in margins of heatmap
    ax.set_aspect('equal')  # ensure heatmap cells are square
    ax.xaxis.set_ticks_position('top')  # put column labels at the top
    ax.tick_params(bottom='off', top='off', left='off', right='off')  # turn off ticks
    
    plt.yticks(np.arange(len(df.index)) + 0.5, df.index)
    plt.xticks(np.arange(len(df.columns)) + 0.5, df.columns, rotation=90)
    
    # ugliness from http://matplotlib.org/users/tight_layout_guide.html
    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", "3%", pad="1%")
    plt.colorbar(heatmap, cax=cax)

In [228]:
latexify(columns=1, fig_height=3)

ax = d.plot(kind='box', rot=90)
plt.ylim((0, 1))
plt.ylabel("Proportion of aggregate")
from common import format_axes
format_axes(ax)
plt.tight_layout()
plt.grid(False)
plt.savefig("/Users/nipunbatra/Dropbox/sharelatex/globalsip_smart_buildings_2015/figs/boxplot_category.pdf")



In [ ]: