notebook.community

Edit and run



In [1]:

    
import matplotlib.pyplot as plt
#import seaborn as sns
#plt.style.use('ggplot')
%matplotlib inline
from nilmtk import DataSet
import nilmtk
import warnings
import pandas as pd
import numpy as np
import sys
warnings.filterwarnings("ignore")



In [2]:

    
ds = DataSet("/Users/nipunbatra/Downloads/2months_100homes.hdf5")



In [3]:

    
num_appliances_per_home = {}
seen = 0
for building in ds.buildings:
    seen+=1
    sys.stdout.write("\r "+str(seen*100.0/100)+"% done")
    sys.stdout.flush()
    e = ds.buildings[building].elec
    if e is not None:
        num_appliances_per_home[building] = len(e.appliances)
    else:
        print building









    



 100.0% done



In [4]:

    
df_num_appliances = pd.DataFrame({"num_appliances":num_appliances_per_home})



In [5]:

    
zero_appliances = df_num_appliances[df_num_appliances["num_appliances"]==0]
zero_appliances.head()









    Out[5]:






  
    
      
      num_appliances
    
  
  
    
      4
      0
    
    
      24
      0
    
    
      33
      0
    
    
      37
      0
    
    
      38
      0



In [7]:

    
len(zero_appliances)









    Out[7]:





15



In [8]:

    
categories = set()
appliance_types = set()
for appliance in nilmtk.global_meter_group.appliances:
    categories.update(appliance.categories())
    appliance_types.add(appliance.type['type'])



In [9]:

    
appliance_types









    Out[9]:





{'air conditioner',
 'appliance',
 'dish washer',
 'electric furnace',
 'electric hot tub heater',
 'electric space heater',
 'electric swimming pool heater',
 'electric vehicle',
 'electric water heating appliance',
 'freezer',
 'fridge',
 'light',
 'microwave',
 'oven',
 'security alarm',
 'sockets',
 'spin dryer',
 'stove',
 'unknown',
 'washer dryer',
 'washing machine',
 'waste disposal unit'}



In [229]:

    
ALL_APPLIANCES = list(appliance_types)
ALL_APPLIANCES









    Out[229]:





['stove',
 'electric vehicle',
 'oven',
 'sockets',
 'microwave',
 'electric space heater',
 'unknown',
 'security alarm',
 'freezer',
 'electric furnace',
 'dish washer',
 'electric hot tub heater',
 'electric water heating appliance',
 'fridge',
 'waste disposal unit',
 'washer dryer',
 'electric swimming pool heater',
 'light',
 'washing machine',
 'appliance',
 'spin dryer',
 'air conditioner']



In [ ]:

    
out = {}
for building in ds.buildings:
#for building in [1]:


    try:
        print building
        e = ds.buildings[building].elec
        out[building] = {}
        out[building]["total"] = e.mains().total_energy()["active"]
        out[building]["submetered"] = 100*e.proportion_of_energy_submetered()
        for appliance in ALL_APPLIANCES:
            print appliance
            m = e.select_using_appliances(type = appliance)
            if len(m.meters)>0:
                out[building][appliance] = m.total_energy()["active"]
            else:
                out[building][appliance] = np.NAN
    except Exception as e:
        pass



In [174]:

    
df_total = pd.DataFrame(out).T



In [176]:

    
df_total = df_total[df_total>0]



In [177]:

    
len(df_total)









    Out[177]:





100



In [102]:

    
df_total = df_total.drop('submetered', 1)



In [103]:

    
len(df_total)









    Out[103]:





100



In [104]:

    
#df_total = df_total.dropna()



In [105]:

    
df_total = df_total[df_total["total"]>0]
df_total = df_total[df_total["Washing"]>0]



In [106]:

    
df_total = df_total[df_total["HVAC"]>0]
df_total = df_total[df_total["Unmetered"]>0]



In [108]:

    
df_total_percentage=100*df_total[[col for col in df_total.columns if col not in ['total']]]/df_total[['total']]



In [109]:

    
df_reduced=df_total[[col for col in df_total.columns if col not in ['total']]]



In [110]:

    
for col in df_reduced.columns:
    df_reduced[col] = 100*df_reduced[col]/df_total['total']



In [111]:

    
df_reduced.head()









    Out[111]:






  
    
      
      Electric vehicle
      Fridge
      HVAC
      Kitchen
      Lighting
      Others
      Sockets
      Washing
      Water heating
      Unmetered
    
  
  
    
      1
      NaN
      NaN
      0.026188
      1.775346
      NaN
      NaN
      NaN
      2.445574
      NaN
      99.929510
    
    
      3
      NaN
      5.016922
      33.767455
      3.259490
      30.538475
      0.023428
      11.719942
      4.116258
      NaN
      98.525967
    
    
      7
      NaN
      2.629206
      1.572423
      0.556408
      NaN
      NaN
      NaN
      1.196294
      NaN
      99.900815
    
    
      8
      NaN
      NaN
      0.400722
      NaN
      NaN
      NaN
      NaN
      18.211417
      NaN
      99.689798
    
    
      9
      NaN
      NaN
      71.444723
      NaN
      NaN
      NaN
      22.094554
      4.014734
      NaN
      98.374100



In [189]:

    
dft = df_total.copy()
dft = dft/dft.total
dft = dft[[x for x in df_total.columns if x!="total"]]



In [197]:

    
abc = df_total.copy()



In [198]:

    
for col in abc.columns:
    abc[col] = abc[col]/abc['total']



In [200]:

    
abc.head()









    Out[200]:






  
    
      
      air conditioner
      appliance
      dish washer
      electric furnace
      electric hot tub heater
      electric space heater
      electric swimming pool heater
      electric vehicle
      electric water heating appliance
      freezer
      ...
      security alarm
      sockets
      spin dryer
      stove
      submetered
      total
      unknown
      washer dryer
      washing machine
      waste disposal unit
    
  
  
    
      1
      0.000262
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      0.024456
      NaN
      0.004806
      1
      NaN
      NaN
      NaN
      NaN
    
    
      2
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      3
      0.142502
      NaN
      0.028147
      0.195173
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      0.117199
      0.039529
      NaN
      0.060138
      1
      NaN
      NaN
      2.402583
      0.34455
    
    
      4
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      1
      NaN
      NaN
      NaN
      NaN
    
    
      5
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

5 rows × 24 columns



In [207]:

    
d = abc[[x for x in abc.columns if x not in ["total", "submetered"]]]



In [211]:

    
d = d[d<=1]



In [225]:

    
# %load common.py
"""Contains helper functions"""
import pandas as pd
import numpy as np
import requests
from StringIO import StringIO
import matplotlib as mpl
import matplotlib.pyplot as plt


uuid_path = "http://energy.iiitd.edu.in:9102/backend/api"
data_path = "http://energy.iiitd.edu.in:9102/backend/api/data/"


import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from math import sqrt

SPINE_COLOR = 'gray'

_to_ordinalf_np_vectorized = np.vectorize(mdates._to_ordinalf)

def plot_series(series, **kwargs):
    """Plot function for series which is about 5 times faster than
    pd.Series.plot().

    Parameters
    ----------
    series : pd.Series
    ax : matplotlib Axes, optional
        If not provided then will generate our own axes.
    fig : matplotlib Figure
    date_format : str, optional, default='%d/%m/%y %H:%M:%S'
    tz_localize : boolean, optional, default is True
        if False then display UTC times.

    Can also use all **kwargs expected by `ax.plot`
    """
    ax = kwargs.pop('ax', None)
    fig = kwargs.pop('fig', None)
    date_format = kwargs.pop('date_format', '%d/%m/%y %H:%M:%S')
    tz_localize = kwargs.pop('tz_localize', True)

    if ax is None:
        ax = plt.gca()

    if fig is None:
        fig = plt.gcf()

    x = _to_ordinalf_np_vectorized(series.index.to_pydatetime())
    ax.plot(x, series, **kwargs)
    tz = series.index.tzinfo if tz_localize else None
    ax.xaxis.set_major_formatter(mdates.DateFormatter(date_format, 
                                                      tz=tz))
    ax.set_ylabel('watts')
    fig.autofmt_xdate()
    return ax


def latexify(fig_width=None, fig_height=None, columns=1):
    """Set up matplotlib's RC params for LaTeX plotting.
    Call this before plotting a figure.

    Parameters
    ----------
    fig_width : float, optional, inches
    fig_height : float,  optional, inches
    columns : {1, 2}
    """

    # code adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples

    # Width and max height in inches for IEEE journals taken from
    # computer.org/cms/Computer.org/Journal%20templates/transactions_art_guide.pdf

    assert(columns in [1,2])

    if fig_width is None:
        fig_width = 3.39 if columns==1 else 6.9 # width in inches

    if fig_height is None:
        golden_mean = (sqrt(5)-1.0)/2.0    # Aesthetic ratio
        fig_height = fig_width*golden_mean # height in inches

    MAX_HEIGHT_INCHES = 8.0
    if fig_height > MAX_HEIGHT_INCHES:
        print("WARNING: fig_height too large:" + fig_height + 
              "so will reduce to" + MAX_HEIGHT_INCHES + "inches.")
        fig_height = MAX_HEIGHT_INCHES

    params = {'backend': 'ps',
              'text.latex.preamble': ['\usepackage{gensymb}'],
              'axes.labelsize': 6, # fontsize for x and y labels (was 10)
              'axes.titlesize': 6,
              'text.fontsize': 6, # was 10
              'legend.fontsize': 6, # was 10
              'xtick.labelsize': 6,
              'ytick.labelsize': 6,
              'text.usetex': True,
              'figure.figsize': [fig_width,fig_height],
              'font.family': 'serif'
    }

    matplotlib.rcParams.update(params)


def format_axes(ax):

    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)

    for spine in ['left', 'bottom']:
        ax.spines[spine].set_color(SPINE_COLOR)
        ax.spines[spine].set_linewidth(0.5)

    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    for axis in [ax.xaxis, ax.yaxis]:
        axis.set_tick_params(direction='out', color=SPINE_COLOR)

#    matplotlib.pyplot.tight_layout()

    return ax


def get_readings_csv(uuid, start_time, end_time):
        """Get readings from sMAP server for a `uuid` between 
        start_time and end_time

        Parameters
        ---------
        uuid : string
            Unique identifier for the sMAP channel
        start_time : long int
            Timestamp in milliseconds
        end_time : long int
            Timestamp in milliseconds

        Returns
        -------
        smap_data : List of Lists
            Contains data in sMAP format as follows: [[timestamp (ms), power]]
        """
        query = "{}uuid/{}?starttime={}&endtime={}&format=csv".format(data_path,
                                                           uuid, start_time, end_time)
        response = requests.get(query)
        return response


def pd_to_epoch(pd_time):
    temp = pd.DatetimeIndex([pd_time]).astype(np.int64) //10**6
    return temp[0]

def create_df(query, start_time, end_time, uuid=None):
    start_time = pd_to_epoch(start_time)
    end_time = pd_to_epoch(end_time)
    if uuid is None:
        uuid = find_uuid(query)
    temp = get_readings_csv(uuid,start_time,end_time)
    df = pd.read_csv(StringIO(temp.content), names=['timestamp','value'])
    df.index = pd.to_datetime(df.timestamp, unit='ms')
    df = df.drop('timestamp',1)
    df = df.tz_localize("UTC").tz_convert("Asia/Kolkata")
    return df  

def find_uuid(query = None):
    """Returns the UUID corresponding to a home"""
    response = requests.post(uuid_path, query)
    
    uuid = response.json()[0]["uuid"]
    return uuid

def heatmap(df,
            edgecolors='w',
            cmap=mpl.cm.RdYlBu_r,
            log=False):    
    width = len(df.columns)/4
    height = len(df.index)/4
    
    fig, ax = plt.subplots(figsize=(width,height))
      
    heatmap = ax.pcolor(df,
                        edgecolors=edgecolors,  # put white lines between squares in heatmap
                        cmap=cmap,
                        norm=mpl.colors.LogNorm() if log else None)
    
    ax.autoscale(tight=True)  # get rid of whitespace in margins of heatmap
    ax.set_aspect('equal')  # ensure heatmap cells are square
    ax.xaxis.set_ticks_position('top')  # put column labels at the top
    ax.tick_params(bottom='off', top='off', left='off', right='off')  # turn off ticks
    
    plt.yticks(np.arange(len(df.index)) + 0.5, df.index)
    plt.xticks(np.arange(len(df.columns)) + 0.5, df.columns, rotation=90)
    
    # ugliness from http://matplotlib.org/users/tight_layout_guide.html
    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", "3%", pad="1%")
    plt.colorbar(heatmap, cax=cax)



In [228]:

    
latexify(columns=1, fig_height=3)

ax = d.plot(kind='box', rot=90)
plt.ylim((0, 1))
plt.ylabel("Proportion of aggregate")
from common import format_axes
format_axes(ax)
plt.tight_layout()
plt.grid(False)
plt.savefig("/Users/nipunbatra/Dropbox/sharelatex/globalsip_smart_buildings_2015/figs/boxplot_category.pdf")



In [ ]:

	Electric vehicle	Fridge	HVAC	Kitchen	Lighting	Others	Sockets	Washing	Water heating	Unmetered
1	NaN	NaN	0.026188	1.775346	NaN	NaN	NaN	2.445574	NaN	99.929510
3	NaN	5.016922	33.767455	3.259490	30.538475	0.023428	11.719942	4.116258	NaN	98.525967
7	NaN	2.629206	1.572423	0.556408	NaN	NaN	NaN	1.196294	NaN	99.900815
8	NaN	NaN	0.400722	NaN	NaN	NaN	NaN	18.211417	NaN	99.689798
9	NaN	NaN	71.444723	NaN	NaN	NaN	22.094554	4.014734	NaN	98.374100

	air conditioner	appliance	dish washer	electric furnace	electric hot tub heater	electric space heater	electric swimming pool heater	electric vehicle	electric water heating appliance	freezer	...	security alarm	sockets	spin dryer	stove	submetered	total	unknown	washer dryer	washing machine	waste disposal unit
1	0.000262	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	0.024456	NaN	0.004806	1	NaN	NaN	NaN	NaN
2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	0.142502	NaN	0.028147	0.195173	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	0.117199	0.039529	NaN	0.060138	1	NaN	NaN	2.402583	0.34455
4	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	1	NaN	NaN	NaN	NaN
5	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN