In [1]:
import matplotlib.pyplot as plt
#import seaborn as sns
#plt.style.use('ggplot')
%matplotlib inline
from nilmtk import DataSet
import nilmtk
import warnings
import pandas as pd
import numpy as np
import sys
warnings.filterwarnings("ignore")
In [2]:
# HDF5 file opened through nilmtk's DataSet; every later cell reads from `ds`.
dataset_path = "/Users/nipunbatra/Downloads/2months_100homes.hdf5"
ds = DataSet(dataset_path)
In [3]:
# Count the appliances attached to each building's meter group.
# Result: {building id: number of appliances}.
num_appliances_per_home = {}
seen = 0
# Take the denominator from the dataset itself so the progress percentage is
# correct for any number of buildings (it was hard-coded to 100).
num_buildings = len(ds.buildings)
for building in ds.buildings:
    seen += 1
    # "\r" rewrites the same output line instead of printing one per building.
    sys.stdout.write("\r "+str(seen*100.0/num_buildings)+"% done")
    sys.stdout.flush()
    e = ds.buildings[building].elec
    if e is not None:
        num_appliances_per_home[building] = len(e.appliances)
    else:
        # Buildings without a meter group are reported, not counted.
        print(building)
In [4]:
df_num_appliances = pd.DataFrame({"num_appliances":num_appliances_per_home})
In [5]:
zero_appliances = df_num_appliances[df_num_appliances["num_appliances"]==0]
zero_appliances.head()
Out[5]:
In [7]:
len(zero_appliances)
Out[7]:
In [8]:
categories = set()
appliance_types = set()
for appliance in nilmtk.global_meter_group.appliances:
categories.update(appliance.categories())
appliance_types.add(appliance.type['type'])
In [9]:
appliance_types
Out[9]:
In [229]:
ALL_APPLIANCES = list(appliance_types)
ALL_APPLIANCES
Out[229]:
In [ ]:
# For every building collect:
#   "total"       - active energy measured at the mains,
#   "submetered"  - percentage of energy that is submetered,
#   <appliance>   - active energy of the meters selected for that appliance
#                   type, or NaN when the building has no such meter.
out = {}
for building in ds.buildings:
    print(building)
    try:
        # Named `elec` (not `e`) so the exception handler below cannot
        # clobber the meter group.
        elec = ds.buildings[building].elec
        out[building] = {}
        out[building]["total"] = elec.mains().total_energy()["active"]
        out[building]["submetered"] = 100*elec.proportion_of_energy_submetered()
        for appliance in ALL_APPLIANCES:
            print(appliance)
            m = elec.select_using_appliances(type=appliance)
            if len(m.meters) > 0:
                out[building][appliance] = m.total_energy()["active"]
            else:
                out[building][appliance] = np.nan
    except Exception as err:
        # Still best-effort: whatever was gathered for this building is kept
        # and the loop moves on, but the failure is reported instead of being
        # silently discarded. print() is used because warnings are filtered
        # out in the first cell.
        print("Building {}: remaining statistics skipped ({!r})".format(building, err))
In [174]:
df_total = pd.DataFrame(out).T
In [176]:
df_total = df_total[df_total>0]
In [177]:
len(df_total)
Out[177]:
In [102]:
df_total = df_total.drop('submetered', 1)
In [103]:
len(df_total)
Out[103]:
In [104]:
#df_total = df_total.dropna()
In [105]:
df_total = df_total[df_total["total"]>0]
df_total = df_total[df_total["Washing"]>0]
In [106]:
df_total = df_total[df_total["HVAC"]>0]
df_total = df_total[df_total["Unmetered"]>0]
In [108]:
# Every non-total column as a percentage of the building's total.
# `div(..., axis=0)` divides row by row. Dividing by the one-column frame
# `df_total[['total']]` aligned on column labels instead, and because no
# label is shared every cell came out as NaN.
non_total_columns = [col for col in df_total.columns if col not in ['total']]
df_total_percentage = (100*df_total[non_total_columns]).div(df_total['total'], axis=0)
In [109]:
df_reduced=df_total[[col for col in df_total.columns if col not in ['total']]]
In [110]:
for col in df_reduced.columns:
df_reduced[col] = 100*df_reduced[col]/df_total['total']
In [111]:
df_reduced.head()
Out[111]:
In [189]:
# Fraction of each building's total energy, per column.
# The division has to run row-wise, hence `div(..., axis=0)`: the expression
# `dft/dft.total` matched the Series index (buildings) against the column
# labels and therefore produced nothing but NaN.
dft = df_total.div(df_total["total"], axis=0)
dft = dft[[x for x in df_total.columns if x!="total"]]
In [197]:
abc = df_total.copy()
In [198]:
for col in abc.columns:
abc[col] = abc[col]/abc['total']
In [200]:
abc.head()
Out[200]:
In [207]:
d = abc[[x for x in abc.columns if x not in ["total", "submetered"]]]
In [211]:
d = d[d<=1]
In [225]:
# %load common.py
"""Contains helper functions"""
import pandas as pd
import numpy as np
import requests
from StringIO import StringIO
import matplotlib as mpl
import matplotlib.pyplot as plt
# Endpoint that find_uuid() POSTs its query to.
uuid_path = "http://energy.iiitd.edu.in:9102/backend/api"
# Base URL that get_readings_csv() extends with the channel uuid and the
# requested time range.
data_path = "http://energy.iiitd.edu.in:9102/backend/api/data/"
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from math import sqrt
# Colour that format_axes() applies to the visible spines and the tick marks.
SPINE_COLOR = 'gray'
# np.vectorize wrapper so the converter can be applied to a whole array.
# NOTE(review): `_to_ordinalf` is a private matplotlib name; presumably it maps
# datetimes to matplotlib date numbers - confirm it exists in the installed
# matplotlib version.
_to_ordinalf_np_vectorized = np.vectorize(mdates._to_ordinalf)
def plot_series(series, **kwargs):
    """Plot a datetime-indexed series with plain `ax.plot`.

    Intended as a faster alternative to `pd.Series.plot()`.

    Parameters
    ----------
    series : pd.Series
        Must have a DatetimeIndex (its `to_pydatetime()` is used for x).
    ax : matplotlib Axes, optional
        Defaults to the current axes (`plt.gca()`).
    fig : matplotlib Figure, optional
        Defaults to the current figure (`plt.gcf()`).
    date_format : str, optional, default='%d/%m/%y %H:%M:%S'
        Format string for the x-axis tick labels.
    tz_localize : boolean, optional, default is True
        If True the tick labels use the timezone of `series.index`;
        if False no timezone is handed to the date formatter.

    Any remaining keyword arguments are forwarded to `ax.plot`.

    Returns
    -------
    ax : matplotlib Axes
        The axes that were drawn on.
    """
    ax = kwargs.pop('ax', None)
    fig = kwargs.pop('fig', None)
    date_format = kwargs.pop('date_format', '%d/%m/%y %H:%M:%S')
    tz_localize = kwargs.pop('tz_localize', True)
    if ax is None:
        ax = plt.gca()
    if fig is None:
        fig = plt.gcf()
    # Public matplotlib API for datetime -> date-number conversion. It replaces
    # the np.vectorize wrapper around the private `mdates._to_ordinalf`, which
    # is absent from newer matplotlib and cannot handle an empty series.
    x = mdates.date2num(series.index.to_pydatetime())
    ax.plot(x, series, **kwargs)
    tz = series.index.tzinfo if tz_localize else None
    ax.xaxis.set_major_formatter(mdates.DateFormatter(date_format,
                                                      tz=tz))
    ax.set_ylabel('watts')
    # Rotates and right-aligns the date labels so they do not overlap.
    fig.autofmt_xdate()
    return ax
def latexify(fig_width=None, fig_height=None, columns=1):
    """Set up matplotlib's RC params for LaTeX plotting.

    Call this before plotting a figure.

    Parameters
    ----------
    fig_width : float, optional, inches
        Defaults to 3.39 for one column and 6.9 for two columns.
    fig_height : float, optional, inches
        Defaults to `fig_width` times the golden ratio; capped at 8.0 inches
        (a warning is printed when the cap is applied).
    columns : {1, 2}
        Number of text columns the figure spans.

    Raises
    ------
    AssertionError
        If `columns` is neither 1 nor 2.
    """
    # code adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples
    # Width and max height in inches for IEEE journals taken from
    # computer.org/cms/Computer.org/Journal%20templates/transactions_art_guide.pdf
    assert(columns in [1,2])
    if fig_width is None:
        fig_width = 3.39 if columns==1 else 6.9 # width in inches
    if fig_height is None:
        golden_mean = (sqrt(5)-1.0)/2.0    # Aesthetic ratio
        fig_height = fig_width*golden_mean # height in inches
    MAX_HEIGHT_INCHES = 8.0
    if fig_height > MAX_HEIGHT_INCHES:
        # Built with format(): concatenating the floats to the message with
        # `+` raised TypeError exactly when the warning was needed.
        print("WARNING: fig_height too large: {} so will reduce to {} inches."
              .format(fig_height, MAX_HEIGHT_INCHES))
        fig_height = MAX_HEIGHT_INCHES
    params = {'backend': 'ps',
              # Raw string: "\u" must not be read as an escape sequence.
              # Given as a plain string, which both old and new matplotlib
              # accept for this setting.
              'text.latex.preamble': r'\usepackage{gensymb}',
              'axes.labelsize': 6, # fontsize for x and y labels (was 10)
              'axes.titlesize': 6,
              # 'font.size' is the supported name of the former 'text.fontsize'.
              'font.size': 6, # was 10
              'legend.fontsize': 6, # was 10
              'xtick.labelsize': 6,
              'ytick.labelsize': 6,
              'text.usetex': True,
              'figure.figsize': [fig_width,fig_height],
              'font.family': 'serif'
    }
    matplotlib.rcParams.update(params)
def format_axes(ax):
    """Restyle `ax` and return it.

    The top and right spines are hidden; the left and bottom spines are
    drawn in SPINE_COLOR with a line width of 0.5. Ticks are placed on the
    bottom and left only, point outwards and use SPINE_COLOR.
    """
    hidden_sides = ('top', 'right')
    visible_sides = ('left', 'bottom')
    for side in hidden_sides:
        ax.spines[side].set_visible(False)
    for side in visible_sides:
        kept_spine = ax.spines[side]
        kept_spine.set_color(SPINE_COLOR)
        kept_spine.set_linewidth(0.5)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_tick_params(direction='out', color=SPINE_COLOR)
    return ax
def get_readings_csv(uuid, start_time, end_time):
    """Request the CSV readings of one sMAP channel for a time window.

    Parameters
    ---------
    uuid : string
        Unique identifier for the sMAP channel
    start_time : long int
        Timestamp in milliseconds
    end_time : long int
        Timestamp in milliseconds

    Returns
    -------
    response : object returned by `requests.get`
        The HTTP response as received; nothing is parsed here, callers read
        the CSV payload from the response themselves.
    """
    url_template = "{}uuid/{}?starttime={}&endtime={}&format=csv"
    query = url_template.format(data_path, uuid, start_time, end_time)
    return requests.get(query)
def pd_to_epoch(pd_time):
    """Return `pd_time` as an integer number of milliseconds.

    The value is wrapped in a one-element DatetimeIndex, cast to int64 and
    floor-divided by 10**6.
    NOTE(review): this assumes the int64 values are nanoseconds since the
    epoch - confirm for the pandas version in use.
    """
    as_int64 = pd.DatetimeIndex([pd_time]).astype(np.int64)
    return as_int64[0] // 10**6
def create_df(query, start_time, end_time, uuid=None):
    """Download the readings of one channel into a DataFrame.

    Parameters
    ----------
    query : passed to `find_uuid` when `uuid` is not given
    start_time, end_time : converted to epoch milliseconds with `pd_to_epoch`
    uuid : string, optional
        Channel identifier; looked up with `find_uuid(query)` when None.

    Returns
    -------
    df : pd.DataFrame
        Single column 'value', indexed by the reading timestamps converted
        to the "Asia/Kolkata" timezone.
    """
    start_time = pd_to_epoch(start_time)
    end_time = pd_to_epoch(end_time)
    if uuid is None:
        uuid = find_uuid(query)
    temp = get_readings_csv(uuid, start_time, end_time)
    df = pd.read_csv(StringIO(temp.content), names=['timestamp','value'])
    # The server timestamps are interpreted as milliseconds since the epoch.
    df.index = pd.to_datetime(df.timestamp, unit='ms')
    # `axis` by keyword: newer pandas no longer accepts it positionally.
    df = df.drop('timestamp', axis=1)
    df = df.tz_localize("UTC").tz_convert("Asia/Kolkata")
    return df
def find_uuid(query = None):
    """POST `query` to `uuid_path` and return the "uuid" field of the first
    entry in the JSON reply."""
    matches = requests.post(uuid_path, query).json()
    return matches[0]["uuid"]
def heatmap(df,
            edgecolors='w',
            cmap=mpl.cm.RdYlBu_r,
            log=False):
    """Draw `df` as a heatmap with square cells and a colour bar.

    Parameters
    ----------
    df : pd.DataFrame
        Values to draw; index and columns provide the tick labels.
    edgecolors : colour of the lines separating the cells
    cmap : matplotlib colormap
    log : bool
        Use a logarithmic colour scale when True.
    """
    # Float division: a quarter inch per cell. Integer division (Python 2)
    # rounded the size down and gave a zero-sized figure for frames with
    # fewer than four rows or columns.
    width = len(df.columns)/4.0
    height = len(df.index)/4.0

    fig, ax = plt.subplots(figsize=(width,height))

    # Named `mesh` so the function's own name is not shadowed locally.
    mesh = ax.pcolor(df,
                     edgecolors=edgecolors,  # put white lines between squares in heatmap
                     cmap=cmap,
                     norm=mpl.colors.LogNorm() if log else None)

    ax.autoscale(tight=True)  # get rid of whitespace in margins of heatmap
    ax.set_aspect('equal')  # ensure heatmap cells are square
    ax.xaxis.set_ticks_position('top')  # put column labels at the top
    # Booleans, not 'off': any non-empty string is truthy for current
    # matplotlib, which would leave the ticks switched on.
    ax.tick_params(bottom=False, top=False, left=False, right=False)  # turn off ticks

    plt.yticks(np.arange(len(df.index)) + 0.5, df.index)
    plt.xticks(np.arange(len(df.columns)) + 0.5, df.columns, rotation=90)

    # ugliness from http://matplotlib.org/users/tight_layout_guide.html
    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", "3%", pad="1%")
    plt.colorbar(mesh, cax=cax)
In [228]:
# Box plot of the per-appliance fractions in `d`, styled for a single-column
# LaTeX figure and written to a PDF.
latexify(columns=1, fig_height=3)
ax = d.plot(kind='box', rot=90)
plt.ylim((0, 1))
plt.ylabel("Proportion of aggregate")
# format_axes is already defined by the loaded common.py cell; importing it
# again from a `common` module in the middle of this cell was redundant and
# fails whenever that file is not on the import path.
format_axes(ax)
plt.tight_layout()
plt.grid(False)
figure_path = "/Users/nipunbatra/Dropbox/sharelatex/globalsip_smart_buildings_2015/figs/boxplot_category.pdf"
plt.savefig(figure_path)
In [ ]: