This script shows how to use the existing code in opengrid to create a baseload electricity consumption benchmark.


In [ ]:
import os, sys
import inspect
import numpy as np

import datetime as dt
import time
import pytz
import pandas as pd
import pdb

script_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# add the path to opengrid to sys.path
sys.path.append(os.path.join(script_dir, os.pardir, os.pardir))

from opengrid.library import config
c=config.Config()
DEV = c.get('env', 'type') == 'dev' # DEV is True if we are in development environment, False if on the droplet

if not DEV:
    # production environment: don't try to display plots
    import matplotlib
    matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter, AutoDateLocator

# find tmpo
sys.path.append(c.get('tmpo', 'folder'))

from opengrid.library.houseprint import houseprint

if DEV:
    if c.get('env', 'plots') == 'inline':
        %matplotlib inline
    else:
        %matplotlib qt
else:
    pass # don't try to render plots
plt.rcParams['figure.figsize'] = 12,8

Script settings


In [ ]:
BXL = pytz.timezone('Europe/Brussels')
number_of_days = 7

We create one big dataframe, the columns are the sensors of type electricity


In [ ]:
hp = houseprint.load_houseprint_from_file('new_houseprint.pkl')
hp.init_tmpo()

In [ ]:
start = pd.Timestamp(time.time() - number_of_days*86400, unit='s')

df = hp.get_data(sensortype='electricity', head=start, resample='s')

In [ ]:
df = df.resample(rule='60s', how='max')
df = df.diff()*3600/60

Convert Datetimeindex to local time


In [ ]:
df.index = df.index.tz_convert(BXL)

In [ ]:
# plot a few dataframes to inspect them
if DEV:
    for sensor in df.columns:
        plt.figure()
        df[sensor].plot()

We define two low-level functions


In [ ]:
def testvalid(row):
    return row['maxima'] > 0 and row['maxima'] <> row['minima']

In [ ]:
def get_minima(sensor):
    """
    Return the standby consumption for the covered days for a given sensor as an array.  
    Take care of days where this sensor has NO VALID standby consumption
    """
    
    global minima
    
    res = np.ndarray(len(minima))
    for i,df in enumerate(minima):
        try: 
            res[i] = df[sensor]
        except:
            res[i] = np.nan
            
    return res

Data handling

We have to filter out the data, we do three things:

  1. split the data in dataframes per day
  2. filter out the night-time hours (between 00h00 and 05h00)
  3. we check if the resulting time series contain enough variation (negatives and constant signals are filtered out)

In [ ]:
index_slices = [] # will contain the correct index slices for each of the analysed nights
minima = [] # each element in minima is a dataframe with standby consumption per valid sensor
valid_sensors = set() # we keep track of all sensors that yield a valid standby consumption for at least one day.


# find the date for which we still have the full night (between 01:00 and 05:00).  We will store it as datetime at 00:00 (local time)
hour = df.index[-1].hour # the hour of the last index.  
if hour >= 5:
    last_day = df.index[-1].date()
else:
    last_day = (df.index[-1] - dt.timedelta(days=1)).date()

for day in range(number_of_days)[::-1]:
    #pdb.set_trace()
    dt_start = dt.datetime.combine(last_day - dt.timedelta(days=day), dt.time(0,0)) # start slicing at 01:00 local time
    dt_stop = dt.datetime.combine(last_day - dt.timedelta(days=day), dt.time(5,0)) # stop slicing at 05:00 local time
       
    df_night = df.ix[dt_start:dt_stop] # contains only data for a single night
    index_slices.append(df_night.index.copy())
        
    df_results = pd.DataFrame(index=df.columns)  #df_results contains the results of the analysis for a single night.  Index = sensorid
    df_results['minima'] = df_night.min(axis=0)
    df_results['maxima'] = df_night.max(axis=0)
    df_results['valid'] = df_results.apply(testvalid, axis=1)
    
    minima.append(df_results['minima'].ix[df_results.valid])
    valid_sensors.update(set(minima[-1].index.tolist()))

Plots

The next plots are the current benchmarks, anonymous. The left figure shows where the given sensor (or family) is situated compared to all other families. The right plot shows the night-time consumption for this night.

In a next step, it would be nice to create an interactive plot (D3.js?) for the right side: it should show the night-time consumption for the day over which the mouse hovers in the left graph.


In [ ]:
index_slices_days = [x[0] for x in index_slices[1:]]
index = pd.DatetimeIndex(freq='D', start=index_slices_days[0], periods=number_of_days)

In [ ]:
df_=pd.concat(minima, axis=1)
df_.columns = index

In [ ]:
df_statistics = df_.describe().T

In [ ]:
df_statistics

In [ ]:
for sensor in list(valid_sensors)[:]:
    plt.figure(figsize=(10,8))
    ax1=plt.subplot(211)
    ax1.plot_date(df_statistics.index, df_statistics[u'25%'], '-', lw=2, color='g', label=u'25%')
    ax1.plot_date(df_statistics.index, df_statistics[u'50%'], '-', lw=2, color='orange', label=u'50%')
    ax1.plot_date(df_statistics.index, df_statistics[u'75%'], '-', lw=2, color='r', label=u'75%')
    
    ax1.plot_date(df_.T.index, df_.T[sensor], 'rD', ms=7) 
    
    xticks = [x.strftime(format='%d/%m') for x in df_statistics.index]
    locs, lables=plt.xticks()
    plt.xticks(locs, xticks, rotation='vertical')
    plt.title(hp.find_sensor(sensor).device.key + ' - ' + sensor)
    ax1.grid()
    ax1.set_ylabel('Watt')
    
    ax2=plt.subplot(212)
    try:
        ax2.plot(index_slices[-1], df.ix[index_slices[-1]][sensor], 'b-', label='Afgelopen nacht')
        ax2.xaxis_date(BXL) #Put timeseries plot in local time
        # rotate the labels
        plt.xticks(rotation='vertical')
        plt.legend()
        ax2.set_ylabel('Watt')
    except:
        print "Could not create graph for {}".format(hp.find_sensor(sensor).device.key)
    else:
        plt.savefig(os.path.join(c.get('data', 'folder'), 'figures', 'standby_vertical_'+sensor+'.png'), dpi=100)
    if not DEV:
        plt.close()

In [ ]:
try:
    valid_sensors.remove('565de0a7dc64d8370aa321491217b85f') # the FLM of 3E does not fit in household standby benchmark
except:
    pass

for sensor in valid_sensors:
    plt.figure(figsize=(10,5))
    ax1=plt.subplot(121)
    box = [x.values for x in minima]
    ax1.boxplot(box, positions=range(len(df_statistics)), notch=False)
    ax1.plot(range(len(df_statistics)), get_minima(sensor), 'rD', ms=10, label='Sluipverbruik')
    xticks = [x[0].strftime(format='%d/%m') for x in index_slices]
    plt.xticks(range(len(df_statistics)), xticks, rotation='vertical')
    #plt.title(hp.get_flukso_from_sensor(sensor) + ' - ' + sensor)
    ax1.grid()
    ax1.set_ylabel('Watt')
    plt.legend(numpoints=1, frameon=False)
    #ax1.set_xticklabels([t.strftime(format='%d/%m') for t in df_all_perday.index.tolist()])

    ax2=plt.subplot(122)
    try:
        ax2.plot(index_slices[-1], df.ix[index_slices[-1]][sensor], 'b-', label='Afgelopen nacht')
        ax2.xaxis_date(BXL) #Put timeseries plot in local time
        # rotate the labels
        plt.xticks(rotation='vertical')
        ax2.set_ylabel('Watt')
        ax2.grid()
        plt.legend(loc='upper right', frameon=False)
        plt.tight_layout()
    except Exception as e:
        print(e)
    else:
        plt.savefig(os.path.join(c.get('data', 'folder'), 'figures', 'standby_horizontal_'+sensor+'.png'), dpi=100)
    
    if not DEV:
        plt.close()

In [ ]: