In [ ]:
import os
import pandas as pd
from opengrid.library import houseprint, caching, regression
from opengrid import config
c = config.Config()
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
plt.rcParams['figure.figsize'] = 16,8
In [ ]:
# Create houseprint from saved file, if not available, parse the google spreadsheet
try:
hp_filename = os.path.join(c.get('data', 'folder'), 'hp_anonymous.pkl')
hp = houseprint.load_houseprint_from_file(hp_filename)
print("Houseprint loaded from {}".format(hp_filename))
except Exception as e:
print(e)
print("Because of this error we try to build the houseprint from source")
hp = houseprint.Houseprint()
hp.init_tmpo()
We are going to use daily gas, electricity and water consumption data and weather data. Because we don't want to overload the weather API, we will only use 1 location (Ukkel).
First, let's define the start and end date of the identification data. That is the data to be used to find the model. Later, we will use the model to predict.
In [ ]:
start = pd.Timestamp('2015-01-01', tz='Europe/Brussels')
end = pd.Timestamp('now', tz='Europe/Brussels')
end_model = pd.Timestamp('2016-12-31', tz='Europe/Brussels') #last day of the data period for the model
In [ ]:
caches = {}
dfs = {}
for cons in ['electricity', 'gas', 'water']:
caches[cons] = caching.Cache(variable='{}_daily_total'.format(cons))
dfs[cons] = caches[cons].get(sensors = hp.get_sensors(sensortype=cons))
Run this block to download the weather data and save it to a pickle. This is a large request, and you can only do 2 or 3 of these per day before your credit with Forecast.io runs out!
We also add a column for each day-of-week which may be used by the regression algorithm on a daily basis.
In [ ]:
from opengrid.library import forecastwrapper
weather = forecastwrapper.Weather(location=(50.8024, 4.3407), start=start, end=end - pd.Timedelta(days=1))
irradiances=[
(0, 90), # north vertical
(90, 90), # east vertical
(180, 90), # south vertical
(270, 90), # west vertical
]
orientations = [0, 90, 180, 270]
weather_data = weather.days(irradiances=irradiances,
wind_orients=orientations,
heating_base_temperatures=[0, 6, 8 ,10, 12, 14, 16, 18]).dropna(axis=1)
weather_data.drop(['icon', 'summary', 'moonPhase', 'windBearing', 'temperatureMaxTime', 'temperatureMinTime',
'apparentTemperatureMaxTime', 'apparentTemperatureMinTime', 'uvIndexTime',
'sunsetTime', 'sunriseTime'],
axis=1, inplace=True)
# Add columns for the day-of-week
for i, d in zip(range(7), ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']):
weather_data[d] = 0
weather_data.loc[weather_data.index.weekday == i, d] = 1
weather_data = weather_data.applymap(float)
In [ ]:
weather_data.head()
In [ ]:
weather_data.columns
In [ ]:
def data_generator(consumable):
dfcons = dfs[consumable]
for sensorid in dfcons.columns:
df = pd.concat([dfcons[sensorid], weather_data], axis=1).dropna()
df = df.tz_convert('Europe/Brussels')
yield sensorid, df
Let's have a peek
In [ ]:
cons = 'gas'
analysis_data = data_generator(cons)
sensorid, peek = next(analysis_data)
peek = peek.resample(rule='MS').sum()
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot_date(peek.index, peek[sensorid], '-', color='grey', lw=8, label=cons)
for column in peek.columns[1:]:
if 'heatingDegreeDays' in column:
ax2.plot_date(peek.index, peek[column], '-', label=column)
plt.legend()
In [ ]:
cons = 'water'
save_figures = True
In [ ]:
analysis_data = data_generator(cons)
mrs_month = []
mrs_month_cv = []
mrs_week = []
for sensorid, data in analysis_data:
data.rename(columns={sensorid:cons}, inplace=True)
df = data.resample(rule='MS').sum()
if len(df) < 2:
continue
# monthly model, statistical validation
mrs_month.append(regression.MVLinReg(df.ix[:end_model], cons, p_max=0.03))
figures = mrs_month[-1].plot(df=df)
if save_figures:
figures[0].savefig(os.path.join(c.get('data', 'folder'), 'figures', 'multivar_model_'+sensorid+'.png'), dpi=100)
figures[1].savefig(os.path.join(c.get('data', 'folder'), 'figures', 'multivar_results_'+sensorid+'.png'), dpi=100)
# weekly model, statistical validation
df = data.resample(rule='W').sum()
if len(df.ix[:end_model]) < 4:
continue
mrs_week.append(regression.MVLinReg(df.ix[:end_model], cons, p_max=0.02))
if len(df.ix[end_model:]) > 0:
figures = mrs_week[-1].plot(model=False, bar_chart=True, df=df.ix[end_model:])
if save_figures:
figures[0].savefig(os.path.join(c.get('data', 'folder'), 'figures', 'multivar_prediction_weekly_'+sensorid+'.png'), dpi=100)
In [ ]: