In [ ]:
import os
import pandas as pd
from opengrid.library import houseprint, regression
from opengrid import config
c = config.Config()
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
plt.rcParams['figure.figsize'] = 16,8
We are going to use gas consumption data and weather data. Because we don't want to overload the weather API, we will only use 1 location (Ukkel).
First, let's define the start and end date of the identification data. That is the data to be used to find the model. Later, we will use the model to predict.
In [ ]:
start = pd.Timestamp('2015-01-01', tz='Europe/Brussels')
end = pd.Timestamp('now', tz='Europe/Brussels')
end_model = pd.Timestamp('2016-12-31', tz='Europe/Brussels') #last day of the data period for the model
In [ ]:
try:
hp_filename = os.path.join(c.get('data', 'folder'), 'hp_anonymous.pkl')
hp = houseprint.load_houseprint_from_file(hp_filename)
print("Houseprint loaded from {}".format(hp_filename))
except Exception as e:
print(e)
print("Because of this error we try to build the houseprint from source")
hp = houseprint.Houseprint()
hp.init_tmpo()
In [ ]:
hp.sync_tmpos()
In [ ]:
def gas_data_generator():
# Daily approach, obtain fully correct daily data.
# To be aggregated to monthly or weekly or ...
for gas_sensor in hp.get_sensors(sensortype='gas'):
df = gas_sensor.get_data(head=start-pd.Timedelta(days=1),
tail=end+pd.Timedelta(days=1),
resample='day',
unit='kWh',
diff=False,
tz='Europe/Brussels')
df = df.diff().shift(-1).dropna()
if df.empty:
continue
yield df
def elec_data_generator():
# Preferred method: as accurate as 3, and faster
# Daily approach, obtain fully correct daily data.
# To be aggregated to monthly or weekly or ...
for sensor in hp.get_sensors(sensortype='electricity'):
df = sensor.get_data(head=start-pd.Timedelta(days=1),
tail=end+pd.Timedelta(days=1),
resample='day',
unit='kWh',
diff=False,
tz='Europe/Brussels')
df = df.diff().shift(-1).dropna()
if df.empty:
continue
yield df
Let's have a peek
In [ ]:
gas_data = gas_data_generator()
elec_data = elec_data_generator()
In [ ]:
peek = next(gas_data)
peek.plot()
Run this block to download the weather data and save it to a pickle. This is a large request, and you can only do 2 or 3 of these per day before your credit with Forecast.io runs out!
To get the data run the cell below
In [ ]:
from opengrid.library import forecastwrapper
weather = forecastwrapper.Weather(location=(50.8024, 4.3407), start=start, end=end - pd.Timedelta(days=1))
irradiances=[
(0, 90), # north vertical
(90, 90), # east vertical
(180, 90), # south vertical
(270, 90), # west vertical
]
orientations = [0, 90, 180, 270]
weather_data = weather.days(irradiances=irradiances,
wind_orients=orientations,
heating_base_temperatures=[0, 6, 8 ,10, 12, 14, 16, 18]).dropna(axis=1)
weather_data.drop(['icon', 'summary', 'moonPhase', 'windBearing', 'temperatureMaxTime', 'temperatureMinTime',
'apparentTemperatureMaxTime', 'apparentTemperatureMinTime', 'uvIndexTime',
'sunsetTime', 'sunriseTime'],
axis=1, inplace=True)
weather_data = weather_data.applymap(float)
In [ ]:
weather_data.head()
In [ ]:
weather_data.columns
I wrote a generator that uses our previously defined generator so you can generate while you generate.
In [ ]:
def analysis_data_generator():
gas_data = gas_data_generator()
for gas_df in gas_data:
sensorid = gas_df.name
gas_df.name='gas'
df = pd.concat([gas_df, weather_data], axis=1).dropna()
df = df.tz_convert('Europe/Brussels')
yield sensorid, df
def analysis_data_generator_elec():
elec_data = elec_data_generator()
for elec_df in elec_data:
sensorid = elec_df.name
elec_df.name='elec'
df = pd.concat([elec_df, weather_data], axis=1).dropna()
df = df.tz_convert('Europe/Brussels')
yield sensorid, df
Let's have another peek
In [ ]:
analysis_data = analysis_data_generator()
In [ ]:
analysis_data = analysis_data_generator()
mrs_month = []
mrs_month_cv = []
mrs_week = []
for sensorid, data in analysis_data:
df = data.resample(rule='MS').sum()
if len(df) < 2:
continue
# monthly model, statistical validation
mrs_month.append(regression.MVLinReg(df.ix[:end_model], 'gas', p_max=0.03))
figures = mrs_month[-1].plot(df=df)
figures[0].savefig(os.path.join(c.get('data', 'folder'), 'figures', 'multivar_model_'+sensorid+'.png'), dpi=100)
figures[1].savefig(os.path.join(c.get('data', 'folder'), 'figures', 'multivar_results_'+sensorid+'.png'), dpi=100)
# weekly model, statistical validation
df = data.resample(rule='W').sum()
mrs_week.append(regression.MVLinReg(df.ix[:end_model], 'gas', p_max=0.02))
if len(df.ix[end_model:]) > 0:
figures = mrs_week[-1].plot(model=False, bar_chart=True, df=df.ix[end_model:])
figures[0].savefig(os.path.join(c.get('data', 'folder'), 'figures', 'multivar_prediction_weekly_'+sensorid+'.png'), dpi=100)
In [ ]:
data
In [ ]:
analysis_data = analysis_data_generator_elec()
mrs_month_elec = []
mrs_month_elec_cv = []
mrs_week_elec = []
In [ ]:
for sensorid, data in analysis_data:
df = data.resample(rule='MS').sum()
if len(df) < 2:
continue
# monthly model, statistical validation
mrs_month_elec.append(regression.MVLinReg(df.ix[:end_model], 'elec', p_max=0.03))
figures = mrs_month_elec[-1].plot(df=df)
figures[1].savefig(os.path.join(c.get('data', 'folder'), 'figures', 'multivar_results_'+sensorid+'.png'), dpi=100)
In [ ]:
break
# weekly model, statistical validation
df = data.resample(rule='W').sum()
mrs_week.append(regression.MVLinReg(df.ix[:end_model], 'elec', p_max=0.02))
if len(df.ix[end_model:]) > 0:
mrs_week[-1].plot(model=False, bar_chart=True, df=df.ix[end_model:])
In [ ]:
data.info()
In [ ]:
pd.Timestamp(pd.Timestamp('now', tz='Europe/Brussels').date(), tz='Europe/Brussels')
In [ ]: