General imports
In [ ]:
import pandas as pd
OpenGrid-specific imports
In [ ]:
from opengrid.library import houseprint
from opengrid import config
from opengrid.library import linearregression
c = config.Config()
Plotting settings
In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = 16,8
We are going to use gas consumption data and weather data. Because we don't want to overload the weather API, we will only use 1 location (Ukkel).
First, let's define the start and end dates of our experiment. We take one year's worth of data, ending with the last complete month.
In [ ]:
# Twelve monthly consumption values require thirteen months of counter data,
# so the window opens two days before the same date one year earlier.
end = pd.Timestamp.today().normalize().replace(day=2)
start = end.replace(year=end.year - 1) - pd.Timedelta(days=2)
# Make both boundaries timezone-aware (local Belgian time).
start, end = (stamp.tz_localize('Europe/Brussels') for stamp in (start, end))
print(start, end)
In [ ]:
# Load the Houseprint, and sync all data
# A fresh Houseprint object is built here; the commented-out alternative below
# would load one from the local file 'cache_hp.hp' instead.
hp = houseprint.Houseprint()
#hp = houseprint.load_houseprint_from_file('cache_hp.hp')
hp.init_tmpo()
# NOTE(review): syncing is currently disabled; presumably sensor data is only
# refreshed when hp.sync_tmpos() is re-enabled -- confirm against opengrid.
#hp.sync_tmpos()
In [ ]:
#hp.save('cache_hp.hp')
In [ ]:
def gas_data_generator1():
    """Yield one monthly gas series per gas sensor (naive reference method).

    Original monthly data generator; it returns wrong data and is kept only
    for comparison with the other generators. The counter readings are
    averaged per calendar month and those averages are differenced, which is
    not the monthly consumption.

    Sensors for which nothing is left after filtering are skipped.
    """
    for gas_sensor in hp.get_sensors(sensortype='gas'):
        df = gas_sensor.get_data(head=start, tail=end, unit='kWh', diff=False)
        df = df.tz_convert('Europe/Brussels')
        # resample() is a deferred operation in the pandas API used by the
        # rest of this file, so the aggregation has to be spelled out. mean()
        # is what the bare resample('MS') call used to apply implicitly.
        df = df.resample('MS').mean()
        df = df.diff().dropna()
        # Keep strictly positive differences only.
        df = df[df > 0]
        if df.empty:
            continue
        yield df
def gas_data_generator2():
    """Yield one roughly correct monthly gas series per gas sensor.

    "Roughly correct" means that the gas consumed between the two counter
    values right before and right after a month transition is attributed to
    the new month. In return the method is robust and does not need data
    beyond the last month.

    Sensors for which nothing is left after filtering are skipped.
    """
    for sensor in hp.get_sensors(sensortype='gas'):
        counter = sensor.get_data(head=start, tail=end, unit='kWh', diff=False)
        # Last counter reading seen in each calendar month (local time).
        monthly_last = counter.tz_convert('Europe/Brussels').resample('MS').last()
        monthly = monthly_last.diff().dropna()
        # Keep strictly positive differences only.
        monthly = monthly[monthly > 0]
        if not monthly.empty:
            yield monthly
def gas_data_generator3():
    """Yield one interpolated monthly gas series per gas sensor.

    More complicated than generator 2, but the most correct monthly method:
    the counter is interpolated at every month transition to estimate its
    value at exactly 00:00:00, instead of attributing all consumption around
    the transition to the new month.

    Drawbacks: very slow (two reindex() calls), and when there is no data
    after the end of the last month or before the beginning of the first
    month, no interpolation is possible and that entire month has no data.

    Sensors for which nothing is left after filtering are skipped.
    """
    for sensor in hp.get_sensors(sensortype='gas'):
        counter = sensor.get_data(head=start, tail=end, unit='kWh', diff=False)
        counter = counter.tz_convert('Europe/Brussels')
        month_starts = counter.resample('MS').first().index
        # Insert the month boundaries into the index so they can be
        # interpolated, then keep only those boundary timestamps.
        widened = counter.reindex(counter.index.union(month_starts))
        at_month_start = widened.interpolate(method='time').reindex(month_starts)
        # shift(-1) labels each difference with the month in which it started.
        monthly = at_month_start.diff().shift(-1).dropna()
        # Keep strictly positive differences only.
        monthly = monthly[monthly > 0]
        if not monthly.empty:
            yield monthly
def gas_data_generator4():
    """Yield one daily gas series per gas sensor (preferred method).

    As accurate as generator 3, and faster. The result is daily data, meant
    to be aggregated afterwards to monthly, weekly, or any other period.

    Sensors for which nothing is left after differencing are skipped.
    """
    for sensor in hp.get_sensors(sensortype='gas'):
        daily_counter = sensor.get_data(
            head=start,
            tail=end,
            resample='day',
            unit='kWh',
            diff=False,
            tz='Europe/Brussels',
        )
        # shift(-1) labels each difference with the day on which it started.
        daily = daily_counter.diff().shift(-1).dropna()
        if not daily.empty:
            yield daily
Let's have a peek
In [ ]:
# Create one generator per method. These calls only build the generator
# objects; the bodies run when next() is called on them.
gas_data1 = gas_data_generator1()
gas_data2 = gas_data_generator2()
gas_data3 = gas_data_generator3()
gas_data4 = gas_data_generator4()
In [ ]:
# Take the first result from each generator.
peek1 = next(gas_data1)
peek2 = next(gas_data2)
peek3 = next(gas_data3)
peek4 = next(gas_data4)

# Overlay the four methods in one figure; method 4 yields daily values, so it
# is summed per month before plotting.
plt.figure()
for label, series in [
    ('1', peek1),
    ('2', peek2),
    ('3', peek3),
    ('4', peek4.resample('MS').sum()),
]:
    plt.plot(series, label=label)
plt.legend()
In [ ]:
# Difference between method 3 and the monthly sum of method 4.
print(peek3 - peek4.resample('MS').sum())
In [ ]:
# Time a single next() call on each generator.
# NOTE(review): %timeit executes the statement repeatedly and every execution
# advances the generator, so this may raise StopIteration when there are few
# gas sensors -- confirm, or limit the runs with `-n 1 -r 1`.
%timeit(next(gas_data1))
%timeit(next(gas_data2))
%timeit(next(gas_data3))
%timeit(next(gas_data4))
Run the cell below to download the weather data. Note that the cell does not yet save the result to a pickle, so every run issues a new request. This is a large request, and you can only do 2 or 3 of these per day before your credit with Forecast.io runs out!
TODO: Use the caching library for this.
To get the data run the cell below
In [ ]:
from opengrid.library import forecastwrapper
# Request weather for the single location (Ukkel) over the same start/end
# window as the gas data.
weather = forecastwrapper.Weather(location='Ukkel, Belgium', start=start, end=end)
# Sum the daily weather values per calendar month ('MS' = month start).
weather_data = weather.days().resample('MS').sum()
In [ ]:
# Plot the monthly sums of the 'heatingDegreeDays16.5' column.
weather_data['heatingDegreeDays16.5'].plot()
We have defined an OpenGrid analysis as a class that takes a single DataFrame as input, so we'll create that dataframe.
I wrote a generator that uses our previously defined generator so you can generate while you generate.
In [ ]:
def analysis_data_generator():
    """Yield one analysis DataFrame per gas sensor.

    Each yielded frame has two columns, 'gas' and 'degreedays', holding the
    monthly gas consumption of one sensor next to the monthly
    'heatingDegreeDays16.5' values from ``weather_data``. Rows where either
    value is missing are dropped.

    The gas data comes from ``gas_data_generator4`` (the preferred method),
    whose daily values are summed per calendar month so that they line up
    with the monthly weather data.
    """
    degree_days = weather_data['heatingDegreeDays16.5']
    # The original called gas_data_generator(), which is not defined in this
    # file; only the numbered variants exist.
    for daily_gas in gas_data_generator4():
        monthly_gas = daily_gas.resample('MS').sum()
        df = pd.concat([monthly_gas, degree_days], axis=1).dropna()
        df.columns = ['gas', 'degreedays']
        yield df
Let's have another peek
In [ ]:
# Build the generator; nothing is computed until next() is called on it.
analysis_data = analysis_data_generator()
In [ ]:
# Plot the first analysis frame: first column on the left y-axis, second
# column on a twin right y-axis sharing the same time axis.
peek = next(analysis_data)
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
lines = []
for axis, column, color in zip([ax1, ax2], peek.columns, ['b', 'r']):
    lines += axis.plot_date(peek.index, peek[column], '-', color=color, label=column)
# A bare plt.legend() only covers the current axes, so one of the two series
# would be missing. Pass the handles of both axes explicitly instead.
ax2.legend(handles=lines)
In [ ]:
# Fit and plot a linear regression of gas consumption against degree days for
# every sensor.
analysis_data = analysis_data_generator()
for data in analysis_data:
    try:
        analysis = linearregression.LinearRegression(independent=data.degreedays, dependent=data.gas)
    except ValueError as e:
        # No analysis could be built for this sensor: report why and move on.
        # Without the `continue`, `analysis` would be unbound or left over
        # from the previous sensor when it is plotted below.
        print(e)
        continue
    fig = analysis.plot()
    fig.show()
In [ ]:
# Same as the plain linear regression, but using LinearRegression3 with a
# breakpoint of 60 and a percentage of 0.5.
analysis_data = analysis_data_generator()
for data in analysis_data:
    try:
        analysis = linearregression.LinearRegression3(independent=data.degreedays, dependent=data.gas,
                                                      breakpoint=60, percentage=0.5)
    except ValueError as e:
        # No analysis could be built for this sensor: report why and move on.
        # Without the `continue`, `analysis` would be unbound or left over
        # from the previous sensor when it is plotted below.
        print(e)
        continue
    fig = analysis.plot()
    fig.show()
In [ ]: