In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from eemeter.weather.location import zipcode_to_usaf_station
from eemeter.weather import ISDWeatherSource
import datetime
import pytz
In [2]:
# Pin the global NumPy RNG so every random draw later in the notebook is
# reproducible from a fresh kernel.
np.random.seed(123)

# Four 365-day years plus two extra days of daily, UTC-stamped readings.
periods = 4 * 365 + 2
index = pd.date_range(
    start='2011-01-01 00:00:00Z',
    periods=periods,
    freq='D',
)
In [3]:
# Gamma scale for the day-of-week jitter. The shape passed below is
# mean * (1/scale), so each draw's expected value (shape * scale) is the
# template multiplier for that day.
scale = 0.005

# One multiplier per pandas `dayofweek` code (0 = Monday ... 6 = Sunday).
weekday_template = np.array([1.05, 1.0, 1.02, 1.03, 1.05, 0.9, 0.95])
weekday_bias_means = np.take(weekday_template, index.dayofweek)
weekday_biases = np.random.gamma(
    shape=weekday_bias_means * (1/scale),
    scale=scale,
)

# Overlay the first 60 days of expected vs. sampled multipliers.
for bias_series in (weekday_bias_means, weekday_biases):
    plt.plot(bias_series[:60])
plt.show()
In [4]:
# Per-day "estimated reading" flag: each day is independently True with
# probability 0.05 and False with probability 0.95.
estimated = np.random.choice(
    [True, False],
    size=(periods,),
    p=[0.05, 0.95],
)

# Preview the first 60 flags.
plt.plot(estimated[:60])
plt.show()
In [5]:
# Resolve the weather station for the project's ZIP code and fetch
# temperatures (requested in degF) aligned to the daily index.
station = zipcode_to_usaf_station("50321")
weather_source = ISDWeatherSource(station)
temps = weather_source.indexed_temperatures(index, "degF")

# Balance point shared by the heating and cooling degree-day calculations;
# named once so the two formulas cannot drift apart.
BALANCE_POINT_DEGF = 65
hdd = np.maximum(BALANCE_POINT_DEGF - temps, 0)
cdd = np.maximum(temps - BALANCE_POINT_DEGF, 0)

# Divide total degree days by the sum of the two means so the result is a
# dimensionless weather multiplier rather than a value in degree days.
# NOTE(review): missing temperatures, if any, would carry through as NaN
# here -- confirm whether the weather source can return gaps.
hdd_cdd_norm = (hdd + cdd) / (cdd.mean() + hdd.mean())
pd.Series(hdd_cdd_norm, index=index).plot()
plt.show()
In [6]:
noise_scale = 0.04

# One gamma-distributed multiplier series per project phase. The expected
# value of each is shape * scale: 1.0 before, 0.9 during and 0.8 after the
# retrofit. The "during" series uses a 5x larger scale (and 5x smaller
# shape), i.e. the same style of draw but with more spread.
# The draw order (pre, during, post) is kept so the seeded stream is unchanged.
pre_noise = np.random.gamma(
    shape=1/noise_scale,
    scale=noise_scale,
    size=periods,
)
during_noise = np.random.gamma(
    shape=0.9/noise_scale/5,
    scale=noise_scale*5,
    size=periods,
)
post_noise = np.random.gamma(
    shape=0.8/noise_scale,
    scale=noise_scale,
    size=periods,
)

retrofit_start_date = datetime.datetime(2013, 6, 1, tzinfo=pytz.UTC)
retrofit_end_date = datetime.datetime(2013, 7, 1, tzinfo=pytz.UTC)

# The three masks are disjoint and together cover every day: the start date
# itself counts as "before" and the end date itself counts as "after".
is_before_retrofit = index <= retrofit_start_date
is_during_retrofit = (index > retrofit_start_date) & (index < retrofit_end_date)
is_after_retrofit = index >= retrofit_end_date

# Pick, for each day, the multiplier belonging to the phase it falls in.
intervention_adjustment = np.select(
    [is_before_retrofit, is_during_retrofit, is_after_retrofit],
    [pre_noise, during_noise, post_noise],
)

plt.plot(intervention_adjustment)
plt.show()
In [7]:
# Daily average implied by 10608 kWh spread over 365 days
# (presumably an annual total -- confirm the source of this figure).
avg_kwh_per_day = 10608.0 / 365

# Baseline usage: daily average scaled by the weather multiplier and the
# day-of-week multiplier.
values_no_intervention = avg_kwh_per_day * hdd_cdd_norm * weekday_biases

# Usage after applying the per-day retrofit phase multiplier.
values_with_intervention = intervention_adjustment * values_no_intervention

# Draw both series on the same axes for comparison.
for usage_series in (values_no_intervention, values_with_intervention):
    usage_series.plot()
plt.show()
In [8]:
# Assemble the per-day energy trace. Scalar entries are broadcast to every
# row; `columns` fixes the column order of the exported CSV.
energy_df = pd.DataFrame({
    "project_id": "ABC",
    "trace_id": "DEF",
    "date": index,
    "value": values_with_intervention.round(decimals=1),
    "unit": "kWh",
    "fuel": "electricity",
    "estimated": estimated,
}, index=index, columns=["project_id", "trace_id", "date", "value", "unit", "fuel", "estimated"])

# Make the final row a value-less, non-estimated record.
# `DataFrame.set_value` was deprecated in pandas 0.21 and removed in 1.0;
# `.at` is the supported label-based scalar setter and works on old and new
# pandas alike.
energy_df.at[index[-1], "value"] = np.nan
energy_df.at[index[-1], "estimated"] = False

# The index duplicates the "date" column, so it is left out of the file.
energy_df.to_csv('sample-energy-data_project-ABC_zipcode-50321.csv',
                 index=False, date_format="%Y-%m-%dT%H:%M:%S%z")
In [9]:
# Single-row project metadata table; `index=[0]` is what lets a dict of
# scalars be turned into a one-row frame.
project_df = pd.DataFrame(
    {
        "project_id": "ABC",
        "zipcode": "50321",
        "retrofit_start_date": retrofit_start_date,
        "retrofit_end_date": retrofit_end_date,
    },
    index=[0],
    columns=["project_id", "zipcode", "retrofit_start_date", "retrofit_end_date"],
)

# Write the row without the index column, using the given timestamp format.
project_df.to_csv(
    'sample-project-data.csv',
    index=False,
    date_format="%Y-%m-%dT%H:%M:%S%z",
)