In [1]:
import numpy as np
import pandas as pd
In [133]:
# Build a lookup of every wind forecast, keyed by '<valid time>_<farm id>'.
# Each value is a list holding one float vector of `features` per forecast
# that targets that hour, so overlapping forecast runs pile up under one key.
wf_base = '../data/windforecasts_wf'
features = ['u', 'v', 'ws', 'wd']
wf_forecast_dict = {}
for wf_id in range(1, 8):
    wf_file = wf_base + str(wf_id) + '.csv'
    wf_data = pd.read_csv(wf_file)
    # NOTE(review): assumes 'date' is the forecast issue time encoded as
    # YYYYMMDDHH and 'hors' is the lead time in hours -- confirm against the
    # data description. Plain integer addition (date + hors) produces
    # impossible stamps such as ...0124 once the lead time crosses midnight,
    # and those keys can never match a training timestamp. The valid time is
    # therefore computed with real datetime arithmetic and re-encoded.
    issued_at = pd.to_datetime(wf_data['date'].astype(int).astype(str), format='%Y%m%d%H')
    valid_at = issued_at + pd.to_timedelta(wf_data['hors'].astype(int), unit='h')
    keys = valid_at.dt.strftime('%Y%m%d%H') + '_' + str(wf_id)
    # Pull the feature columns out once instead of indexing row by row.
    feature_rows = wf_data[features].values.astype(float)
    for key, values in zip(keys, feature_rows):
        wf_forecast_dict.setdefault(key, []).append(values)
In [148]:
# Reshape the wide training table (one 'wpN' column per farm) into a long
# table with one row per (date, farm) pair, farms stacked in order 1..7.
train = pd.read_csv('../data/train.csv')
# Power readings of farm 1, then farm 2, ... laid end to end.
train_virtual = np.concatenate([train['wp' + str(farm)].values for farm in range(1, 8)])
# Farm id repeated once per training row, in the same farm order.
wf_ids = np.repeat(np.arange(1, 8), len(train))
# The full date column repeated once per farm.
wf_dates = np.tile(train['date'].values, 7)
# Stacking into one array promotes every column to a common (float) dtype.
virtual_data = pd.DataFrame(
    np.column_stack((wf_dates, wf_ids, train_virtual)),
    columns=['date', 'wf_id', 'energy'],
)
In [136]:
# For every (date, farm) row of virtual_data, average all forecasts stored
# under the matching '<date>_<farm id>' key; rows with no stored forecast
# get a vector of four NaNs so the result stays aligned with virtual_data.
avg_forecast = []
for date_value, farm_value in zip(virtual_data['date'], virtual_data['wf_id']):
    lookup_key = '{}_{}'.format(int(date_value), int(farm_value))
    matching_forecasts = wf_forecast_dict.get(lookup_key)
    if matching_forecasts is None:
        avg_forecast.append(np.array([np.nan] * 4))
        continue
    # Column-wise mean across every forecast that targets this hour.
    avg_forecast.append(np.average(matching_forecasts, axis=0))
In [159]:
# Turn the per-row averages into a frame, attach them to virtual_data by
# row position, restore integer dtypes for the identifier columns, and
# write the combined table to disk.
avg_forecast = pd.DataFrame(avg_forecast, columns=features)
virtual_data_aggregate = virtual_data.join(avg_forecast).astype({'date': int, 'wf_id': int})
virtual_data_aggregate.to_csv('../data/virtual_aggregate_data.csv')
In [ ]: