In [1]:
"""
Created on Mon Apr 4 14:47:17 2016
constructs time series with aggregate point forecasts and uncertainty measures
takes as input the file constructed by SPFprogram.py
@author: EEU227
"""
import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np
In [3]:
raw_data_path = 'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts'
out_data = raw_data_path + '\\CleanData\\'
out_data + 'Point_and_Ind_Uncertainty.xlsx'
Out[3]:
In [5]:
dfList = []
years = [2013,2014,2015,2016]
quarters = [1,2,3,4]
months = ['Dec','Mar','Jun','Sep']
for year in years:
for q in quarters:
f = str(year) + 'Q' + str(q)
try:
raw_df = pd.read_excel(out_data + 'Point_and_Ind_Uncertainty.xlsx',sheetname=f)
mask_t0 = str(year)
mask_t1 = str(year+1)
mask_t2 = str(year+2)
if q<3:
mask_t4or5 = str(year+4)
else:
mask_t4or5 = str(year+5)
if q==1:
mask_Rt1 = str(year) + months[q-1]
mask_Rt2 = str(year+1) + months[q-1]
else:
mask_Rt1 = str(year+1) + months[q-1]
mask_Rt2 = str(year+2) + months[q-1]
#
raw_df.loc[raw_df.loc[:,'target'] == mask_t0,'targetNew'] = 't'
raw_df.loc[raw_df.loc[:,'target'] == mask_t1,'targetNew'] = 't+1'
raw_df.loc[raw_df.loc[:,'target'] == mask_t2,'targetNew'] = 't+2'
raw_df.loc[raw_df.loc[:,'target'] == mask_t4or5,'targetNew'] = 't+4'
raw_df.loc[raw_df.loc[:,'target'] == mask_Rt1,'targetNew'] = 'roll 1'
raw_df.loc[raw_df.loc[:,'target'] == mask_Rt2,'targetNew'] = 'roll 2'
#
dfList.append(raw_df)
except:
print('sheet ' + f + ' doesnt exist yet')
In [1]:
dfAll = pd.concat(dfList,axis=0)
df = dfAll.copy()
df.loc[:,'targetNew'].unique()
In [9]:
dfAll.head()
Out[9]:
In [7]:
# Define the aggregation calculations
aggregations = {
'point': { #
'1.mean': 'mean', #
'2.std': 'std', #
'3.IQR': lambda x: x.quantile(.75) - x.quantile(.25)
},
'GA_std': {#
# 'mean': 'mean',
'median': 'median'
},
'ZL_std': {#
# 'mean': 'mean',
'median': 'median'
},
'Hist_std': {#
# 'mean': 'mean',
'median': 'median'
}
}
In [8]:
grouped = df.groupby(['source','targetNew']).agg(aggregations)
grouped = grouped.rename(columns={'point':'a. point','ZL_std':'b. ZL_std',
'GA_std':'c. GA_std','Hist_std':'d. Hist_std'})
writer = pd.ExcelWriter(out_data + 'SPF_time_series.xlsx')
In [ ]:
for h in df.loc[:,'targetNew'].unique():
df_h = grouped.xs(h,level=1).sortlevel(0,axis=1)
# print(df_h)
df_h.to_excel(writer,h)
writer.save()