get_SPF_Aggregates



In [1]:
"""
Created on Mon Apr  4 14:47:17 2016
constructs time series with aggregate point forecasts and uncertainty measures
takes as input the file constructed by SPFprogram.py
@author: EEU227
"""
import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np

In [3]:
raw_data_path = 'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts'
out_data = raw_data_path + '\\CleanData\\'

out_data + 'Point_and_Ind_Uncertainty.xlsx'


Out[3]:
'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts\\CleanData\\Point_and_Ind_Uncertainty.xlsx'

In [5]:
dfList = []
years = [2013,2014,2015,2016]
quarters = [1,2,3,4]
months = ['Dec','Mar','Jun','Sep']
for year in years:
    for q in quarters:
        f = str(year) + 'Q' + str(q)
        try:
            raw_df = pd.read_excel(out_data + 'Point_and_Ind_Uncertainty.xlsx',sheetname=f)
            mask_t0 = str(year)
            mask_t1 = str(year+1)
            mask_t2 = str(year+2)
            if q<3:
                mask_t4or5 = str(year+4)
            else:
                mask_t4or5 = str(year+5)
            if q==1:
                mask_Rt1 = str(year) + months[q-1]
                mask_Rt2 = str(year+1) + months[q-1]
            else:
                mask_Rt1 = str(year+1) + months[q-1]
                mask_Rt2 = str(year+2) + months[q-1]
                #
            raw_df.loc[raw_df.loc[:,'target'] == mask_t0,'targetNew'] = 't'
            raw_df.loc[raw_df.loc[:,'target'] == mask_t1,'targetNew'] = 't+1'
            raw_df.loc[raw_df.loc[:,'target'] == mask_t2,'targetNew'] = 't+2'
            raw_df.loc[raw_df.loc[:,'target'] == mask_t4or5,'targetNew'] = 't+4'
            raw_df.loc[raw_df.loc[:,'target'] == mask_Rt1,'targetNew'] = 'roll 1'
            raw_df.loc[raw_df.loc[:,'target'] == mask_Rt2,'targetNew'] = 'roll 2'
            #
            dfList.append(raw_df)
        except:
            print('sheet ' + f + ' doesnt exist yet')


sheet 2016Q2 doesnt exist yet
sheet 2016Q3 doesnt exist yet
sheet 2016Q4 doesnt exist yet

In [1]:
dfAll = pd.concat(dfList,axis=0)

df = dfAll.copy()
df.loc[:,'targetNew'].unique()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-881867c5bf8f> in <module>()
----> 1 dfAll = pd.concat(dfList,axis=1)
      2 
      3 #df = dfAll.copy()
      4 #df.loc[:,'targetNew'].unique()

NameError: name 'pd' is not defined

In [9]:
dfAll.head()


Out[9]:
source target id point GA_std ZL_std Hist_std targetNew
0 2013-Q1 2013 1 1.80 0.717053 0.368556 0.306828 t
1 2013-Q1 2013 2 2.10 1.255695 0.602439 0.566783 t
2 2013-Q1 2013 3 1.75 3.486930 1.093401 1.145816 t
3 2013-Q1 2013 4 1.70 1.235348 0.612543 0.577511 t
4 2013-Q1 2013 6 1.50 0.511326 0.297209 0.215971 t

In [7]:
# Define the aggregation calculations
aggregations = {
    'point': { #
        '1.mean': 'mean',  #
         '2.std': 'std', #
         '3.IQR': lambda x: x.quantile(.75) - x.quantile(.25)
    },
    'GA_std': {#
       # 'mean': 'mean',
      'median': 'median'
    },
    'ZL_std': {#
      #  'mean': 'mean',
      'median': 'median'
    },
    'Hist_std': {#
     #   'mean': 'mean',
      'median': 'median'
    }
}

In [8]:
grouped = df.groupby(['source','targetNew']).agg(aggregations)
grouped = grouped.rename(columns={'point':'a. point','ZL_std':'b. ZL_std',
'GA_std':'c. GA_std','Hist_std':'d. Hist_std'})

writer = pd.ExcelWriter(out_data + 'SPF_time_series.xlsx')

In [ ]:
for h in df.loc[:,'targetNew'].unique():
    df_h = grouped.xs(h,level=1).sortlevel(0,axis=1)
#    print(df_h)
    df_h.to_excel(writer,h)
writer.save()