notebook.community

Edit and run



In [1]:

    
"""
Created on Mon Apr  4 14:47:17 2016
constructs time series with aggregate point forecasts and uncertainty measures
takes as input the file constructed by SPFprogram.py
@author: EEU227
"""
import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np



In [3]:

    
# Root folder holding the individual SPF forecast files.
# The original machine-specific location stays the default; set the
# SPF_DATA_DIR environment variable to run the notebook elsewhere.
raw_data_path = os.environ.get(
    'SPF_DATA_DIR',
    'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts',
)
# Folder with the cleaned workbooks. The trailing '' makes os.path.join end the
# result with a path separator, because later cells build file names by plain
# string concatenation (out_data + '<file>.xlsx').
out_data = os.path.join(raw_data_path, 'CleanData', '')

# Cell output: full path of the workbook read by the loading cell.
out_data + 'Point_and_Ind_Uncertainty.xlsx'









    Out[3]:





'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts\\CleanData\\Point_and_Ind_Uncertainty.xlsx'



In [5]:

    
# Load every available survey round (one worksheet per round, named
# '<year>Q<quarter>') and tag each row with a horizon label in 'targetNew'.
dfList = []
years = [2013,2014,2015,2016]
quarters = [1,2,3,4]
months = ['Dec','Mar','Jun','Sep']

# Open the workbook once instead of re-reading the file for every sheet.
# A missing or unreadable file now raises here instead of being reported
# as sixteen "missing sheets".
with pd.ExcelFile(out_data + 'Point_and_Ind_Uncertainty.xlsx') as workbook:
    available_sheets = set(workbook.sheet_names)
    for year in years:
        for q in quarters:
            f = str(year) + 'Q' + str(q)
            if f not in available_sheets:
                # Only a genuinely absent sheet is skipped; any other error
                # (bad column, corrupt data, ...) propagates to the user.
                print('sheet ' + f + ' doesnt exist yet')
                continue
            # The sheet name is passed positionally: the keyword was renamed
            # from `sheetname` to `sheet_name` across pandas versions.
            raw_df = workbook.parse(f)

            # 't+4' is matched against year+4 for Q1/Q2 rounds and year+5 for
            # Q3/Q4 rounds.
            far_year = year + 4 if q < 3 else year + 5
            # Rolling targets are '<year><month>' strings: the Q1 round uses
            # the survey year and the next one, later rounds are shifted by
            # one further year.
            roll_base = year if q == 1 else year + 1
            horizon_labels = [
                (str(year), 't'),
                (str(year + 1), 't+1'),
                (str(year + 2), 't+2'),
                (str(far_year), 't+4'),
                (str(roll_base) + months[q-1], 'roll 1'),
                (str(roll_base + 1) + months[q-1], 'roll 2'),
            ]
            # Same assignment order as before; rows whose 'target' matches
            # none of the strings keep a missing 'targetNew'.
            for target_value, label in horizon_labels:
                raw_df.loc[raw_df.loc[:,'target'] == target_value,'targetNew'] = label

            dfList.append(raw_df)









    



sheet 2016Q2 doesnt exist yet
sheet 2016Q3 doesnt exist yet
sheet 2016Q4 doesnt exist yet



In [1]:

    
# Stack the per-round frames on top of each other (row-wise is the default
# concatenation axis) to get one long table of individual forecasts.
dfAll = pd.concat(dfList)

# Work on a copy so the concatenated table itself is left as loaded.
df = dfAll.copy()
# Cell output: the distinct horizon labels present in the data.
df['targetNew'].unique()









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-881867c5bf8f> in <module>()
----> 1 dfAll = pd.concat(dfList,axis=1)
      2 
      3 #df = dfAll.copy()
      4 #df.loc[:,'targetNew'].unique()

NameError: name 'pd' is not defined



In [9]:

    
# Preview the first five rows of the concatenated forecast table.
dfAll.head(n=5)



In [7]:

    
# Define the aggregation calculations
def _iqr(x):
    """Return the interquartile range (75th minus 25th percentile) of a Series."""
    return x.quantile(.75) - x.quantile(.25)


# Maps each input column to a list of (output label, aggregation) pairs.
# Passed to groupby(...).agg this yields two-level column labels
# (input column, output label). The earlier nested-dict form
# {'point': {'1.mean': 'mean'}} produced the same labels but is rejected by
# pandas >= 1.0 ("nested renamer is not supported").
aggregations = {
    # Point forecasts: mean, standard deviation and interquartile range.
    'point': [
        ('1.mean', 'mean'),
        ('2.std', 'std'),
        ('3.IQR', _iqr),
    ],
    # Individual uncertainty measures: median only.
    'GA_std': [('median', 'median')],
    'ZL_std': [('median', 'median')],
    'Hist_std': [('median', 'median')],
}



In [8]:

    
# Aggregate the individual forecasts per (survey round, horizon label) and
# relabel the aggregated columns with letter prefixes (presumably so they come
# out in this order when the columns are sorted for export - confirm).
grouped = (
    df.groupby(['source','targetNew'])
      .agg(aggregations)
      .rename(columns={
          'point': 'a. point',
          'ZL_std': 'b. ZL_std',
          'GA_std': 'c. GA_std',
          'Hist_std': 'd. Hist_std',
      })
)

# Workbook that the export cell fills in and closes.
writer = pd.ExcelWriter(out_data + 'SPF_time_series.xlsx')



In [ ]:

    
# Write one worksheet per horizon label, each holding that horizon's
# aggregated series indexed by the remaining group key ('source').
# Missing labels are dropped first: rows whose target matched no horizon have
# a NaN 'targetNew', which is neither a group key in `grouped` nor a valid
# sheet name.
for h in df.loc[:,'targetNew'].dropna().unique():
    # sort_index(level=0, axis=1) replaces the removed DataFrame.sortlevel:
    # it orders the columns by their labels, starting with the outer level.
    df_h = grouped.xs(h,level=1).sort_index(axis=1,level=0)
    df_h.to_excel(writer,sheet_name=h)
# close() writes the file to disk; ExcelWriter.save() was removed in pandas 2.0.
writer.close()

	source	target	id	point	GA_std	ZL_std	Hist_std	targetNew
0	2013-Q1	2013	1	1.80	0.717053	0.368556	0.306828	t
1	2013-Q1	2013	2	2.10	1.255695	0.602439	0.566783	t
2	2013-Q1	2013	3	1.75	3.486930	1.093401	1.145816	t
3	2013-Q1	2013	4	1.70	1.235348	0.612543	0.577511	t
4	2013-Q1	2013	6	1.50	0.511326	0.297209	0.215971	t