# Untitled1

``````

In [136]:

import xlwings

``````
``````

In [137]:

xlwings.__version__

``````
``````

Out[137]:

'0.7.0'

``````
``````

In [116]:

import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np

``````
``````

In [117]:

raw_data_path = 'C:\\Users\\EEU227\\Documents\\Projects\\Inflation\\SPF\\data'
out_data = 'C:\\Users\\EEU227\\Documents\\Projects\\Inflation\\SPF\\CleanData\\'

``````

def fit_normal_to_hist(h): if not all(h==0): bins =np.array([-2.0,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,5.0])
orig_hist = np.array(h).astype(float) norm_hist = orig_hist/float(np.sum(orig_hist)) mid_points = (bins[1:] + bins[:-1])/2 popt,pcov = opt.curve_fit(lambda x,mu,sig: stats.norm.pdf(x,mu,sig), mid_points,norm_hist) else: popt = [float('nan'),float('nan')] return popt[1]

def ZL_std(h): intervals =[[-2.0,-1.1],[-1.0,-0.6],[-0.5,-0.1],[0.0,0.4],[0.5,0.9],[1.0,1.4], [1.5,1.9],[2.0,2.4],[2.5,2.9],[3.0,3.4],[3.5,3.9],[4.0,5.0]] if not all(h==0): sum_i1 = 0 sum_i2 = 0 for i in range(1,len(h)): p = h[i]/100 v1,v2 = intervals[i] sum_i1 += p(v23 - v13)/(3(v2-v1)) sum_i2 += p(v22 - v12)/(2(v2-v1)) zl_std = np.sqrt(sum_i1 - sum_i2**2) else: zl_std = float('nan')
return zl_std

``````

In [118]:

def Hist_std(h):
if not all(h==0):
bins =np.array([-2.0,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,5.0])
orig_hist = np.array(h).astype(float)
norm_hist = orig_hist/float(np.sum(orig_hist))
mid_points = (bins[1:] + bins[:-1])/2
MeanCrude = np.dot(norm_hist,mid_points)
VarCrude  = np.dot(norm_hist,(mid_points-MeanCrude)**2)
bin_widths = np.diff(bins)
BinWidth = bin_widths.mean()
VarSheppard = VarCrude - (BinWidth**2)/12       #variance, Sheppard's correction
if VarSheppard>0:
hist_std = np.sqrt(VarSheppard)
else:
hist_std = np.sqrt(VarCrude)
else:
hist_std = float('nan')
return hist_std

``````
``````

In [119]:

years = [year for year in range(2010,2017)]

``````
``````

In [120]:

years

``````
``````

Out[120]:

[2010, 2011, 2012, 2013, 2014, 2015, 2016]

``````
``````

In [124]:

#writer = pd.ExcelWriter(out_data + 'SPF-aggregate-moments.xlsx')
#years = [2015]
years = [year for year in range(2010,2017)]
quarters = [1,2,3,4]
months = ['Dec','Mar','Jun','Sep']
dfList = []

``````
``````

In [125]:

for year in years:
for q in quarters:
f = str(year) + 'Q' + str(q)
fname = f + '.csv'
if os.path.isfile(raw_data_path + '\\' + fname):
# find the row where the growth expectations start
dum = raw_df[raw_df['TARGET_PERIOD'] == 'GROWTH EXPECTATIONS; YEAR-ON-YEAR CHANGE IN REAL GDP'].index[0]
df['source'] = str(year) + '-Q' + str(q)
df = df.rename(columns={'TARGET_PERIOD':'target','FCT_SOURCE':'id','POINT':'point',
'TN1_0':'[-2.0,-1.1]','FN1_0TN0_6':'[-1.0,-0.6]',
'FN0_5TN0_1':'[-0.5,-0.1]','F0_0T0_4':'[0.0,0.4]',
'F0_5T0_9':'[0.5,0.9]','F1_0T1_4':'[1.0,1.4]',
'F1_5T1_9':'[1.5,1.9]','F2_0T2_4':'[2.0,2.4]',
'F2_5T2_9':'[2.5,2.9]','F3_0T3_4':'[3.0,3.4]',
'F3_5T3_9':'[3.5,3.9]','F4_0':'[4.0,5.0]'})
df = df[['source','target','id','point','[-2.0,-1.1]','[-1.0,-0.6]','[-0.5,-0.1]',
'[0.0,0.4]','[0.5,0.9]','[1.0,1.4]','[1.5,1.9]','[2.0,2.4]','[2.5,2.9]',
'[3.0,3.4]','[3.5,3.9]', '[4.0,5.0]']]
# remove rows where point is missing
df.fillna(0,inplace = True)
for colname in df.columns[3:]:
df[colname] = df[colname].astype('float')
df = df[['source','target','id','point','Hist_std']]
df['id'] = df['id'].astype('int')
df['point'] = df['point'].astype('float32')
#df.to_excel(writer,f,index=False)
# create time series representation of the targets
if q<3:
else:
if q==1:
else:
#
df.loc[df.loc[:,'target'] == mask_Rt1,'targetNew'] = 'roll 1'
df.loc[df.loc[:,'target'] == mask_Rt2,'targetNew'] = 'roll 2'

dfList.append(df)
#writer.save()

``````
``````

In [126]:

# Define the aggregation calculations
aggregations = {
'point': { #
'std': 'std',  #
},
'Hist_std': {
'mean': 'mean',
}
}

``````
``````

In [127]:

dfAll = pd.concat(dfList,axis=0)

``````
``````

In [128]:

``````
``````

Out[128]:

source
target
id
point
Hist_std
targetNew

0
2010-Q1
2010
1
1.0
0.623333
t

1
2010-Q1
2010
2
1.4
0.611673
t

2
2010-Q1
2010
4
1.4
0.451130
t

3
2010-Q1
2010
7
1.0
NaN
t

4
2010-Q1
2010
8
1.3
0.575429
t

``````
``````

In [129]:

dfAll.tail()

``````
``````

Out[129]:

source
target
id
point
Hist_std
targetNew

335
2016-Q1
2020
102
1.4
0.329421
t+4

336
2016-Q1
2020
103
1.8
0.690219
t+4

337
2016-Q1
2020
105
2.2
0.107905
t+4

339
2016-Q1
2020
112
2.0
0.627809
t+4

340
2016-Q1
2020
114
1.5
0.844241
t+4

``````
``````

In [130]:

grouped = dfAll.groupby(['source','targetNew']).agg(aggregations)

``````
``````

In [131]:

``````
``````

Out[131]:

point
Hist_std

std
mean

source
targetNew

2010-Q1
roll 1
0.379926
0.570146

roll 2
0.386152
0.628710

t
0.210075
0.478055

t+1
0.412391
0.568893

t+4
0.223462
0.655118

``````
``````

In [132]:

grouped['AggVar']= grouped['point']['std'] + grouped['Hist_std']['mean']

``````
``````

In [133]:

``````
``````

Out[133]:

point
Hist_std
AggVar

std
mean

source
targetNew

2010-Q1
roll 1
0.379926
0.570146
0.950073

roll 2
0.386152
0.628710
1.014862

t
0.210075
0.478055
0.688130

t+1
0.412391
0.568893
0.981285

t+4
0.223462
0.655118
0.878579

2010-Q2
roll 1
0.321718
0.517251
0.838969

roll 2
0.339143
0.572631
0.911774

t
0.215482
0.422376
0.637857

t+1
0.343113
0.515379
0.858492

t+4
0.170838
0.626043
0.796881

``````
``````

In [134]:

dfAll.loc[:,'targetNew'].unique()

``````
``````

Out[134]:

array(['t', 'roll 1', 't+1', 'roll 2', 't+4', 't+2'], dtype=object)

``````
``````

In [135]:

grouped.xs('t+4',level=1).sortlevel(0,axis=1)

``````
``````

Out[135]:

point
Hist_std
AggVar

std
mean

source

2010-Q1
0.223462
0.655118
0.878579

2010-Q2
0.170838
0.626043
0.796881

2010-Q3
0.272371
0.631932
0.904302

2010-Q4
0.289806
0.665188
0.954994

2011-Q1
0.255767
0.596972
0.852739

2011-Q2
0.202176
0.621294
0.823470

2011-Q3
0.214857
0.633878
0.848735

2011-Q4
0.267817
0.624849
0.892666

2012-Q1
0.197640
0.648206
0.845846

2012-Q2
0.320977
0.651167
0.972144

2012-Q3
0.222922
0.648010
0.870933

2012-Q4
0.268997
0.673533
0.942530

2013-Q1
0.216292
0.642344
0.858635

2013-Q2
0.206506
0.655308
0.861814

2013-Q3
0.238225
0.634926
0.873151

2013-Q4
0.254614
0.627572
0.882185

2014-Q1
0.201399
0.642977
0.844377

2014-Q2
0.187420
0.649761
0.837181

2014-Q3
0.276752
0.619263
0.896015

2014-Q4
0.181649
0.635252
0.816901

2015-Q1
0.211796
0.626370
0.838165

2015-Q2
0.179172
0.666846
0.846018

2015-Q3
0.207662
0.641744
0.849407

2015-Q4
0.195837
0.638468
0.834305

2016-Q1
0.223398
0.627756
0.851153

``````
``````

In [ ]:

for h in dfAll.loc[:,'targetNew'].unique():
df_h = grouped.xs(h,level=1).sortlevel(0,axis=1)

``````