In [1]:
import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np

In [2]:
raw_data_path = 'C:\\Users\\EEU227\\Documents\\Projects\\Inflation\\SPF\\data'
out_data = 'C:\\Users\\EEU227\\Documents\\Projects\\Inflation\\SPF\\CleanData\\'

In [82]:
#writer = pd.ExcelWriter(out_data + 'SPF-aggregate-moments.xlsx')
#years = [2015]
years = [year for year in range(1999,2016)]
quarters = [1,2,3,4]
months = ['Dec','Mar','Jun','Sep']
dfList = []

In [83]:
for year in years:
    for q in quarters:
        f = str(year) + 'Q' + str(q)
        fname = f + '.csv'
        if os.path.isfile(raw_data_path + '\\' + fname):
            raw_df = pd.read_csv(raw_data_path + '\\' + fname,header = 1)
            # find the row where the growth expectations start
            mask_columns = ~raw_df.columns.str.contains('Unnamed')
            dfList.append(raw_df.columns[mask_columns])

In [84]:
df = pd.DataFrame(dfList)

In [90]:
bins =np.array([-1.0,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.5])
len(bins)


Out[90]:
10

In [98]:
def Hist_std(h):
    if not all(h==0):
        if len(h) == 9:
            bins =np.array([-1.0,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.5])
        elif len(h) == 10:
            bins =np.array([-1.0,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,5.0]) 
        elif len(h)== 13:
            bins =np.array([-3-0,-2.0,-1.5,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,5.0])              
        elif len(h)== 12:
            bins =np.array([-2.0,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,5.0])  
            #
        orig_hist = np.array(h).astype(float)
        norm_hist = orig_hist/float(np.sum(orig_hist))
        mid_points = (bins[1:] + bins[:-1])/2
        MeanCrude = np.dot(norm_hist,mid_points)
        VarCrude  = np.dot(norm_hist,(mid_points-MeanCrude)**2)
        bin_widths = np.diff(bins)
        BinWidth = bin_widths.mean()
        VarSheppard = VarCrude - (BinWidth**2)/12       #variance, Sheppard's correction
        if VarSheppard>0:
            hist_std = np.sqrt(VarSheppard)
        else:
            hist_std = np.sqrt(VarCrude)
    else:
        hist_std = float('nan')
    return hist_std

In [101]:
df.iloc[41,:]


Out[101]:
0     TARGET_PERIOD
1        FCT_SOURCE
2             POINT
3             TN2_0
4        FN2_0TN1_6
5        FN1_5TN1_1
6        FN1_0TN0_6
7        FN0_5TN0_1
8          F0_0T0_4
9          F0_5T0_9
10         F1_0T1_4
11         F1_5T1_9
12         F2_0T2_4
13         F2_5T2_9
14         F3_0T3_4
15         F3_5T3_9
16             F4_0
Name: 41, dtype: object

In [100]:
Hist_std(df.iloc[41,:])


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-100-a2f6d947f736> in <module>()
----> 1 Hist_std(df.iloc[41,:])

<ipython-input-98-6e9e4d84349f> in Hist_std(h)
     10             bins =np.array([-2.0,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,5.0])
     11             #
---> 12         orig_hist = np.array(h).astype(float)
     13         norm_hist = orig_hist/float(np.sum(orig_hist))
     14         mid_points = (bins[1:] + bins[:-1])/2

ValueError: could not convert string to float: 'F4_0'

In [80]:
df.iloc[:,3].unique()


Out[80]:
array(['T0_0'], dtype=object)

In [81]:
for i in df.columns:
    print(df.loc[:,i].value_counts())


TARGET_PERIOD    8
Name: 0, dtype: int64
FCT_SOURCE    8
Name: 1, dtype: int64
POINT    8
Name: 2, dtype: int64
T0_0    8
Name: 3, dtype: int64
F0_0T0_4    8
Name: 4, dtype: int64
F0_5T0_9    8
Name: 5, dtype: int64
F1_0T1_4    8
Name: 6, dtype: int64
F1_5T1_9    8
Name: 7, dtype: int64
F2_0T2_4    8
Name: 8, dtype: int64
F2_5T2_9    8
Name: 9, dtype: int64
F3_0T3_4    8
Name: 10, dtype: int64
F3_5        7
F3_5T3_9    1
Name: 11, dtype: int64
F4_0    1
Name: 12, dtype: int64

In [ ]:


In [ ]: