In [1]:
import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np
In [2]:
# Folder that holds the raw quarterly CSV files.
raw_data_path = r'C:\Users\EEU227\Documents\Projects\Inflation\SPF\data'
# Output folder. The trailing backslash is part of the value (a raw string
# cannot end in a backslash, hence the explicit concatenation).
out_data = r'C:\Users\EEU227\Documents\Projects\Inflation\SPF\CleanData' + '\\'
In [82]:
#writer = pd.ExcelWriter(out_data + 'SPF-aggregate-moments.xlsx')
#years = [2015]
# Survey years 1999 through 2015 inclusive (the upper bound of range is exclusive).
years = list(range(1999, 2016))
quarters = list(range(1, 5))
months = ['Dec', 'Mar', 'Jun', 'Sep']
# Accumulator: one entry is appended per quarterly file that is found.
dfList = []
In [83]:
# Visit every (year, quarter) pair and, for each quarterly CSV that exists on
# disk, record the labels of its named columns in dfList.
for year in years:
    for q in quarters:
        f = str(year) + 'Q' + str(q)
        fname = f + '.csv'
        csv_path = raw_data_path + '\\' + fname
        if not os.path.isfile(csv_path):
            # No file for this quarter: nothing to record.
            continue
        # header=1: the second line of the file supplies the column labels.
        raw_df = pd.read_csv(csv_path, header=1)
        # Drop the placeholder labels containing 'Unnamed'; keep the rest.
        mask_columns = ~raw_df.columns.str.contains('Unnamed')
        dfList.append(raw_df.columns[mask_columns])
In [84]:
# One row per entry collected in dfList (the column labels of one CSV file).
df = pd.DataFrame(data=dfList)
In [90]:
# Bin edges of the coarsest grid: ten edges delimit nine intervals.
bins = np.array([-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.5])
# Display the number of edges.
len(bins)
Out[90]:
In [98]:
def Hist_std(h):
    """Standard deviation implied by a binned (histogram) distribution.

    Parameters
    ----------
    h : array-like of numbers
        Mass (probabilities or counts) in each bin, ordered from the lowest
        bin to the highest. Lists, tuples, NumPy arrays and pandas Series are
        accepted. The number of entries selects the bin grid; grids with
        9, 10, 12 and 14 bins are available.

    Returns
    -------
    float
        Square root of the variance computed from the bin mid-points after
        Sheppard's correction. When the corrected variance is not positive,
        the uncorrected variance is used instead. ``nan`` is returned when
        every entry of ``h`` is zero (which includes an empty ``h``).

    Raises
    ------
    ValueError
        If the number of entries in ``h`` matches none of the known grids, or
        if ``h`` cannot be converted to floats.

    Notes
    -----
    * The grids are not equally spaced (the outermost bins are 1.0 wide, the
      inner ones 0.5), so the correction uses the mean bin width.
    * The widest grid has 15 edges, i.e. 14 bins. It was previously selected
      for 13-entry inputs, which could never work (13 weights against 14
      mid-points), so it is now keyed by its actual bin count.
      NOTE(review): confirm that 14 is the intended histogram length.
    """
    # Each grid is registered under the number of bins it actually defines
    # (edges - 1), so the histogram and the mid-points always line up.
    edge_grids = (
        [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.5],
        [-1.0, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0],
        [-2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0],
        [-3.0, -2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0,
         3.5, 4.0, 5.0],
    )
    grids_by_bin_count = {len(edges) - 1: np.array(edges) for edges in edge_grids}

    # Convert first: comparing a plain list with 0 yields a single bool, which
    # cannot be iterated over.
    orig_hist = np.asarray(h, dtype=float)

    # A histogram without any mass has no defined spread.
    if np.all(orig_hist == 0):
        return float('nan')

    bins = grids_by_bin_count.get(orig_hist.size)
    if bins is None:
        raise ValueError(
            'Hist_std: no bin grid for a histogram with %d entries '
            '(supported: %s)' % (orig_hist.size, sorted(grids_by_bin_count))
        )

    norm_hist = orig_hist / float(np.sum(orig_hist))
    mid_points = (bins[1:] + bins[:-1]) / 2
    MeanCrude = np.dot(norm_hist, mid_points)
    VarCrude = np.dot(norm_hist, (mid_points - MeanCrude) ** 2)

    BinWidth = np.diff(bins).mean()
    VarSheppard = VarCrude - (BinWidth ** 2) / 12  # variance, Sheppard's correction

    # The correction can push a very concentrated histogram below zero; fall
    # back to the crude variance in that case.
    if VarSheppard > 0:
        return np.sqrt(VarSheppard)
    return np.sqrt(VarCrude)
In [101]:
# Display the row at integer position 41 of df.
df.iloc[41]
Out[101]:
In [100]:
# Apply Hist_std to the row at integer position 41 of df.
Hist_std(df.iloc[41])
In [80]:
# Distinct values found in the column at integer position 3 of df.
fourth_column = df.iloc[:, 3]
fourth_column.unique()
Out[80]:
In [81]:
# Print the frequency table of every column of df, one column at a time.
for i in df.columns:
    column_values = df.loc[:, i]
    print(column_values.value_counts())
In [ ]:
In [ ]: