In [1]:
import os
import pandas as pd

In [2]:
raw_data_path = 'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts'

In [3]:
if not os.path.isdir(raw_data_path + '\\CleanData'):
    os.mkdir(raw_data_path + '\\CleanData')

In [4]:
out_data = raw_data_path + '\\CleanData\\'

In [5]:
out_data


Out[5]:
'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts\\CleanData\\'
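
As a hedged aside (not part of the original session), the hand-written '\\' separators above tie the notebook to Windows; os.path.join builds the same output folder portably. The name clean_dir below is hypothetical, and the original cells keep using out_data.

In [ ]:
# Hedged alternative: portable construction of the CleanData output folder.
clean_dir = os.path.join(raw_data_path, 'CleanData')
os.makedirs(clean_dir, exist_ok=True)   # create only if it does not exist yet
clean_dir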

In [6]:
dfList = []
writer = pd.ExcelWriter(out_data + 'PointForecasts.xlsx')

years = [2014,2015,2016]
quarters = [1,2,3,4]
for year in years:
    for q in quarters:
        f = str(year) + 'Q' + str(q)
        fname = f + '.csv'
        if os.path.isfile(raw_data_path + '\\' + fname):
            # header=True is rejected by recent pandas; 1 keeps the original
            # behaviour (True == 1, i.e. column names are on the file's second line)
            raw_df = pd.read_csv(raw_data_path + '\\' + fname, header=1)
            # find the row where the growth expectations start
            dum = raw_df[raw_df['TARGET_PERIOD'] == 'GROWTH EXPECTATIONS; YEAR-ON-YEAR CHANGE IN REAL GDP'].index[0]
            mask_columns = ~raw_df.columns.str.contains('Unnamed')  # named columns only (computed but not used below)
            # keep the rows above the GDP-growth block and the first three columns:
            # target period, forecaster id, point forecast (.copy() avoids SettingWithCopyWarning)
            df = raw_df.iloc[0:dum-1,[0,1,2]].copy()
            df['source'] = str(year) + '-Q' + str(q)
            df = df.rename(columns={'TARGET_PERIOD':'target','FCT_SOURCE':'id','POINT':'point'})
            df = df[['source','target','id','point']]
            df['id'] = df['id'].astype('int')
            df['point'] = df['point'].astype('float32')
            df.to_excel(writer,f,index=False)   # one sheet per survey round
            dfList.append(df)
writer.save()   # on newer pandas use writer.close()
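
A quick, optional sanity check: the sketch below (not part of the original run) reads one sheet back from the workbook just written and confirms that the expected columns survived the round trip. The sheet name '2014Q1' is only an example and assumes that survey round's CSV was present.

In [ ]:
# Hedged check, not from the original session: read one sheet back and inspect it.
check = pd.read_excel(out_data + 'PointForecasts.xlsx', sheet_name='2014Q1')
print(check.shape)
print(check.columns.tolist())   # expected: ['source', 'target', 'id', 'point']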

In [7]:
# Define the aggregation calculations
aggregations = {
    'point': {                  # aggregate the point-forecast column
        'mean': 'mean',         # cross-sectional mean of the point forecasts
        'variance': 'var',      # cross-sectional variance (forecaster disagreement)
    }
}

In [8]:
dfAll = pd.concat(dfList,axis=0)
dfAll.groupby(['source','target']).agg(aggregations)


Out[8]:
                     point
                      mean  variance
source   target
2014-Q1  2014     1.148822  0.033833
         2014Dec  1.345396  0.033900
         2015     1.437540  0.051027
         2015Dec  1.528118  0.059527
         2016     1.689222  0.032140
         2018     1.865481  0.040562
2014-Q2  2014     0.865301  0.025511
         2015     1.289622  0.055716
         2015Mar  1.244606  0.053488
         2016     1.545966  0.058250
         2016Mar  1.446395  0.060174
         2018     1.848307  0.035126
2014-Q3  2014     0.713096  0.018613
         2015     1.196929  0.046511
         2015Jun  1.181592  0.053918
         2016     1.534333  0.035945
         2016Jun  1.492149  0.034311
         2019     1.858864  0.076592
2014-Q4  2014     0.526748  0.005561
         2015     0.957279  0.044708
         2015Sep  1.065511  0.060095
         2016     1.351357  0.073306
         2016Sep  1.369630  0.080651
         2019     1.801161  0.032996
2015-Q1  2015     0.252707  0.103503
         2015Dec  0.755420  0.179792
         2016     1.057263  0.105719
         2016Dec  1.221326  0.120416
         2017     1.489798  0.079780
         2019     1.770240  0.044857
2015-Q2  2015     0.094257  0.035210
         2016     1.179801  0.061636
         2016Mar  0.981652  0.156936
         2017     1.562587  0.056315
         2017Mar  1.438915  0.062537
         2019     1.836707  0.032103
2015-Q3  2015     0.242508  0.015741
         2016     1.257033  0.067731
         2016Jun  1.207638  0.089087
         2017     1.595614  0.067669
         2017Jun  1.535782  0.047960
         2020     1.856788  0.043124
2015-Q4  2015     0.112460  0.007842
         2016     1.049451  0.050741
         2016Sep  1.103721  0.080145
         2017     1.534099  0.071899
         2017Sep  1.528929  0.054251
         2020     1.862534  0.038352
2016-Q1  2016     0.743109  0.081273
         2016Dec  1.154667  0.154125
         2017     1.420783  0.073569
         2017Dec  1.509689  0.084609
         2018     1.632693  0.068987
         2020     1.801522  0.049907
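
Note on compatibility: the nested renaming dict passed to .agg() above was deprecated and later removed in pandas, so the cell fails on recent versions. A minimal equivalent using named aggregation is sketched below; it produces the same per-(source, target) mean and variance from dfAll.

In [ ]:
# Hedged alternative for newer pandas: named aggregation instead of a nested dict.
summary = dfAll.groupby(['source', 'target']).agg(
    mean=('point', 'mean'),
    variance=('point', 'var'),
)
summary.head()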

In [116]:
from scipy.stats import norm
import numpy as np

In [119]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

In [117]:
x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)

In [120]:
ax.plot(x, norm.pdf(x),'r-', lw=5, alpha=0.6, label='norm pdf')


Out[120]:
[<matplotlib.lines.Line2D at 0xa6143c8>]
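
The standard normal curve above reads like a reference plot. If the intent is to compare it with the spread of the point forecasts, the hedged sketch below (not part of the original session) fits a normal to the mean and standard deviation of one survey round and target and overlays it on the forecasts; the choice of '2014-Q1'/'2014' is only an example.

In [ ]:
# Hedged sketch: visualise forecaster disagreement for one (source, target) cell
# as a histogram of point forecasts with a fitted normal density on top.
cell = dfAll[(dfAll['source'] == '2014-Q1') & (dfAll['target'] == '2014')]['point']
mu, sigma = cell.mean(), cell.std()
grid = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 200)
fig2, ax2 = plt.subplots(1, 1)
ax2.hist(cell, bins=10, density=True, alpha=0.4, label='point forecasts')
ax2.plot(grid, norm.pdf(grid, loc=mu, scale=sigma), 'b-', lw=2, label='fitted normal')
ax2.legend()
plt.show()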
