In [1]:
import os
import pandas as pd

In [2]:
raw_data_path = 'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts'

In [3]:
if not os.path.isdir(raw_data_path + '\\CleanData'):
    os.mkdir(raw_data_path + '\\CleanData')

In [4]:
out_data = raw_data_path + '\\CleanData\\'

In [5]:
out_data


Out[5]:
'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts\\CleanData\\'
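
As a hedged aside (not part of the original session), the hand-written '\\' separators above tie the notebook to Windows; os.path.join builds the same output folder portably. The name clean_dir below is hypothetical, and the original cells keep using out_data.

In [ ]:
# Hedged alternative: portable construction of the CleanData output folder.
clean_dir = os.path.join(raw_data_path, 'CleanData')
os.makedirs(clean_dir, exist_ok=True)   # create only if it does not exist yet
clean_dir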

In [6]:
dfList = []
writer = pd.ExcelWriter(out_data + 'PointForecasts.xlsx')

years = [2014,2015,2016]
quarters = [1,2,3,4]
for year in years:
    for q in quarters:
        f = str(year) + 'Q' + str(q)
        fname = f + '.csv'
        if os.path.isfile(raw_data_path + '\\' + fname):
            # header=True is rejected by recent pandas; 1 keeps the original
            # behaviour (True == 1, i.e. column names are on the file's second line)
            raw_df = pd.read_csv(raw_data_path + '\\' + fname, header=1)
            # find the row where the growth expectations start
            dum = raw_df[raw_df['TARGET_PERIOD'] == 'GROWTH EXPECTATIONS; YEAR-ON-YEAR CHANGE IN REAL GDP'].index[0]
            mask_columns = ~raw_df.columns.str.contains('Unnamed')  # named columns only (computed but not used below)
            # keep the rows above the GDP-growth block and the first three columns:
            # target period, forecaster id, point forecast (.copy() avoids SettingWithCopyWarning)
            df = raw_df.iloc[0:dum-1,[0,1,2]].copy()
            df['source'] = str(year) + '-Q' + str(q)
            df = df.rename(columns={'TARGET_PERIOD':'target','FCT_SOURCE':'id','POINT':'point'})
            df = df[['source','target','id','point']]
            df['id'] = df['id'].astype('int')
            df['point'] = df['point'].astype('float32')
            df.to_excel(writer,f,index=False)   # one sheet per survey round
            dfList.append(df)
writer.save()   # on newer pandas use writer.close()
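
A quick, optional sanity check: the sketch below (not part of the original run) reads one sheet back from the workbook just written and confirms that the expected columns survived the round trip. The sheet name '2014Q1' is only an example and assumes that survey round's CSV was present.

In [ ]:
# Hedged check, not from the original session: read one sheet back and inspect it.
check = pd.read_excel(out_data + 'PointForecasts.xlsx', sheet_name='2014Q1')
print(check.shape)
print(check.columns.tolist())   # expected: ['source', 'target', 'id', 'point']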

In [7]:
# Define the aggregation calculations
aggregations = {
    'point': {                  # aggregate the point-forecast column
        'mean': 'mean',         # cross-sectional mean of the point forecasts
        'variance': 'var',      # cross-sectional variance (forecaster disagreement)
    }
}

In [8]:
dfAll = pd.concat(dfList,axis=0)
dfAll.groupby(['source','target']).agg(aggregations)


Out[8]:
                     point
                      mean  variance
source   target
2014-Q1  2014     1.148822  0.033833
         2014Dec  1.345396  0.033900
         2015     1.437540  0.051027
         2015Dec  1.528118  0.059527
         2016     1.689222  0.032140
         2018     1.865481  0.040562
2014-Q2  2014     0.865301  0.025511
         2015     1.289622  0.055716
         2015Mar  1.244606  0.053488
         2016     1.545966  0.058250
         2016Mar  1.446395  0.060174
         2018     1.848307  0.035126
2014-Q3  2014     0.713096  0.018613
         2015     1.196929  0.046511
         2015Jun  1.181592  0.053918
         2016     1.534333  0.035945
         2016Jun  1.492149  0.034311
         2019     1.858864  0.076592
2014-Q4  2014     0.526748  0.005561
         2015     0.957279  0.044708
         2015Sep  1.065511  0.060095
         2016     1.351357  0.073306
         2016Sep  1.369630  0.080651
         2019     1.801161  0.032996
2015-Q1  2015     0.252707  0.103503
         2015Dec  0.755420  0.179792
         2016     1.057263  0.105719
         2016Dec  1.221326  0.120416
         2017     1.489798  0.079780
         2019     1.770240  0.044857
2015-Q2  2015     0.094257  0.035210
         2016     1.179801  0.061636
         2016Mar  0.981652  0.156936
         2017     1.562587  0.056315
         2017Mar  1.438915  0.062537
         2019     1.836707  0.032103
2015-Q3  2015     0.242508  0.015741
         2016     1.257033  0.067731
         2016Jun  1.207638  0.089087
         2017     1.595614  0.067669
         2017Jun  1.535782  0.047960
         2020     1.856788  0.043124
2015-Q4  2015     0.112460  0.007842
         2016     1.049451  0.050741
         2016Sep  1.103721  0.080145
         2017     1.534099  0.071899
         2017Sep  1.528929  0.054251
         2020     1.862534  0.038352
2016-Q1  2016     0.743109  0.081273
         2016Dec  1.154667  0.154125
         2017     1.420783  0.073569
         2017Dec  1.509689  0.084609
         2018     1.632693  0.068987
         2020     1.801522  0.049907
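
Note on compatibility: the nested renaming dict passed to .agg() above was deprecated and later removed in pandas, so the cell fails on recent versions. A minimal equivalent using named aggregation is sketched below; it produces the same per-(source, target) mean and variance from dfAll.

In [ ]:
# Hedged alternative for newer pandas: named aggregation instead of a nested dict.
summary = dfAll.groupby(['source', 'target']).agg(
    mean=('point', 'mean'),
    variance=('point', 'var'),
)
summary.head()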

In [116]:
from scipy.stats import norm
import numpy as np

In [119]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

In [117]:
x = np.linspace(norm.ppf(0.01), norm.ppf(0.99), 100)

In [120]:
ax.plot(x, norm.pdf(x),'r-', lw=5, alpha=0.6, label='norm pdf')


Out[120]:
[<matplotlib.lines.Line2D at 0xa6143c8>]
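
The standard normal curve above reads like a reference plot. If the intent is to compare it with the spread of the point forecasts, the hedged sketch below (not part of the original session) fits a normal to the mean and standard deviation of one survey round and target and overlays it on the forecasts; the choice of '2014-Q1'/'2014' is only an example.

In [ ]:
# Hedged sketch: visualise forecaster disagreement for one (source, target) cell
# as a histogram of point forecasts with a fitted normal density on top.
cell = dfAll[(dfAll['source'] == '2014-Q1') & (dfAll['target'] == '2014')]['point']
mu, sigma = cell.mean(), cell.std()
grid = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 200)
fig2, ax2 = plt.subplots(1, 1)
ax2.hist(cell, bins=10, density=True, alpha=0.4, label='point forecasts')
ax2.plot(grid, norm.pdf(grid, loc=mu, scale=sigma), 'b-', lw=2, label='fitted normal')
ax2.legend()
plt.show()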
