06-29-2016-Plot-KDEs-OF-aggregate-histograms



In [2]:
%matplotlib inline
import matplotlib as mpl
import pandas as pd
import numpy as np
import os

import seaborn as sns
sns.set(style="white")
import matplotlib.pyplot as plt

In [11]:
def get_draws(hist, draws_per_unit=10):
    """Simulate point draws from a binned histogram.

    Every bin contributes ``int(round(mass * draws_per_unit))`` values drawn
    uniformly between the bin's lower and upper bound (NumPy's global random
    state is used, so ``np.random.seed`` controls reproducibility).

    Parameters
    ----------
    hist : pandas.Series
        Bin masses, indexed by string labels of the form ``'[lb,ub]'``
        (for example ``'[-0.5,-0.1]'``).
    draws_per_unit : int or float, optional
        Number of draws generated per unit of bin mass. Defaults to 10.

    Returns
    -------
    pandas.Series
        The draws of all bins, in bin order, on a fresh 0..n-1 index and
        named after ``hist``. Empty (float dtype) when ``hist`` has no bins
        or every bin rounds to zero draws.
    """
    pieces = []

    # Pair labels with values positionally; ``hist[i]`` on a label-indexed
    # Series relies on a positional fallback that newer pandas deprecates.
    for label, mass in zip(hist.index, hist.values):
        parts = label.split(',')
        lb = float(parts[0].strip('['))
        ub = float(parts[1].strip(']'))

        n = int(round(mass * draws_per_unit))
        pieces.append(np.random.uniform(lb, ub, n))

    # Concatenate once at the end instead of growing a Series bin by bin.
    if pieces:
        values = np.concatenate(pieces)
    else:
        values = np.array([], dtype=float)

    return pd.Series(values, name=hist.name)

In [85]:
def get_df_draws(df, bin_columns=slice(2, 14)):
    """Simulate draws for every row of an aggregate-histogram table.

    Parameters
    ----------
    df : pandas.DataFrame
        One histogram per row. Must contain a ``'target'`` column; the bin
        masses are taken from the columns selected by ``bin_columns``.
    bin_columns : slice or list of int, optional
        Positional selector (passed to ``.iloc``) for the bin columns.
        Defaults to ``slice(2, 14)``, i.e. columns 2 to 13.

    Returns
    -------
    pandas.DataFrame
        One column of draws per row of ``df``, labelled with that row's
        ``'target'`` value. Columns shorter than the longest one are padded
        with NaN. If a target value repeats, the later row wins.
    """
    columns = {}
    for pos in range(df.shape[0]):
        hist = df.iloc[pos, bin_columns]
        # Look the target up by position as well, so the function does not
        # depend on ``df`` carrying a default 0..n-1 index.
        hist.name = df['target'].iloc[pos]
        draws = get_draws(hist)
        columns[draws.name] = draws

    # Build the frame in one go: pandas aligns the columns on the union of
    # their indexes. Assigning columns one at a time into an empty frame
    # would reindex every later column to the first one and silently drop
    # the extra draws of longer columns.
    return pd.DataFrame(columns)

In [86]:
# Preview one survey round (2015 Q4) of the aggregate-histogram data.
f = 'agg_hist_{}Q{}'.format(2015, 4)
fname = '{}.csv'.format(f)
raw_df = pd.read_csv('../clean_data/' + fname, header=0)
raw_df


Out[86]:
source target [-2.0,-1.1] [-1.0,-0.6] [-0.5,-0.1] [0.0,0.4] [0.5,0.9] [1.0,1.4] [1.5,1.9] [2.0,2.4] [2.5,2.9] [3.0,3.4] [3.5,3.9] [4.0,5.0]
0 2015-Q4 2015 0.602000 2.276000 24.296903 63.002736 7.678361 1.576000 0.398000 0.098000 0.040000 0.022000 0.010000 0.000000
1 2015-Q4 2016 0.219028 0.829607 3.714362 11.753718 29.132909 34.504901 14.095859 4.038049 1.155750 0.367736 0.124080 0.064002
2 2015-Q4 2016Sep 0.366086 0.954973 4.393809 11.584200 27.678981 30.215566 15.823224 5.869419 2.134730 0.655915 0.233833 0.089264
3 2015-Q4 2017 0.284444 0.653333 1.560000 5.448889 11.971111 26.715556 33.200000 14.075556 4.217778 1.426667 0.326667 0.120000
4 2015-Q4 2017Sep 0.237143 0.697143 1.568571 5.814286 12.582857 26.068571 31.622857 13.760000 4.948571 1.934286 0.522857 0.242857
5 2015-Q4 2020 0.235135 0.286486 0.935135 2.864865 8.283784 19.289189 32.118919 20.972973 9.859459 3.575676 1.197297 0.381081

In [87]:
# Input files are named like ../clean_data/agg_hist_2016Q1.csv
# Simulate draws for every survey round that exists on disk and stack them
# into one long frame with the columns source / target / point.

# get_draws samples from NumPy's global random state; fixing the seed makes
# the simulated draws (and every figure built from them) reproducible.
np.random.seed(0)

years = [2015,2016]
quarters = [1,2,3,4]
df_List = []
for year in years:
    for q in quarters:
        f = 'agg_hist_' + str(year) + 'Q' + str(q)
        fname = f + '.csv'
        if os.path.isfile('../clean_data/' + fname):
            raw_df = pd.read_csv('../clean_data/' + fname,header = 0)
            df_draws = get_df_draws(raw_df)

            # Wide (one column per target) -> long (one row per draw).
            df_melt = pd.melt(df_draws, var_name='target', value_name='point')
            # Later cells select rows with string comparisons such as
            # df.target == '2017', so store every target label as a string.
            df_melt['target'] = df_melt['target'].astype(str)
            df_melt['source'] = str(year) + '-Q' + str(q)
            df_melt = df_melt[['source','target','point']]
            # Drop the NaN padding of targets with fewer draws than others.
            df_melt = df_melt[df_melt['point'].notnull()]
            df_List.append(df_melt)

# Concatenate once instead of growing df_big inside the loop; this also
# keeps 'point' numeric rather than inheriting the dtype of an empty frame.
if df_List:
    df_big = pd.concat(df_List, axis=0)
else:
    df_big = pd.DataFrame(columns = ['source','target','point'])

In [73]:
# Short alias used by the cells below; this binds the same object as df_big
# (no copy is made).
df = df_big

In [75]:
# Show the first rows of the long-format frame (columns: source, target, point).
df.head()


Out[75]:
source target point
0 2015-Q1 2015 -1.409785
1 2015-Q1 2015 -1.958777
2 2015-Q1 2015 -1.250117
3 2015-Q1 2015 -1.788678
4 2015-Q1 2015 -1.841659

In [ ]:


In [59]:
# Row selectors for one target horizon as seen by two survey rounds.
target = '2020'
mask1, mask2 = [
    (df.target == target) & (df.source == source_round)
    for source_round in ('2016-Q1', '2016-Q2')
]

In [60]:
# Re-wrap the selected draws as fresh Series: going through .values discards
# the original row labels, and each Series gets a LaTeX name.
x1 = pd.Series(df.loc[mask1, 'point'].values, name="$X_1$")
x2 = pd.Series(df.loc[mask2, 'point'].values, name="$X_2$")

In [14]:
#sns.jointplot(x=x1, y = x2,kind='reg');

In [15]:
#sns.distplot(x1,norm_hist=True);

In [17]:
#default_rcParams = dict(mpl.rcParams)
#plt.style.use('fivethirtyeight')

In [284]:
# Overlay the kernel density estimates of the two survey rounds and save the
# figure in three formats.
with plt.style.context('ggplot'):
    plt.figure(figsize = (10,6))

    sns.distplot(x1, hist=False, rug=False, kde_kws={'shade':True}, color='b', label='2016-Q1')
    sns.distplot(x2, hist=False, rug=False, kde_kws={'shade':True}, color='g', label='2016-Q2')
    plt.xlabel('')
    plt.legend(loc='best', fontsize='medium', title='',)
    plt.title('Distributions of SPF point forecasts for HICP inflation in '+target, fontsize='large')

    # Save while the figure is still current. (The former bare ";" line was
    # removed: it is not Python and only ran through an IPython rewrite.)
    for ext in ('.png', '.pdf', '.jpeg'):
        plt.savefig('../figures/Source2016_Q1_Q2-Target' + target + ext)



In [236]:
# List the contents of the figures directory. This line is not Python; it
# relies on IPython resolving a bare `ls` to its shell alias.
ls ../figures/


2016-04-06-aggregate_histograms_2016.pdf
2016-04-06-aggregate_histograms_2016.png
2016-04-06-aggregate_histograms_2016.svg
2016-04-06ggregate_histograms_2016.eps

In [226]:
# Joint kernel density of the two survey rounds' draws.
# x and y are passed by keyword because newer seaborn releases no longer
# accept them positionally.
# NOTE(review): `size` was renamed `height` in later seaborn releases; it is
# kept here for the seaborn version this notebook was written against.
g = sns.jointplot(x=x1, y=x2, kind="kde", size=7, space=0)



In [ ]:
# Layout check: an empty 3 x 2 grid of numbered panels without ticks.
plt.figure()

for i in range(1, 7):
    panel = plt.subplot(3, 2, i)
    panel.set_title(i)
    panel.set_xticks([])
    panel.set_yticks([])

plt.tight_layout()

In [66]:
# 2 x 2 grid: one panel per target horizon, each overlaying the kernel
# density estimates of the 2016-Q1 and 2016-Q2 survey rounds.
plt.figure(figsize = (10,6))

sns.set(style="white", palette="muted", color_codes=True)

years = [2016, 2017, 2018, 2020]
# Bin edges kept for reference. They are not passed to distplot below:
# with hist=False no histogram is drawn, so a bins argument has no effect.
bins = [-2.0,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,5.0]

# Panels are numbered from 1, matching plt.subplot's convention.
for i, target in enumerate(map(str, years), start=1):
    mask1 = (df.target == target) & (df.source == '2016-Q1')
    mask2 = (df.target == target) & (df.source == '2016-Q2')

    x1 = pd.Series(df.loc[mask1,'point'].values, name="$X_1$")
    x2 = pd.Series(df.loc[mask2,'point'].values, name="$X_2$")

    plt.subplot(2, 2, i)
    plt.title(target)
    sns.distplot(x1, hist=False, rug=False, kde_kws={'shade':True}, color='b', label='2016-Q1')
    sns.distplot(x2, hist=False, rug=False, kde_kws={'shade':True}, color='g', label='2016-Q2')
    plt.xlabel('')
    plt.legend(loc='best', fontsize='medium', title='',)

plt.tight_layout()
plt.suptitle('Distributions of SPF aggregate histograms for HICP inflation', fontsize='large',  y = 1.04)
# The last statement returns None, so the cell prints nothing. The former
# bare ";" line was removed: it is not Python and made IPython echo ''.
plt.subplots_adjust(hspace=0.4)


Out[66]:
''

In [76]:
# Row selectors for the 2017 target as seen by three survey rounds.
target = '2017'
mask1, mask2, mask3 = [
    (df.target == target) & (df.source == source_round)
    for source_round in ('2016-Q1', '2016-Q2', '2015-Q4')
]

In [82]:
# Display the rows selected by mask3.
df.loc[mask3]


Out[82]:
source target point
3000 2015-Q4 2017 -1.165788
3001 2015-Q4 2017 -1.126949
3002 2015-Q4 2017 -1.180318
3003 2015-Q4 2017 -0.879724
3004 2015-Q4 2017 -0.940738
3005 2015-Q4 2017 -0.621093
3006 2015-Q4 2017 -0.986387
3007 2015-Q4 2017 -0.969425
3008 2015-Q4 2017 -0.864457
3009 2015-Q4 2017 -0.856369
3010 2015-Q4 2017 -0.435762
3011 2015-Q4 2017 -0.279734
3012 2015-Q4 2017 -0.137220
3013 2015-Q4 2017 -0.345218
3014 2015-Q4 2017 -0.362124
3015 2015-Q4 2017 -0.283180
3016 2015-Q4 2017 -0.123055
3017 2015-Q4 2017 -0.344843
3018 2015-Q4 2017 -0.285272
3019 2015-Q4 2017 -0.330663
3020 2015-Q4 2017 -0.390812
3021 2015-Q4 2017 -0.370879
3022 2015-Q4 2017 -0.380902
3023 2015-Q4 2017 -0.481423
3024 2015-Q4 2017 -0.478815
3025 2015-Q4 2017 -0.327733
3026 2015-Q4 2017 0.040882
3027 2015-Q4 2017 0.351606
3028 2015-Q4 2017 0.127373
3029 2015-Q4 2017 0.236617
... ... ... ...
3970 2015-Q4 2017 2.879025
3971 2015-Q4 2017 2.896136
3972 2015-Q4 2017 2.685255
3973 2015-Q4 2017 2.642382
3974 2015-Q4 2017 2.862821
3975 2015-Q4 2017 2.668511
3976 2015-Q4 2017 2.667397
3977 2015-Q4 2017 2.850070
3978 2015-Q4 2017 2.843807
3979 2015-Q4 2017 2.580333
3980 2015-Q4 2017 2.898319
3981 2015-Q4 2017 2.685848
3982 2015-Q4 2017 3.309759
3983 2015-Q4 2017 3.139932
3984 2015-Q4 2017 3.070125
3985 2015-Q4 2017 3.241399
3986 2015-Q4 2017 3.370024
3987 2015-Q4 2017 3.208320
3988 2015-Q4 2017 3.139248
3989 2015-Q4 2017 3.174686
3990 2015-Q4 2017 3.243535
3991 2015-Q4 2017 3.311000
3992 2015-Q4 2017 3.021619
3993 2015-Q4 2017 3.357079
3994 2015-Q4 2017 3.179388
3995 2015-Q4 2017 3.282181
3996 2015-Q4 2017 3.657529
3997 2015-Q4 2017 3.533769
3998 2015-Q4 2017 3.856055
3999 2015-Q4 2017 4.820237

1000 rows × 3 columns


In [84]:
# Single panel: kernel density estimates for one target horizon as seen by
# three consecutive survey rounds (2015-Q4, 2016-Q1, 2016-Q2).
plt.figure(figsize = (10,6))

sns.set(style="white", palette="muted", color_codes=True)

years = [2017]
# Bin edges kept for reference. They are not passed to distplot below:
# with hist=False no histogram is drawn, so a bins argument has no effect.
bins = [-2.0,-1.0,-0.5,0.0,0.5,1.0,1.5,2.0,2.5,3.0,3.5,4.0,5.0]

# Panels are numbered from 1, matching plt.subplot's convention.
for i, target in enumerate(map(str, years), start=1):
    mask1 = (df.target == target) & (df.source == '2016-Q1')
    mask2 = (df.target == target) & (df.source == '2016-Q2')
    mask3 = (df.target == target) & (df.source == '2015-Q4')

    x1 = pd.Series(df.loc[mask1,'point'].values, name="$X_1$")
    x2 = pd.Series(df.loc[mask2,'point'].values, name="$X_2$")
    x3 = pd.Series(df.loc[mask3,'point'].values, name="$X_3$")

    plt.subplot(1, 1, i)
    plt.title(target)
    sns.distplot(x1, hist=False, rug=False, kde_kws={'shade':True}, color='b', label='2016-Q1')
    sns.distplot(x2, hist=False, rug=False, kde_kws={'shade':True}, color='g', label='2016-Q2')
    sns.distplot(x3, hist=False, rug=False, kde_kws={'shade':True}, color='y', label='2015-Q4')
    plt.xlabel('')
    plt.legend(loc='best', fontsize='medium', title='',)

plt.tight_layout()
plt.suptitle('Distributions of SPF aggregate histograms for HICP inflation', fontsize='large',  y = 1.04)
# The last statement returns None, so the cell prints nothing. The former
# bare ";" line was removed: it is not Python and made IPython echo ''.
plt.subplots_adjust(hspace=0.4)


Out[84]:
''

In [ ]:
# Write the current figure as PNG, PDF and JPEG.
# NOTE(review): with the inline backend a figure is normally closed when its
# cell finishes, so calling savefig from a separate cell presumably writes a
# blank canvas — confirm, and if so move these calls into the plotting cell.
# NOTE(review): the file name mentions only 2016 Q1/Q2 — verify it matches
# the figure being saved.
for suffix in ('.png', '.pdf', '.jpeg'):
    plt.savefig('../figures/Source2016_Q1_Q2-Target' + target + suffix)