06-30-2016-Bubble-Plot-Aggregated-Point-Forecasts



In [45]:
import pandas as pd
import numpy as np
import os

# (*) Import plotly package
import plotly

# Check plolty version (if not latest, please upgrade)
#plotly.__version__

# (*) To communicate with Plotly's server, sign in with credentials file
import plotly.plotly as py  

# (*) Useful Python/Plotly tools
import plotly.tools as tls   

# (*) Graph objects to piece together plots
from plotly.graph_objs import *

In [106]:
df_big = pd.DataFrame(columns = ['source','target','id','point'])

years = [2015,2016]
quarters = [1,2,3,4]
months = ['Dec','Mar','Jun','Sep']
df_List = []
for year in years:
    for q in quarters:
        f = str(year) + 'Q' + str(q)
        fname = f + '.csv'
        if os.path.isfile('../data/' + fname):
            raw_df = pd.read_csv('../data/' + fname,header = 1)
            # find the row where the growth expectations start
            dum = raw_df[raw_df['TARGET_PERIOD'] == 'GROWTH EXPECTATIONS; YEAR-ON-YEAR CHANGE IN REAL GDP'].index[0]
            mask_columns = ~raw_df.columns.str.contains('Unnamed')
            df = raw_df.iloc[0:dum-1,mask_columns]    
            df['source'] = str(year) + '-Q' + str(q)
            df = df.rename(columns={'TARGET_PERIOD':'target','FCT_SOURCE':'id','POINT':'point',
                                   'TN1_0':'[-2.0,-1.1]','FN1_0TN0_6':'[-1.0,-0.6]',
                                   'FN0_5TN0_1':'[-0.5,-0.1]','F0_0T0_4':'[0.0,0.4]',
                                   'F0_5T0_9':'[0.5,0.9]','F1_0T1_4':'[1.0,1.4]',
                                   'F1_5T1_9':'[1.5,1.9]','F2_0T2_4':'[2.0,2.4]',
                                   'F2_5T2_9':'[2.5,2.9]','F3_0T3_4':'[3.0,3.4]',
                                   'F3_5T3_9':'[3.5,3.9]','F4_0':'[4.0,5.0]'})
            df = df[['source','target','id','point']]
            # remove rows where point is missing
            maskNaN = df.point.isnull()
            df = df[~maskNaN]                    
            df.fillna(0,inplace = True)
            for colname in df.columns[3:]:
                df[colname] = df[colname].astype('float')
            # create a new target column 
            mask_t0 = str(year)
            mask_t1 = str(year+1)
            mask_t2 = str(year+2)
            if q<3:
                mask_t4or5 = str(year+4)
            else:
                mask_t4or5 = str(year+5)
            if q==1:
                mask_Rt1 = str(year) + months[q-1]
                mask_Rt2 = str(year+1) + months[q-1]
            else:
                mask_Rt1 = str(year+1) + months[q-1]
                mask_Rt2 = str(year+2) + months[q-1]
                #
            df.loc[df.loc[:,'target'] == mask_t0,'targetNew'] = 't'
            df.loc[df.loc[:,'target'] == mask_t1,'targetNew'] = 't+1'
            df.loc[df.loc[:,'target'] == mask_t2,'targetNew'] = 't+2'
            df.loc[df.loc[:,'target'] == mask_t4or5,'targetNew'] = 't+4'
            df.loc[df.loc[:,'target'] == mask_Rt1,'targetNew'] = 'roll 1'
            df.loc[df.loc[:,'target'] == mask_Rt2,'targetNew'] = 'roll 2'


            
            df_big = pd.concat([df_big,df], axis=0, ignore_index=True)
            #df_List.append(df)

In [108]:
df_big.head(100)


Out[108]:
id point source target targetNew
0 1 0.200000 2015-Q1 2015 t
1 2 0.200000 2015-Q1 2015 t
2 3 0.200000 2015-Q1 2015 t
3 4 0.100000 2015-Q1 2015 t
4 5 -0.200000 2015-Q1 2015 t
5 6 0.000000 2015-Q1 2015 t
6 7 0.900000 2015-Q1 2015 t
7 14 0.500000 2015-Q1 2015 t
8 15 0.600000 2015-Q1 2015 t
9 16 0.200000 2015-Q1 2015 t
10 20 -0.500000 2015-Q1 2015 t
11 22 -0.200000 2015-Q1 2015 t
12 23 -0.100000 2015-Q1 2015 t
13 24 -0.100000 2015-Q1 2015 t
14 29 0.200000 2015-Q1 2015 t
15 31 0.800000 2015-Q1 2015 t
16 32 0.300000 2015-Q1 2015 t
17 35 0.300000 2015-Q1 2015 t
18 36 0.200000 2015-Q1 2015 t
19 37 -0.200000 2015-Q1 2015 t
20 39 0.000000 2015-Q1 2015 t
21 41 0.600000 2015-Q1 2015 t
22 42 0.100000 2015-Q1 2015 t
23 45 0.250000 2015-Q1 2015 t
24 47 0.300000 2015-Q1 2015 t
25 48 0.600000 2015-Q1 2015 t
26 49 -0.100000 2015-Q1 2015 t
27 52 -0.400000 2015-Q1 2015 t
28 54 0.500000 2015-Q1 2015 t
29 57 0.500000 2015-Q1 2015 t
... ... ... ... ... ...
70 24 1.300000 2015-Q1 2015Dec roll 1
71 29 0.400000 2015-Q1 2015Dec roll 1
72 31 0.900000 2015-Q1 2015Dec roll 1
73 36 0.300000 2015-Q1 2015Dec roll 1
74 37 0.400000 2015-Q1 2015Dec roll 1
75 39 0.800000 2015-Q1 2015Dec roll 1
76 42 0.300000 2015-Q1 2015Dec roll 1
77 47 1.300000 2015-Q1 2015Dec roll 1
78 48 0.600000 2015-Q1 2015Dec roll 1
79 49 0.300000 2015-Q1 2015Dec roll 1
80 52 0.400000 2015-Q1 2015Dec roll 1
81 54 0.900000 2015-Q1 2015Dec roll 1
82 57 1.000000 2015-Q1 2015Dec roll 1
83 61 1.200000 2015-Q1 2015Dec roll 1
84 68 0.500000 2015-Q1 2015Dec roll 1
85 82 0.500000 2015-Q1 2015Dec roll 1
86 85 0.638742 2015-Q1 2015Dec roll 1
87 88 0.900000 2015-Q1 2015Dec roll 1
88 89 1.800000 2015-Q1 2015Dec roll 1
89 90 0.800000 2015-Q1 2015Dec roll 1
90 92 0.837393 2015-Q1 2015Dec roll 1
91 93 1.800000 2015-Q1 2015Dec roll 1
92 94 0.700000 2015-Q1 2015Dec roll 1
93 95 1.148500 2015-Q1 2015Dec roll 1
94 96 0.750000 2015-Q1 2015Dec roll 1
95 97 0.698144 2015-Q1 2015Dec roll 1
96 98 0.700000 2015-Q1 2015Dec roll 1
97 101 0.500000 2015-Q1 2015Dec roll 1
98 102 0.600000 2015-Q1 2015Dec roll 1
99 103 1.300000 2015-Q1 2015Dec roll 1

100 rows × 5 columns


In [25]:
df_big['targetNew'].value_counts()


Out[25]:
t         330
t+1       318
roll 1    277
t+2       274
t+4       262
roll 2    250
dtype: int64

In [26]:
gb = df_big.groupby(['source','targetNew'])

gb_1 = gb.get_group(('2015-Q1','2015')) gb_1


In [67]:
df_stat = gb['point'].agg([np.mean, np.var])

In [68]:
df_stat


Out[68]:
mean var
source targetNew
2015-Q1 roll 1 0.755420 0.179792
roll 2 1.221327 0.120416
t 0.252707 0.103503
t+1 1.057263 0.105719
t+2 1.489798 0.079780
t+4 1.770240 0.044857
2015-Q2 roll 1 0.981652 0.156936
roll 2 1.438915 0.062537
t 0.094257 0.035210
t+1 1.179800 0.061636
t+2 1.562587 0.056315
t+4 1.836707 0.032103
2015-Q3 roll 1 1.207638 0.089087
roll 2 1.535782 0.047960
t 0.242508 0.015741
t+1 1.257033 0.067731
t+2 1.595614 0.067669
t+4 1.856788 0.043124
2015-Q4 roll 1 1.103721 0.080145
roll 2 1.528929 0.054251
t 0.112460 0.007842
t+1 1.049451 0.050741
t+2 1.534099 0.071899
t+4 1.862534 0.038352
2016-Q1 roll 1 1.154667 0.154125
roll 2 1.509689 0.084609
t 0.743109 0.081273
t+1 1.420783 0.073569
t+2 1.632693 0.068987
t+4 1.801522 0.049907
2016-Q2 roll 1 1.095586 0.166638
roll 2 1.509118 0.073704
t 0.253606 0.034805
t+1 1.319251 0.055538
t+2 1.588504 0.079778
t+4 1.814962 0.027486

In [69]:
df_stat.reindex()


Out[69]:
mean var
source targetNew
2015-Q1 roll 1 0.755420 0.179792
roll 2 1.221327 0.120416
t 0.252707 0.103503
t+1 1.057263 0.105719
t+2 1.489798 0.079780
t+4 1.770240 0.044857
2015-Q2 roll 1 0.981652 0.156936
roll 2 1.438915 0.062537
t 0.094257 0.035210
t+1 1.179800 0.061636
t+2 1.562587 0.056315
t+4 1.836707 0.032103
2015-Q3 roll 1 1.207638 0.089087
roll 2 1.535782 0.047960
t 0.242508 0.015741
t+1 1.257033 0.067731
t+2 1.595614 0.067669
t+4 1.856788 0.043124
2015-Q4 roll 1 1.103721 0.080145
roll 2 1.528929 0.054251
t 0.112460 0.007842
t+1 1.049451 0.050741
t+2 1.534099 0.071899
t+4 1.862534 0.038352
2016-Q1 roll 1 1.154667 0.154125
roll 2 1.509689 0.084609
t 0.743109 0.081273
t+1 1.420783 0.073569
t+2 1.632693 0.068987
t+4 1.801522 0.049907
2016-Q2 roll 1 1.095586 0.166638
roll 2 1.509118 0.073704
t 0.253606 0.034805
t+1 1.319251 0.055538
t+2 1.588504 0.079778
t+4 1.814962 0.027486

In [70]:
colors = {'roll 1':'wheat',
    'roll 2':'#1f77b4', 
    't':'#ff7f0e', 
    't+1':'#2ca02c',
    't+2':'#d62728',
    't+4':'#9467bd'}

In [71]:
df_stat.reset_index(inplace=True)
df_stat


Out[71]:
source targetNew mean var
0 2015-Q1 roll 1 0.755420 0.179792
1 2015-Q1 roll 2 1.221327 0.120416
2 2015-Q1 t 0.252707 0.103503
3 2015-Q1 t+1 1.057263 0.105719
4 2015-Q1 t+2 1.489798 0.079780
5 2015-Q1 t+4 1.770240 0.044857
6 2015-Q2 roll 1 0.981652 0.156936
7 2015-Q2 roll 2 1.438915 0.062537
8 2015-Q2 t 0.094257 0.035210
9 2015-Q2 t+1 1.179800 0.061636
10 2015-Q2 t+2 1.562587 0.056315
11 2015-Q2 t+4 1.836707 0.032103
12 2015-Q3 roll 1 1.207638 0.089087
13 2015-Q3 roll 2 1.535782 0.047960
14 2015-Q3 t 0.242508 0.015741
15 2015-Q3 t+1 1.257033 0.067731
16 2015-Q3 t+2 1.595614 0.067669
17 2015-Q3 t+4 1.856788 0.043124
18 2015-Q4 roll 1 1.103721 0.080145
19 2015-Q4 roll 2 1.528929 0.054251
20 2015-Q4 t 0.112460 0.007842
21 2015-Q4 t+1 1.049451 0.050741
22 2015-Q4 t+2 1.534099 0.071899
23 2015-Q4 t+4 1.862534 0.038352
24 2016-Q1 roll 1 1.154667 0.154125
25 2016-Q1 roll 2 1.509689 0.084609
26 2016-Q1 t 0.743109 0.081273
27 2016-Q1 t+1 1.420783 0.073569
28 2016-Q1 t+2 1.632693 0.068987
29 2016-Q1 t+4 1.801522 0.049907
30 2016-Q2 roll 1 1.095586 0.166638
31 2016-Q2 roll 2 1.509118 0.073704
32 2016-Q2 t 0.253606 0.034805
33 2016-Q2 t+1 1.319251 0.055538
34 2016-Q2 t+2 1.588504 0.079778
35 2016-Q2 t+4 1.814962 0.027486

In [96]:
# plotly stuff, following gapminder bubble chart example

# (!) Set 'size' values to be proportional to rendered area,
#     instead of diameter. This makes the range of bubble sizes smaller
sizemode = 'area'       

# (!) Set a reference for 'size' values (i.e. a population-to-pixel scaling).
#     Here the max bubble area will be on the order of 100 pixels
sizeref = df_stat['var'].max() / 1.3e2**2
#sizeref = .2
# Define a trace-generating function (returns a Scatter object)
def make_trace(X, target, sizes, color):  
    return Scatter(
        x=X['source'],  # GDP on the x-xaxis
        y=X['mean'],    # life Exp on th y-axis
        name=target,    # label continent names on hover
        mode='markers',    # (!) point markers only on this plot
        marker= Marker(
            color=color,          # marker color
            size=sizes,           # (!) marker sizes (sizes is a list)
            sizeref=sizeref,      # link sizeref
            sizemode=sizemode,    # link sizemode
            opacity=0.6,          # (!) partly transparent markers
            line=Line(width=0.0)  # remove marker borders
        )
    )

In [97]:
# plotly stuff, following gapminder bubble chart example
# Initialize data object 
data = Data()

# Group data frame by continent sub-dataframe (named X), 
#   make one trace object per continent and append to data object
for target, X in df_stat.groupby('targetNew'):
    
    sizes = X['var']/10                            # get population array 
    color = colors[target]                      # get bubble color
    
    data.append(
        make_trace(X, target, sizes, color)  # append trace to data object
    )

In [98]:
# Set plot and axis titles
title = "Figure: Bubble Chart for HICP inflation"
x_title = "SPF source"
y_title = "Mean of point forecasts"

# Define a dictionary of axis style options
axis_style = dict(     
    zeroline=False,       # remove thick zero line
    gridcolor='#FFFFFF',  # white grid lines
    ticks='outside',      # draw ticks outside axes 
    ticklen=8,            # tick length
    tickwidth=1.5         #   and width
)

# Make layout object
layout = Layout(
    title=title,             # set plot title
    plot_bgcolor='#EFECEA',  # set plot color to grey
    xaxis=XAxis(
        axis_style,      # add axis style dictionary
        title=x_title,   # x-axis title
    ),
    yaxis=YAxis(
        axis_style,      # add axis style dictionary
        title=y_title,   # y-axis title
    )
)

In [99]:
# Make Figure object
fig = Figure(data=data, layout=layout)

# (@) Send to Plotly and show in notebook
py.iplot(fig, filename='s3_life-SPF')


Out[99]:

In [103]:
%matplotlib inline

import matplotlib as plt

In [ ]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

In [105]:
ax.scatter(df_stat['source'],df_stat['point'], s=df_stat['var']) # Added third variable income as size of the bubble


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-105-7a8c2f139e5e> in <module>()
----> 1 ax.scatter(df_stat['source'],df_stat['point'], s=df_stat['var']) # Added third variable income as size of the bubble

NameError: name 'ax' is not defined

In [ ]:


In [ ]:


In [ ]: