In [45]:
import pandas as pd
import numpy as np
import os
# (*) Import plotly package
import plotly
# Check plotly version (if not the latest, please upgrade)
#plotly.__version__
# (*) To communicate with Plotly's server, sign in with credentials file
import plotly.plotly as py
# (*) Useful Python/Plotly tools
import plotly.tools as tls
# (*) Graph objects to piece together plots
from plotly.graph_objs import *
In [106]:
# Build `df_big`: one row per individual point forecast, for every survey
# round (year, quarter) whose CSV file is present in DATA_DIR.
years = [2015, 2016]
quarters = [1, 2, 3, 4]
# Month suffix of the rolling-horizon target labels, indexed by quarter - 1.
months = ['Dec', 'Mar', 'Jun', 'Sep']

DATA_DIR = '../data/'
# Value in TARGET_PERIOD that marks where the GDP-growth section starts.
GROWTH_MARKER = 'GROWTH EXPECTATIONS; YEAR-ON-YEAR CHANGE IN REAL GDP'
# Raw CSV column names -> readable names (histogram bins become interval labels).
COLUMN_LABELS = {
    'TARGET_PERIOD': 'target', 'FCT_SOURCE': 'id', 'POINT': 'point',
    'TN1_0': '[-2.0,-1.1]', 'FN1_0TN0_6': '[-1.0,-0.6]',
    'FN0_5TN0_1': '[-0.5,-0.1]', 'F0_0T0_4': '[0.0,0.4]',
    'F0_5T0_9': '[0.5,0.9]', 'F1_0T1_4': '[1.0,1.4]',
    'F1_5T1_9': '[1.5,1.9]', 'F2_0T2_4': '[2.0,2.4]',
    'F2_5T2_9': '[2.5,2.9]', 'F3_0T3_4': '[3.0,3.4]',
    'F3_5T3_9': '[3.5,3.9]', 'F4_0': '[4.0,5.0]',
}
# Only these columns are carried forward; the histogram bins are dropped.
KEEP_COLUMNS = ['source', 'target', 'id', 'point']


def horizon_labels(year, q):
    """Return a dict mapping raw target strings to horizon labels.

    Parameters
    ----------
    year, q : int
        Year and quarter (1-4) of the survey round.

    Returns
    -------
    dict
        Keys are target strings as they are compared against the `target`
        column; values are 't', 't+1', 't+2', 't+4', 'roll 1' or 'roll 2'.
    """
    # The long-horizon target is year+4 in Q1/Q2 and year+5 in Q3/Q4;
    # both are labelled 't+4'.
    long_term = year + 4 if q < 3 else year + 5
    # Rolling targets use the survey year itself in Q1 and the next year otherwise.
    roll_year = year if q == 1 else year + 1
    month = months[q - 1]
    return {
        str(year): 't',
        str(year + 1): 't+1',
        str(year + 2): 't+2',
        str(long_term): 't+4',
        str(roll_year) + month: 'roll 1',
        str(roll_year + 1) + month: 'roll 2',
    }


def load_spf_round(year, q, data_dir=DATA_DIR):
    """Load the point forecasts of one survey round.

    Reads `<data_dir>/<year>Q<q>.csv`, keeps the rows above the growth
    section, drops rows without a point forecast and adds the `source`
    ('<year>-Q<q>') and `targetNew` (horizon label) columns.

    Returns
    -------
    pandas.DataFrame or None
        None when the file does not exist.

    Raises
    ------
    IndexError
        If the file contains no GROWTH_MARKER row.
    """
    path = os.path.join(data_dir, str(year) + 'Q' + str(q) + '.csv')
    if not os.path.isfile(path):
        return None

    # header=1: the column names are on the second line of the file.
    raw_df = pd.read_csv(path, header=1)
    # Row where the growth expectations start.
    dum = raw_df[raw_df['TARGET_PERIOD'] == GROWTH_MARKER].index[0]
    mask_columns = ~raw_df.columns.str.contains('Unnamed')
    # The slice stops at dum-1, so the row directly above the marker is also
    # left out -- presumably a separator line; TODO confirm against the files.
    # .copy() so the column assignments below do not write into a view of raw_df.
    df = raw_df.iloc[0:dum - 1, mask_columns].copy()
    df['source'] = str(year) + '-Q' + str(q)
    df = df.rename(columns=COLUMN_LABELS)[KEEP_COLUMNS]

    # Remove rows where point is missing, then zero-fill what is left.
    df = df[df['point'].notnull()].fillna(0)
    df['point'] = df['point'].astype('float')

    # Targets that match no label are left as NaN.
    df['targetNew'] = df['target'].map(horizon_labels(year, q))
    return df


# Collect the per-round frames and concatenate once, instead of growing
# df_big inside the loop from an empty frame.
df_List = []
for year in years:
    for q in quarters:
        df = load_spf_round(year, q)
        if df is not None:
            df_List.append(df)

if df_List:
    df_big = pd.concat(df_List, axis=0, ignore_index=True)
else:
    # No input files found: keep an empty table with the expected columns.
    df_big = pd.DataFrame(columns=KEEP_COLUMNS)
In [108]:
# Preview the first 100 rows of the combined forecast table.
df_big.head(100)
Out[108]:
In [25]:
# Number of forecasts per horizon label. Rows whose target matched no label
# hold NaN in targetNew and are not counted (value_counts drops NaN by default).
df_big['targetNew'].value_counts()
Out[25]:
In [26]:
# Group the individual forecasts by survey round and horizon label.
gb = df_big.groupby(['source', 'targetNew'])
# Inspect a single group. The second key component is a targetNew label
# ('t', 't+1', ...), not a raw target year: for the 2015-Q1 round the
# target '2015' is labelled 't'.
gb_1 = gb.get_group(('2015-Q1', 't'))
gb_1
In [67]:
# Mean and variance of the point forecasts per (source, targetNew) group.
# String names select the pandas group-wise implementations explicitly
# (sample variance, ddof=1) and give the result columns 'mean' and 'var'.
df_stat = gb['point'].agg(['mean', 'var'])
In [68]:
# Display the per-group statistics table.
df_stat
Out[68]:
In [69]:
# NOTE(review): reindex() is called without arguments and its result is not
# assigned, so this cell only displays the table; df_stat itself is unchanged.
df_stat.reindex()
Out[69]:
In [70]:
# Marker colour for each forecast-horizon label, looked up by targetNew value.
colors = dict([
    ('roll 1', 'wheat'),
    ('roll 2', '#1f77b4'),
    ('t', '#ff7f0e'),
    ('t+1', '#2ca02c'),
    ('t+2', '#d62728'),
    ('t+4', '#9467bd'),
])
In [71]:
# Turn the (source, targetNew) index levels into ordinary columns so they can
# be used as plot coordinates and as a grouping key.
# Guarded so that re-running this cell is a no-op: resetting a second time
# would fail because the 'source' / 'targetNew' columns already exist.
if 'source' not in df_stat.columns:
    df_stat = df_stat.reset_index()
df_stat
Out[71]:
In [96]:
# Bubble sizing for the Plotly chart (adapted from the gapminder bubble-chart example).
# (!) 'area' ties each marker's 'size' value to its rendered area rather than
#     its diameter, which narrows the visible range of bubble sizes.
sizemode = 'area'
# (!) Reference value that scales 'size' values to pixels: the largest group
#     variance divided by 130 squared.
#     (A fixed alternative that was tried: sizeref = .2)
sizeref = df_stat['var'].max() / (130.0 ** 2)
# Define a trace-generating function (returns a Scatter object)
def make_trace(X, target, sizes, color):
    """Build the bubble trace for one forecast horizon.

    Parameters
    ----------
    X : DataFrame
        Rows for a single horizon; its 'source' column is used for x and
        its 'mean' column for y.
    target : str
        Horizon label, used as the trace name.
    sizes : array-like
        One marker size per row of X.
    color : str
        Marker colour.

    Returns
    -------
    Scatter
        Markers-only trace. Marker scaling reads the notebook-level
        `sizeref` and `sizemode` variables.
    """
    return Scatter(
        x=X['source'],          # survey round on the x-axis
        y=X['mean'],            # mean point forecast on the y-axis
        name=target,            # horizon label shown in legend / hover
        mode='markers',         # (!) point markers only on this plot
        # Plain dicts instead of the deprecated Marker / Line wrapper classes.
        marker=dict(
            color=color,        # marker color
            size=sizes,         # (!) marker sizes (one per point)
            sizeref=sizeref,    # link sizeref
            sizemode=sizemode,  # link sizemode
            opacity=0.6,        # (!) partly transparent markers
            line=dict(width=0.0)  # remove marker borders
        )
    )
In [97]:
# Build the list of traces (adapted from the gapminder bubble-chart example).
# A plain list replaces the deprecated Data() container.
data = []
# One sub-frame X and one trace per forecast-horizon label.
for target, X in df_stat.groupby('targetNew'):
    sizes = X['var'] / 10     # bubble sizes: forecast variance, scaled down
    color = colors[target]    # bubble color for this horizon
    data.append(
        make_trace(X, target, sizes, color)
    )
In [98]:
# Set plot and axis titles
title = "Figure: Bubble Chart for HICP inflation"
x_title = "SPF source"
y_title = "Mean of point forecasts"

# Axis style options shared by both axes
axis_style = dict(
    zeroline=False,       # remove thick zero line
    gridcolor='#FFFFFF',  # white grid lines
    ticks='outside',      # draw ticks outside axes
    ticklen=8,            # tick length
    tickwidth=1.5         # and width
)

# Make layout object. Each axis is the shared style plus its own title,
# given as a plain dict instead of the deprecated XAxis / YAxis wrappers.
layout = Layout(
    title=title,              # set plot title
    plot_bgcolor='#EFECEA',   # set plot color to grey
    xaxis=dict(axis_style, title=x_title),
    yaxis=dict(axis_style, title=y_title)
)
In [99]:
# Make Figure object from the traces (`data`) and the `layout` defined above
fig = Figure(data=data, layout=layout)
# (@) Send to Plotly under the given filename and show the result in the notebook.
#     This talks to Plotly's server, so the sign-in mentioned at the imports is needed.
py.iplot(fig, filename='s3_life-SPF')
Out[99]:
In [103]:
%matplotlib inline
import matplotlib as plt
In [ ]:
# New matplotlib figure holding a single axes (1x1 grid).
# NOTE(review): this rebinds `fig`, which previously held the Plotly figure.
fig, ax = plt.subplots(1, 1)
In [105]:
# Bubble scatter: x = survey round, y = mean point forecast, marker size = variance.
# df_stat holds the aggregated 'mean' and 'var' columns; it has no 'point' column.
ax.scatter(df_stat['source'], df_stat['mean'], s=df_stat['var'])
# The figure was created in an earlier cell, so the inline backend will not
# render it again on its own; display it explicitly.
ax.figure
In [ ]:
In [ ]:
In [ ]: