In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
import numpy as np

In [2]:
ls


Untitled.ipynb                     get-raw-data_HICP-items.ipynb
get-raw-data-HICP-countries.ipynb  raw_data_countries.csv

In [3]:
df_ind_ctry = pd.read_csv('raw_data_countries.csv',header=0,index_col=0,parse_dates=0)
df_ind_ctry.head()


Out[3]:
76451 46422 46425 46427 46424 46429 46420 46430 46423 46431 46432 46426 46428 1663537 1875356 1875369 1907172 2025053 2041200 2225322
1990-01-01 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1990-02-01 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1990-03-01 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1990-04-01 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1990-05-01 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

In [4]:
df_ind_ctry.index


Out[4]:
DatetimeIndex(['1990-01-01', '1990-02-01', '1990-03-01', '1990-04-01',
               '1990-05-01', '1990-06-01', '1990-07-01', '1990-08-01',
               '1990-09-01', '1990-10-01', 
               ...
               '2015-06-01', '2015-07-01', '2015-08-01', '2015-09-01',
               '2015-10-01', '2015-11-01', '2015-12-01', '2016-01-01',
               '2016-02-01', '2016-03-01'],
              dtype='datetime64[ns]', length=315, freq=None, tz=None)

In [5]:
mask_rows_ind = df_ind_ctry.index.year >= 1999
mask_rows_ind;

In [6]:
df_ind_ctry = df_ind_ctry[mask_rows_ind]
df_ind_ctry.head()


Out[6]:
76451 46422 46425 46427 46424 46429 46420 46430 46423 46431 46432 46426 46428 1663537 1875356 1875369 1907172 2025053 2041200 2225322
1999-01-01 73.87 77.6 76.16 71.6 67.98 71.66 72.44 73.27 67.81 70.44 73.50 71.6 66.16 55.21 70.54 68.26 52.03 55.40 53.41 68.24
1999-02-01 74.06 77.8 76.43 71.8 68.04 72.14 72.59 73.42 67.32 70.44 73.79 72.1 67.41 55.43 69.30 68.31 52.43 55.51 53.53 68.22
1999-03-01 74.27 77.9 76.69 71.9 68.30 72.90 72.66 73.49 68.97 70.71 73.93 72.3 67.48 55.62 70.61 68.34 52.57 55.77 53.51 68.06
1999-04-01 74.51 78.2 76.92 72.0 68.56 73.11 72.94 73.49 69.83 71.18 74.57 72.7 67.81 55.80 70.83 70.34 52.82 55.91 53.44 68.10
1999-05-01 74.57 78.2 76.91 72.2 68.56 73.11 73.08 73.56 69.65 71.25 74.79 73.2 68.00 56.12 71.06 70.86 52.89 56.10 53.68 68.06

In [7]:
## Compute annual inflation rates

In [8]:
df_infl_ctry = df_ind_ctry.pct_change(periods=12)*100
mask_rows_infl = df_infl_ctry.index.year >= 2000
df_infl_ctry = df_infl_ctry[mask_rows_infl]
df_infl_ctry.tail()


Out[8]:
76451 46422 46425 46427 46424 46429 46420 46430 46423 46431 46432 46426 46428 1663537 1875356 1875369 1907172 2025053 2041200 2225322
2015-11-01 0.149940 0.200200 0.110276 0.099502 -0.406423 0.372721 1.399658 0.480769 -0.109868 0.622678 -0.229129 -0.100200 0.431034 -0.931062 -1.469274 1.323756 -0.379015 0.544520 0.020090 -0.525377
2015-12-01 0.230138 0.200200 0.270487 0.099502 -0.129715 0.494750 1.450151 1.071286 0.391606 0.291252 -0.249426 0.201207 0.850030 -0.608357 -0.564345 1.252824 -0.460046 -0.171406 0.404490 -0.249426
2016-01-01 0.335366 0.405268 0.334347 0.408163 -0.418197 0.214680 1.841564 1.391145 -0.121926 0.703436 0.030154 0.000000 0.450404 -0.767057 -1.078222 0.810053 -0.280364 0.070865 -0.313321 0.666936
2016-02-01 -0.151469 -0.200602 -0.100583 -0.203666 -0.987981 0.324478 1.114906 1.022785 0.071276 0.173275 -0.140238 -0.201410 -0.310995 -0.886739 -2.205808 0.971576 -0.310995 0.352467 -0.584089 0.536329
2016-03-01 -0.049925 0.099800 -0.099830 -0.199402 -0.988517 0.520625 1.642957 0.579131 -0.669531 0.469953 -0.019960 -0.600000 -0.619319 -0.949051 -2.150538 1.024170 -0.490638 0.460230 -0.619443 0.782897

df_infl_ctry.rename(columns = dic)

tt = df_infl_ctry.copy() tt['month'] = tt.index.month tt['year'] = tt.index.year melted_df = pd.melt(tt,id_vars=['month','year']) melted_df.head()


In [9]:
df_infl_ctry['min'] = df_infl_ctry.apply(min,axis=1)
df_infl_ctry['max'] = df_infl_ctry.apply(max,axis=1)
df_infl_ctry['mean'] = df_infl_ctry.apply(np.mean,axis=1)
df_infl_ctry['mode'] = df_infl_ctry.quantile(q=0.5, axis=1)
df_infl_ctry['10th'] = df_infl_ctry.quantile(q=0.10, axis=1)
df_infl_ctry['90th'] = df_infl_ctry.quantile(q=0.90, axis=1)
df_infl_ctry['25th'] = df_infl_ctry.quantile(q=0.25, axis=1)
df_infl_ctry['75th'] = df_infl_ctry.quantile(q=0.75, axis=1)

In [10]:
df_infl_ctry.head()


Out[10]:
76451 46422 46425 46427 46424 46429 46420 46430 46423 46431 ... 2041200 2225322 min max mean mode 10th 90th 25th 75th
2000-01-01 1.868147 1.675258 1.667542 2.234637 2.868491 1.632710 0.303700 1.378463 2.344787 1.902328 ... 3.126755 0.893904 0.303700 13.665193 3.566045 2.344787 1.039272 6.471644 1.669471 3.521233
2000-02-01 1.930867 1.542416 1.439225 2.367688 2.968842 1.524813 2.135280 1.947698 2.644088 1.618399 ... 3.399963 0.938141 0.938141 16.669846 3.995416 2.644088 1.464902 6.886909 1.696516 3.697690
2000-03-01 1.952336 1.412067 1.603860 2.503477 3.045388 1.604938 2.518580 1.945843 2.769320 1.414227 ... 3.214352 0.969733 0.969733 16.796652 4.151179 3.008299 1.412715 7.455774 1.690164 3.751036
2000-04-01 1.717890 1.023018 1.300052 2.361111 3.033839 1.682396 2.316973 1.755341 2.105112 1.882551 ... 3.723802 0.895742 0.895742 16.168118 4.061522 2.507711 1.106128 7.913532 1.727253 3.892662
2000-05-01 1.756739 0.895141 1.521259 2.493075 3.223454 1.969635 2.408320 1.563350 2.641780 2.442105 ... 3.017884 0.543638 0.543638 16.335791 4.082136 2.687525 1.082976 7.834255 1.809963 3.854503

5 rows × 28 columns

df_infl_ctry['month'] = df_infl_ctry.index.month df_infl_ctry['year'] = df_infl_ctry.index.year


In [11]:
df_infl_ctry.tail()


Out[11]:
76451 46422 46425 46427 46424 46429 46420 46430 46423 46431 ... 2041200 2225322 min max mean mode 10th 90th 25th 75th
2015-11-01 0.149940 0.200200 0.110276 0.099502 -0.406423 0.372721 1.399658 0.480769 -0.109868 0.622678 ... 0.020090 -0.525377 -1.469274 1.399658 0.069781 0.099502 -0.809356 1.043324 -0.341543 0.455902
2015-12-01 0.230138 0.200200 0.270487 0.099502 -0.129715 0.494750 1.450151 1.071286 0.391606 0.291252 ... 0.404490 -0.249426 -0.608357 1.450151 0.255318 0.230138 -0.533056 1.180209 -0.229921 0.449620
2016-01-01 0.335366 0.405268 0.334347 0.408163 -0.418197 0.214680 1.841564 1.391145 -0.121926 0.703436 ... -0.313321 0.666936 -1.078222 1.841564 0.247574 0.247574 -0.662399 1.158708 -0.240755 0.558670
2016-02-01 -0.151469 -0.200602 -0.100583 -0.203666 -0.987981 0.324478 1.114906 1.022785 0.071276 0.173275 ... -0.584089 0.536329 -2.205808 1.114906 -0.127654 -0.140238 -0.957609 1.002301 -0.310995 0.338473
2016-03-01 -0.049925 0.099800 -0.099830 -0.199402 -0.988517 0.520625 1.642957 0.579131 -0.669531 0.469953 ... -0.619443 0.782897 -2.150538 1.642957 -0.108362 -0.099830 -0.976677 0.927661 -0.619412 0.495289

5 rows × 28 columns


In [12]:
print(df_infl_ctry.describe())


            76451       46422       46425       46427       46424       46429  \
count  195.000000  195.000000  195.000000  195.000000  195.000000  195.000000   
mean     1.824946    1.523799    1.639645    2.018713    2.326464    1.985925   
std      0.962058    0.840127    0.907143    1.069964    1.610770    1.307803   
min     -0.651324   -0.753498   -0.796298   -0.507614   -1.457433   -0.670187   
25%      1.532931    0.984822    1.159837    1.388335    1.700045    1.260650   
50%      2.045728    1.542416    1.794789    2.234637    2.769933    1.789892   
75%      2.421634    2.099493    2.232076    2.766257    3.477291    2.784275   
max      4.055123    3.452116    4.041648    4.252874    5.325645    5.472837   

            46420       46430       46423       46431     ...         2041200  \
count  195.000000  195.000000  195.000000  195.000000     ...      195.000000   
mean     1.986481    1.925091    2.325408    2.132327     ...        3.972324   
std      1.247366    0.862498    2.010143    1.546948     ...        4.358711   
min     -1.729298   -0.428700   -2.856048   -1.787243     ...       -4.347373   
25%      1.181016    1.463628    0.945002    0.708717     ...        0.833012   
50%      1.979674    1.842071    3.015015    2.460457     ...        3.134250   
75%      2.730223    2.335375    3.697780    3.350605     ...        6.504888   
max      5.892710    4.037449    5.655230    5.103891     ...       17.704707   

          2225322         min         max        mean        mode        10th  \
count  195.000000  195.000000  195.000000  195.000000  195.000000  195.000000   
mean     2.465600   -0.051095    6.699332    2.469938    2.114402    0.844293   
std      3.104493    1.475875    3.972449    1.470379    1.154035    1.235662   
min     -1.891320   -4.347373    0.561272   -0.651156   -0.523666   -2.078496   
25%      0.313888   -1.114606    3.972835    1.543499    1.542305   -0.208424   
50%      1.963065    0.183688    6.739948    2.750587    2.418380    1.223951   
75%      3.584854    1.151262    8.417527    3.222020    2.844087    1.670366   
max     12.671014    2.975582   17.704707    6.239699    4.940069    3.369469   

             90th        25th        75th  
count  195.000000  195.000000  195.000000  
mean     4.588673    1.452917    2.915524  
std      2.549223    1.076920    1.373210  
min      0.231518   -1.189413   -0.379625  
25%      2.988610    0.811909    2.164842  
50%      4.364926    1.687474    3.265423  
75%      5.864399    2.178591    3.828991  
max     12.189525    3.962180    6.066205  

[8 rows x 28 columns]

Generate a bunch of histograms of the data to make sure that all of the data

is in an expected range.

with plt.style.context('https://gist.githubusercontent.com/rhiever/d0a7332fe0beebfdc3d5/raw/223d70799b48131d5ce2723cd5784f39d7a3a653/tableau10.mplstyle'): for column in df_infl_ctry.columns[:-2]:

    #if column in ['date']:
    #    continue
    plt.figure()
    plt.hist(df_infl_ctry[column].values)
    plt.title(column)
    #plt.savefig('{}.png'.format(column))

In [13]:
len(df_infl_ctry)


Out[13]:
195

In [14]:
df_infl_ctry.columns


Out[14]:
Index(['76451', '46422', '46425', '46427', '46424', '46429', '46420', '46430',
       '46423', '46431', '46432', '46426', '46428', '1663537', '1875356',
       '1875369', '1907172', '2025053', '2041200', '2225322', 'min', 'max',
       'mean', 'mode', '10th', '90th', '25th', '75th'],
      dtype='object')

In [15]:
df_infl_ctry['month_order'] = range(len(df_infl_ctry))
month_order = df_infl_ctry['month_order']
max_infl = df_infl_ctry['max'].values
min_infl = df_infl_ctry['min'].values
mean_infl = df_infl_ctry['mean'].values
mode_infl = df_infl_ctry['mode'].values
p25th = df_infl_ctry['25th'].values
p75th = df_infl_ctry['75th'].values
p10th = df_infl_ctry['10th'].values
p90th = df_infl_ctry['90th'].values
inflEA = df_infl_ctry['76451'].values

In [16]:
year_begin_df = df_infl_ctry[df_infl_ctry.index.month == 1]
year_begin_df;

In [17]:
year_beginning_indeces = list(year_begin_df['month_order'].values)
year_beginning_indeces


Out[17]:
[0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132, 144, 156, 168, 180, 192]

In [18]:
year_beginning_names = list(year_begin_df.index.year)
year_beginning_names


Out[18]:
[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016]

In [19]:
month_order


Out[19]:
2000-01-01      0
2000-02-01      1
2000-03-01      2
2000-04-01      3
2000-05-01      4
2000-06-01      5
2000-07-01      6
2000-08-01      7
2000-09-01      8
2000-10-01      9
2000-11-01     10
2000-12-01     11
2001-01-01     12
2001-02-01     13
2001-03-01     14
2001-04-01     15
2001-05-01     16
2001-06-01     17
2001-07-01     18
2001-08-01     19
2001-09-01     20
2001-10-01     21
2001-11-01     22
2001-12-01     23
2002-01-01     24
2002-02-01     25
2002-03-01     26
2002-04-01     27
2002-05-01     28
2002-06-01     29
             ... 
2013-10-01    165
2013-11-01    166
2013-12-01    167
2014-01-01    168
2014-02-01    169
2014-03-01    170
2014-04-01    171
2014-05-01    172
2014-06-01    173
2014-07-01    174
2014-08-01    175
2014-09-01    176
2014-10-01    177
2014-11-01    178
2014-12-01    179
2015-01-01    180
2015-02-01    181
2015-03-01    182
2015-04-01    183
2015-05-01    184
2015-06-01    185
2015-07-01    186
2015-08-01    187
2015-09-01    188
2015-10-01    189
2015-11-01    190
2015-12-01    191
2016-01-01    192
2016-02-01    193
2016-03-01    194
Name: month_order, dtype: int64

In [20]:
#import seaborn as sns

In [22]:
fig, ax1 = plt.subplots(figsize=(15, 7))

# Create the bars showing highs and lows
#plt.bar(month_order, max_infl - min_infl, bottom=min_infl,
#            edgecolor='none', color='#C3BBA4', width=1)

plt.bar(month_order, p90th - p10th, bottom=p10th,
            edgecolor='none', color='#C3BBA4', width=1)
# Create the bars showing average highs and lows
plt.bar(month_order, p75th - p25th, bottom=p25th,
            edgecolor='none', color='#9A9180', width=1);

#annotations={month_order[50]:'Dividends'}
plt.plot(month_order, inflEA, color='#5A3B49',linewidth=2 );


plt.plot(month_order, mode_infl, color='wheat',linewidth=2,alpha=.3);

plt.xticks(year_beginning_indeces,
               year_beginning_names,
               fontsize=10)

#ax2 = ax1.twiny()
    

plt.xticks(year_beginning_indeces,
               year_beginning_names,
               fontsize=10);

plt.xlim(-5,200)
plt.grid(False)


##ax2 = ax1.twiny()
plt.ylim(-5, 14)

#ax3 = ax1.twinx()

plt.yticks(range(-4, 15, 2), [r'{}'.format(x)
                              for x in range(-4, 15, 2)], fontsize=10);



plt.grid(axis='both', color='wheat', linewidth=1.5, alpha = .5)

plt.title('HICP innflation, annual rate of change, Jan 2000 - March 2016\n\n', fontsize=20);



In [ ]: