Make a plot of HICP inflation by item groups


In [47]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
import numpy as np

from matplotlib.ticker import FixedLocator, FixedFormatter
#import seaborn as sns

from matplotlib.ticker import FixedLocator, FixedFormatter
import seaborn as sns

In [48]:
ls


 Volume in drive C is OSDisk
 Volume Serial Number is F2B8-9F6A

 Directory of c:\Users\EEU227\Documents\Projects\Inflation\Disaggregated-Data

06/06/2016  05:03 PM    <DIR>          .
06/06/2016  05:03 PM    <DIR>          ..
05/27/2016  05:45 PM    <DIR>          .ipynb_checkpoints
05/27/2016  11:28 AM            25,434 .Rhistory
06/06/2016  05:03 PM            47,563 get-raw-data_HICP-items.ipynb
05/27/2016  11:28 AM           118,648 get-raw-data-HICP-countries.ipynb
06/06/2016  05:02 PM           349,399 infl_items.csv
06/06/2016  05:02 PM           269,366 Inflation-Items.png
06/06/2016  05:02 PM           136,856 Inflation-Items.svg
05/13/2016  08:30 PM            33,807 raw_data_countries.csv
06/06/2016  05:02 PM           120,847 raw_data_items.csv
05/27/2016  11:28 AM           115,640 weather-like-plot-HICP-by-country.ipynb
05/27/2016  05:36 PM           119,980 weather-like-plot-HICP-by-item.ipynb
05/27/2016  08:07 PM             9,965 weather-like-plot-HICP-by-item.R
06/06/2016  05:03 PM           161,772 weather-like-plot-HICP-by-item-ver-2.ipynb
              12 File(s)      1,509,277 bytes
               3 Dir(s)  312,532,709,376 bytes free

In [49]:
df_ind_items = pd.read_csv('raw_data_items.csv',header=0,index_col=0,parse_dates=0)
df_ind_items.head()


Out[49]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 184325 240469 240487 240505 100281 241333 241405 241441 184343 184361
date
1999-01-01 73.87 71.97 68.21 66.64 76.49 71.21 75.94 70.13 77.03 82.79 ... 71.18 NaN NaN NaN 63.99 NaN NaN NaN 73.68 70.42
1999-02-01 74.06 71.89 67.20 67.54 76.58 71.28 75.89 70.93 77.18 82.82 ... 71.69 NaN NaN NaN 64.42 NaN NaN NaN 73.75 70.53
1999-03-01 74.27 71.75 67.35 68.22 76.89 71.36 75.91 71.61 77.27 83.07 ... 72.10 NaN NaN NaN 64.43 NaN NaN NaN 73.74 70.55
1999-04-01 74.51 71.65 67.56 68.76 78.04 71.41 75.86 72.05 77.34 83.16 ... 72.25 NaN NaN NaN 64.43 NaN NaN NaN 73.88 70.61
1999-05-01 74.57 71.54 67.50 70.41 77.25 71.43 75.77 72.35 77.39 83.35 ... 72.85 NaN NaN NaN 64.40 NaN NaN NaN 74.25 70.71

5 rows × 94 columns


In [50]:
df_ind_items.index


Out[50]:
DatetimeIndex(['1999-01-01', '1999-02-01', '1999-03-01', '1999-04-01',
               '1999-05-01', '1999-06-01', '1999-07-01', '1999-08-01',
               '1999-09-01', '1999-10-01',
               ...
               '2015-08-01', '2015-09-01', '2015-10-01', '2015-11-01',
               '2015-12-01', '2016-01-01', '2016-02-01', '2016-03-01',
               '2016-04-01', '2016-05-01'],
              dtype='datetime64[ns]', name='date', length=209, freq=None)

Compute annual inflation rates


In [51]:
df_infl_items = df_ind_items.pct_change(periods=12)*100
mask_rows_infl = df_infl_items.index.year >= 2000
df_infl_items = df_infl_items[mask_rows_infl]
df_infl_items.tail()


Out[51]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 184325 240469 240487 240505 100281 241333 241405 241441 184343 184361
date
2016-01-01 0.325203 0.019992 3.778664 5.160331 0.372159 0.360794 -1.613222 6.078088 0.854100 0.070119 ... 0.298507 0.751804 -0.199302 2.033830 1.647413 2.222222 2.204188 3.875418 0.704225 -0.358780
2016-02-01 -0.151469 0.010010 2.987026 4.825850 -2.453690 0.421095 -1.457466 5.469630 0.370556 0.090036 ... -0.288902 0.810730 0.079745 2.178277 1.656959 2.054174 2.453926 3.403009 0.905159 -0.388562
2016-03-01 -0.049925 -0.010015 2.577268 4.757463 0.669687 0.360577 -1.321017 3.610548 0.349511 -0.139986 ... -0.169678 0.740741 0.510051 2.074772 1.656460 1.959799 2.533293 3.403009 0.732785 -0.467569
2016-04-01 -0.249029 -0.040044 3.119654 2.942679 1.235048 0.350210 -1.471319 2.790651 0.729198 0.219956 ... 1.080973 0.690000 0.709645 2.003205 1.716867 1.874875 2.514468 3.319879 -0.330860 -0.328260
2016-05-01 -0.119272 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 94 columns


In [52]:
tt = df_infl_items.copy() 
tt['month'] = tt.index.month 
tt['year'] = tt.index.year
tt.head()


Out[52]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 240487 240505 100281 241333 241405 241441 184343 184361 month year
date
2000-01-01 1.868147 0.555787 2.844158 -0.450180 -5.687018 0.983008 -0.039505 2.837587 0.921719 1.171639 ... NaN NaN 2.812940 NaN NaN NaN 3.610206 1.377450 1 2000
2000-02-01 1.930867 0.792878 2.485119 -0.636660 -3.538783 0.982043 0.052708 1.649514 0.712620 1.195363 ... NaN NaN 2.390562 NaN NaN NaN 3.877966 1.403658 2 2000
2000-03-01 1.952336 1.128920 2.108389 -2.477279 -5.124203 0.994955 0.079041 0.516688 0.608257 0.854701 ... NaN NaN 2.467795 NaN NaN NaN 3.892053 1.459957 3 2000
2000-04-01 1.717890 1.618981 2.708703 -1.759744 -3.703229 0.966251 0.250461 -0.416378 0.568916 0.925926 ... NaN NaN 2.483315 NaN NaN NaN 3.708717 1.458717 4 2000
2000-05-01 1.756739 2.026838 2.696296 -1.988354 -2.757282 1.161977 0.369539 -1.285418 0.607314 0.611878 ... NaN NaN 2.515528 NaN NaN NaN 3.326599 1.499081 5 2000

5 rows × 96 columns


In [53]:
tt.to_csv('infl_items.csv')

df_infl_items.rename(columns = dic)

tt = df_infl_items.copy() tt['month'] = tt.index.month tt['year'] = tt.index.year melted_df = pd.melt(tt,id_vars=['month','year']) melted_df.head()


In [54]:
df_infl_items['min'] = df_infl_items.apply(min,axis=1)
df_infl_items['max'] = df_infl_items.apply(max,axis=1)
df_infl_items['mean'] = df_infl_items.apply(np.mean,axis=1)
df_infl_items['mode'] = df_infl_items.quantile(q=0.5, axis=1)
df_infl_items['10th'] = df_infl_items.quantile(q=0.10, axis=1)
df_infl_items['90th'] = df_infl_items.quantile(q=0.90, axis=1)
df_infl_items['25th'] = df_infl_items.quantile(q=0.25, axis=1)
df_infl_items['75th'] = df_infl_items.quantile(q=0.75, axis=1)

In [55]:
df_infl_items.tail()


Out[55]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 184343 184361 min max mean mode 10th 90th 25th 75th
date
2016-01-01 0.325203 0.019992 3.778664 5.160331 0.372159 0.360794 -1.613222 6.078088 0.854100 0.070119 ... 0.704225 -0.358780 -22.383367 6.078088 0.526244 0.913739 -0.650838 2.680877 0.193033 1.581545
2016-02-01 -0.151469 0.010010 2.987026 4.825850 -2.453690 0.421095 -1.457466 5.469630 0.370556 0.090036 ... 0.905159 -0.388562 -28.846701 5.469630 0.115757 0.901984 -1.352614 2.466629 -0.180107 1.448255
2016-03-01 -0.049925 -0.010015 2.577268 4.757463 0.669687 0.360577 -1.321017 3.610548 0.349511 -0.139986 ... 0.732785 -0.467569 -26.546776 5.678963 0.257141 0.801844 -0.963257 2.597829 -0.019993 1.536607
2016-04-01 -0.249029 -0.040044 3.119654 2.942679 1.235048 0.350210 -1.471319 2.790651 0.729198 0.219956 ... -0.330860 -0.328260 -25.965968 21.644329 0.408413 0.781094 -1.384437 2.699406 0.005046 1.540548
2016-05-01 -0.119272 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN -0.119272 -0.119272 -0.119272 -0.119272 -0.119272 -0.119272 -0.119272 -0.119272

5 rows × 102 columns

df_infl_items['month'] = df_infl_items.index.month df_infl_items['year'] = df_infl_items.index.year


In [56]:
df_infl_items.head()


Out[56]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 184343 184361 min max mean mode 10th 90th 25th 75th
date
2000-01-01 1.868147 0.555787 2.844158 -0.450180 -5.687018 0.983008 -0.039505 2.837587 0.921719 1.171639 ... 3.610206 1.377450 -13.013418 47.799576 2.038340 1.213298 -1.082721 3.586112 0.369168 2.157921
2000-02-01 1.930867 0.792878 2.485119 -0.636660 -3.538783 0.982043 0.052708 1.649514 0.712620 1.195363 ... 3.877966 1.403658 -15.192846 54.939759 2.223727 1.205807 -1.079527 3.670695 0.356208 2.159962
2000-03-01 1.952336 1.128920 2.108389 -2.477279 -5.124203 0.994955 0.079041 0.516688 0.608257 0.854701 ... 3.892053 1.459957 -17.374306 49.937765 2.054702 1.128920 -1.207884 3.756614 0.410474 2.054702
2000-04-01 1.717890 1.618981 2.708703 -1.759744 -3.703229 0.966251 0.250461 -0.416378 0.568916 0.925926 ... 3.708717 1.458717 -18.253343 33.193669 1.722454 1.259756 -1.094472 3.960396 0.382868 2.028562
2000-05-01 1.756739 2.026838 2.696296 -1.988354 -2.757282 1.161977 0.369539 -1.285418 0.607314 0.611878 ... 3.326599 1.499081 -18.313494 38.948598 1.816464 1.264173 -1.319059 3.734629 0.365247 2.050354

5 rows × 102 columns


In [57]:
print(df_infl_items.describe())


            76451      182759      182777      182831      182849      182741  \
count  197.000000  196.000000  196.000000  196.000000  196.000000  196.000000   
mean     1.804497    2.057310    2.485505    2.617403    2.081105    2.107576   
std      0.978303    1.984083    1.545281    3.952119    4.594963    2.113824   
min     -0.651324   -0.670488   -1.830638   -9.261530   -9.009637   -0.725875   
25%      1.423053    0.595766    1.434416   -0.344256   -0.770458    0.666500   
50%      2.040571    1.735726    2.379725    2.545525    2.189455    1.853340   
75%      2.419355    3.126366    3.744155    5.561125    5.145358    2.646639   
max      4.055123    9.083436    6.150938   11.422867   18.783945   10.272587   

           182795      182813      182867      182885     ...          184343  \
count  196.000000  196.000000  196.000000  196.000000     ...      196.000000   
mean     1.739714    2.201525    1.603659    1.134111     ...        1.844340   
std      3.441002    3.982286    1.478999    1.380952     ...        2.563309   
min     -4.390395   -7.174840   -0.711494   -1.146812     ...       -6.991995   
25%      0.038404   -0.728542    0.563178    0.085057     ...        0.601804   
50%      1.088888    2.365175    1.150364    0.919632     ...        1.601530   
75%      2.852254    4.815616    2.578132    2.139633     ...        3.223291   
max     14.431778   12.586099    5.007431    4.726287     ...        7.077041   

           184361         min         max        mean        mode        10th  \
count  196.000000  197.000000  197.000000  197.000000  197.000000  197.000000   
mean     2.133906  -15.585523   17.563785    1.628431    1.734899   -0.955545   
std      0.905060    6.466932   11.837626    0.791126    0.524714    0.922331   
min     -0.467569  -41.515323   -0.119272   -0.332101   -0.119272   -4.462967   
25%      1.685991  -19.627392    8.150716    1.323550    1.426360   -1.393788   
50%      2.194360  -14.296074   14.228015    1.796366    1.815469   -0.813965   
75%      2.641996  -10.880894   24.018394    2.137332    2.098650   -0.253151   
max      4.172082   -0.119272   59.267503    3.458749    2.676718    0.537287   

             90th        25th        75th  
count  197.000000  197.000000  197.000000  
mean     4.176471    0.730100    2.719295  
std      1.043980    0.571270    0.620743  
min     -0.119272   -0.518146   -0.119272  
25%      3.558153    0.290036    2.248733  
50%      4.214654    0.799258    2.909755  
75%      4.746811    1.181960    3.124619  
max      7.980168    1.670282    3.837905  

[8 rows x 102 columns]

Generate a bunch of histograms of the data to make sure that all of the data

is in an expected range.

with plt.style.context('https://gist.githubusercontent.com/rhiever/d0a7332fe0beebfdc3d5/raw/223d70799b48131d5ce2723cd5784f39d7a3a653/tableau10.mplstyle'): for column in df_infl_items.columns[:-2]:

    #if column in ['date']:
    #    continue
    plt.figure()
    plt.hist(df_infl_items[column].values)
    plt.title(column)
    #plt.savefig('{}.png'.format(column))

In [58]:
len(df_infl_items)


Out[58]:
197

In [59]:
df_infl_items.columns


Out[59]:
Index(['76451', '182759', '182777', '182831', '182849', '182741', '182795',
       '182813', '182867', '182885',
       ...
       '184343', '184361', 'min', 'max', 'mean', 'mode', '10th', '90th',
       '25th', '75th'],
      dtype='object', length=102)

In [60]:
df_infl_items['month_order'] = range(len(df_infl_items))
month_order = df_infl_items['month_order']
max_infl = df_infl_items['max'].values
min_infl = df_infl_items['min'].values
mean_infl = df_infl_items['mean'].values
mode_infl = df_infl_items['mode'].values
p25th = df_infl_items['25th'].values
p75th = df_infl_items['75th'].values
p10th = df_infl_items['10th'].values
p90th = df_infl_items['90th'].values
inflEA = df_infl_items['76451'].values

In [61]:
year_begin_df = df_infl_items[df_infl_items.index.month == 1]
year_begin_df;

In [62]:
year_beginning_indeces = list(year_begin_df['month_order'].values)
year_beginning_indeces


Out[62]:
[0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132, 144, 156, 168, 180, 192]

In [63]:
year_beginning_names = list(year_begin_df.index.year)
year_beginning_names


Out[63]:
[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016]

In [64]:
inflEA[inflEA.argmin()]
inflEA[inflEA.argmax()]


Out[64]:
4.0551225573252081

In [65]:
hist_low,ind_hist_low = min(inflEA), inflEA.argmin()
hist_high,ind_hist_high = max(inflEA), inflEA.argmax()

In [66]:
print(hist_high)
print(ind_hist_high)


4.05512255733
102

In [67]:
print(min(inflEA))
print(max(inflEA))


-0.651324359531
4.05512255733

In [68]:
blue3  = tuple(x/255 for x in [24, 116, 205])  # 1874CD
wheat2 = tuple(x/255 for x in [238, 216, 174])
wheat3 = tuple(x/255 for x in [205, 186, 150])
wheat4 = tuple(x/255 for x in [139, 126, 102])

firebrick3 = tuple(x/255 for x in [205, 38, 38])
gray30 = tuple(x/255 for x in [77, 77, 77])

In [69]:
idx = month_order
fig, ax = plt.subplots(figsize=(20, 10), subplot_kw={'axisbg': 'white'},
                       facecolor='white')
# plot the high-low bars
plt.vlines(idx, p10th, p90th, color=wheat3, alpha=.9,
          linewidth=2.0);

#ax.vlines(idx, past_stats.lower, past_stats.upper, color=wheat3, alpha=.9,
#          linewidth=1.5, zorder=-1)

# plot the confidence interval around the means
plt.vlines(idx, p25th, p75th, linewidth=2.5,
          color=wheat4, zorder=-1)


# plot the present year time-series
plt.plot(idx,inflEA, color='k',linewidth=2, zorder=10);

# plot the made-up 2014 range. don't know what this was supposed to show.
ax.vlines(idx[len(idx) // 8 + 2], -4, -1, linewidth=5, color=wheat2)
ax.vlines(idx[len(idx) // 8 + 2], -3, -2, linewidth=5, color=wheat4)
#ax.errorbar(len(idx) // 8 + 3, -2.5, yerr=.5, capsize=2, capthick=1,
#            color='black')
ax.text(len(idx) // 8 + 4, -2.5, "IQR", verticalalignment='center')

ax.text(len(idx) // 8 + 4, -1.2, "90 %-tile", verticalalignment='top')
ax.text(len(idx) // 8 + 4, -3.8, "10 %-tile", verticalalignment='top')
#ax.text(len(idx) // 2 - 1, 9, "2014 Temperature",
#        horizontalalignment='right')


ax.plot(ind_hist_high, hist_high, 'ro',markersize=10)
ax.plot(ind_hist_low, hist_low, 'bo',markersize=10)

ax.annotate("historical low",
            xy=(ind_hist_low,hist_low), xytext=(50, -45),
            textcoords='offset points', #arrowprops=dict(facecolor='blue',
                                        #                arrowstyle="->",
                                                        #connectionstyle="angle3",
                                       #                 width=2,
                                        #                headwidth=0,
                                        #                shrink=.02),
            arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=0.2'),
            #arrowprops=dict(arrowstyle='->', lw= 4, color= 'blue')
            color='blue', horizontalalignment='left')

ax.annotate("historical high",
           xy=(ind_hist_high,hist_high), xytext=(ind_hist_high + 0, 6),
           textcoords='offset points',
           #arrowprops=dict(facecolor='red',width=2,headwidth=0,shrink=.02),
           arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=0.2'), 
            color='red', horizontalalignment='center')



##############
## formatting
#
plt.xticks(year_beginning_indeces,
               year_beginning_names,
               fontsize=12)


left_spine = ax.spines['left']
left_spine.set_visible(True)
left_spine.set_color(wheat4)
left_spine.set_linewidth(2)

ax.xaxis.set_ticklabels([])
ax.xaxis.grid(color=wheat3, linestyle='dotted',linewidth=2)

plt.xticks(year_beginning_indeces,
               year_beginning_names,
               fontsize=12)

plt.xlim(-5,200)


yticks = (range(-4, 10, 2))
ax.yaxis.set_ticks(yticks)
          
ylabels = [str(i) + u"%" for i in yticks]
          
ax.yaxis.set_ticklabels(ylabels, fontsize=14)
ax.yaxis.grid(color='white', zorder=1)



ax.set_title("Headline and disaggregated inflation, Jan 2000 - May 2016", loc="left",
             fontsize=23)
ax.text(0, 8.5, "annual rate of change", fontsize=15,
        fontdict=dict(weight='bold'))

ax.set_xlim(-5,200)
ax.set_ylim(-5, 9)
fig.savefig("Inflation-Items.svg")
fig.savefig("Inflation-Items.png", dpi=200)



In [ ]: