Make a plot of HICP inflation by item groups


In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
import numpy as np

from matplotlib.ticker import FixedLocator, FixedFormatter
#import seaborn as sns

to_colors = lambda x : x/255.

In [2]:
ls


 Volume in drive C is OSDisk
 Volume Serial Number is F2B8-9F6A

 Directory of c:\Users\EEU227\Documents\Projects\Inflation\Disaggregated-Data

05/27/2016  05:33 PM    <DIR>          .
05/27/2016  05:33 PM    <DIR>          ..
05/27/2016  04:33 PM    <DIR>          .ipynb_checkpoints
05/27/2016  11:28 AM            25,434 .Rhistory
05/27/2016  03:57 PM            47,613 get-raw-data_HICP-items.ipynb
05/27/2016  11:28 AM           118,648 get-raw-data-HICP-countries.ipynb
05/27/2016  05:33 PM           349,266 infl_items.csv
05/13/2016  08:30 PM            33,807 raw_data_countries.csv
05/27/2016  03:57 PM           120,735 raw_data_items.csv
05/27/2016  11:28 AM           115,640 weather-like-plot-HICP-by-country.ipynb
05/27/2016  05:33 PM           122,140 weather-like-plot-HICP-by-item.ipynb
05/27/2016  11:52 AM             9,965 weather-like-plot-HICP-by-item.R
               9 File(s)        943,248 bytes
               3 Dir(s)  313,065,000,960 bytes free

In [3]:
df_ind_items = pd.read_csv('raw_data_items.csv',header=0,index_col=0,parse_dates=0)
df_ind_items.head()


Out[3]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 184325 240469 240487 240505 100281 241333 241405 241441 184343 184361
date
1999-01-01 73.87 71.97 68.21 66.64 76.49 71.21 75.94 70.13 77.03 82.79 ... 71.18 NaN NaN NaN 63.99 NaN NaN NaN 73.68 70.42
1999-02-01 74.06 71.89 67.20 67.54 76.58 71.28 75.89 70.93 77.18 82.82 ... 71.69 NaN NaN NaN 64.42 NaN NaN NaN 73.75 70.53
1999-03-01 74.27 71.75 67.35 68.22 76.89 71.36 75.91 71.61 77.27 83.07 ... 72.10 NaN NaN NaN 64.43 NaN NaN NaN 73.74 70.55
1999-04-01 74.51 71.65 67.56 68.76 78.04 71.41 75.86 72.05 77.34 83.16 ... 72.25 NaN NaN NaN 64.43 NaN NaN NaN 73.88 70.61
1999-05-01 74.57 71.54 67.50 70.41 77.25 71.43 75.77 72.35 77.39 83.35 ... 72.85 NaN NaN NaN 64.40 NaN NaN NaN 74.25 70.71

5 rows × 94 columns


In [4]:
df_ind_items.index


Out[4]:
DatetimeIndex(['1999-01-01', '1999-02-01', '1999-03-01', '1999-04-01',
               '1999-05-01', '1999-06-01', '1999-07-01', '1999-08-01',
               '1999-09-01', '1999-10-01',
               ...
               '2015-07-01', '2015-08-01', '2015-09-01', '2015-10-01',
               '2015-11-01', '2015-12-01', '2016-01-01', '2016-02-01',
               '2016-03-01', '2016-04-01'],
              dtype='datetime64[ns]', name='date', length=208, freq=None)

Compute annual inflation rates


In [5]:
df_infl_items = df_ind_items.pct_change(periods=12)*100
mask_rows_infl = df_infl_items.index.year >= 2000
df_infl_items = df_infl_items[mask_rows_infl]
df_infl_items.tail()


Out[5]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 184325 240469 240487 240505 100281 241333 241405 241441 184343 184361
date
2015-12-01 0.230138 0.000000 2.316213 4.992167 4.102994 0.512048 -1.906361 6.711618 0.968523 0.070007 ... 0.339389 0.905250 -0.069763 1.579267 1.507538 2.171266 2.411968 3.824522 0.717172 -0.200200
2016-01-01 0.325203 0.019992 3.778664 5.160331 0.372159 0.360794 -1.613222 6.078088 0.854100 0.070119 ... 0.298507 0.751804 -0.199302 2.033830 1.647413 2.222222 2.204188 3.875418 0.704225 -0.358780
2016-02-01 -0.151469 0.010010 2.987026 4.825850 -2.453690 0.421095 -1.457466 5.469630 0.370556 0.090036 ... -0.288902 0.810730 0.079745 2.178277 1.656959 2.054174 2.453926 3.403009 0.905159 -0.388562
2016-03-01 -0.049925 -0.010015 2.577268 4.757463 0.669687 0.360577 -1.321017 3.610548 0.349511 -0.139986 ... -0.169678 0.740741 0.510051 2.074772 1.656460 1.959799 2.533293 3.403009 0.732785 -0.467569
2016-04-01 -0.249029 -0.040044 3.119654 2.942679 1.235048 0.350210 -1.471319 2.790651 0.729198 0.219956 ... 1.080973 0.690000 0.709645 2.003205 1.716867 1.874875 2.514468 3.319879 -0.330860 -0.328260

5 rows × 94 columns


In [6]:
tt = df_infl_items.copy() 
tt['month'] = tt.index.month 
tt['year'] = tt.index.year
tt.head()


Out[6]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 240487 240505 100281 241333 241405 241441 184343 184361 month year
date
2000-01-01 1.868147 0.555787 2.844158 -0.450180 -5.687018 0.983008 -0.039505 2.837587 0.921719 1.171639 ... NaN NaN 2.812940 NaN NaN NaN 3.610206 1.377450 1 2000
2000-02-01 1.930867 0.792878 2.485119 -0.636660 -3.538783 0.982043 0.052708 1.649514 0.712620 1.195363 ... NaN NaN 2.390562 NaN NaN NaN 3.877966 1.403658 2 2000
2000-03-01 1.952336 1.128920 2.108389 -2.477279 -5.124203 0.994955 0.079041 0.516688 0.608257 0.854701 ... NaN NaN 2.467795 NaN NaN NaN 3.892053 1.459957 3 2000
2000-04-01 1.717890 1.618981 2.708703 -1.759744 -3.703229 0.966251 0.250461 -0.416378 0.568916 0.925926 ... NaN NaN 2.483315 NaN NaN NaN 3.708717 1.458717 4 2000
2000-05-01 1.756739 2.026838 2.696296 -1.988354 -2.757282 1.161977 0.369539 -1.285418 0.607314 0.611878 ... NaN NaN 2.515528 NaN NaN NaN 3.326599 1.499081 5 2000

5 rows × 96 columns


In [7]:
tt.to_csv('infl_items.csv')

df_infl_items.rename(columns = dic)

tt = df_infl_items.copy() tt['month'] = tt.index.month tt['year'] = tt.index.year melted_df = pd.melt(tt,id_vars=['month','year']) melted_df.head()


In [8]:
df_infl_items['min'] = df_infl_items.apply(min,axis=1)
df_infl_items['max'] = df_infl_items.apply(max,axis=1)
df_infl_items['mean'] = df_infl_items.apply(np.mean,axis=1)
df_infl_items['mode'] = df_infl_items.quantile(q=0.5, axis=1)
df_infl_items['10th'] = df_infl_items.quantile(q=0.10, axis=1)
df_infl_items['90th'] = df_infl_items.quantile(q=0.90, axis=1)
df_infl_items['25th'] = df_infl_items.quantile(q=0.25, axis=1)
df_infl_items['75th'] = df_infl_items.quantile(q=0.75, axis=1)

In [9]:
df_infl_items.tail()


Out[9]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 184343 184361 min max mean mode 10th 90th 25th 75th
date
2015-12-01 0.230138 0.000000 2.316213 4.992167 4.102994 0.512048 -1.906361 6.711618 0.968523 0.070007 ... 0.717172 -0.200200 -21.973878 6.711618 0.450269 0.873582 -1.198068 2.782478 -0.017441 1.611441
2016-01-01 0.325203 0.019992 3.778664 5.160331 0.372159 0.360794 -1.613222 6.078088 0.854100 0.070119 ... 0.704225 -0.358780 -22.383367 6.078088 0.526244 0.913739 -0.650838 2.680877 0.193033 1.581545
2016-02-01 -0.151469 0.010010 2.987026 4.825850 -2.453690 0.421095 -1.457466 5.469630 0.370556 0.090036 ... 0.905159 -0.388562 -28.846701 5.469630 0.115757 0.901984 -1.352614 2.466629 -0.180107 1.448255
2016-03-01 -0.049925 -0.010015 2.577268 4.757463 0.669687 0.360577 -1.321017 3.610548 0.349511 -0.139986 ... 0.732785 -0.467569 -26.546776 5.678963 0.257141 0.801844 -0.963257 2.597829 -0.019993 1.536607
2016-04-01 -0.249029 -0.040044 3.119654 2.942679 1.235048 0.350210 -1.471319 2.790651 0.729198 0.219956 ... -0.330860 -0.328260 -25.965968 21.644329 0.408413 0.781094 -1.384437 2.699406 0.005046 1.540548

5 rows × 102 columns

df_infl_items['month'] = df_infl_items.index.month df_infl_items['year'] = df_infl_items.index.year


In [10]:
df_infl_items.head()


Out[10]:
76451 182759 182777 182831 182849 182741 182795 182813 182867 182885 ... 184343 184361 min max mean mode 10th 90th 25th 75th
date
2000-01-01 1.868147 0.555787 2.844158 -0.450180 -5.687018 0.983008 -0.039505 2.837587 0.921719 1.171639 ... 3.610206 1.377450 -13.013418 47.799576 2.038340 1.213298 -1.082721 3.586112 0.369168 2.157921
2000-02-01 1.930867 0.792878 2.485119 -0.636660 -3.538783 0.982043 0.052708 1.649514 0.712620 1.195363 ... 3.877966 1.403658 -15.192846 54.939759 2.223727 1.205807 -1.079527 3.670695 0.356208 2.159962
2000-03-01 1.952336 1.128920 2.108389 -2.477279 -5.124203 0.994955 0.079041 0.516688 0.608257 0.854701 ... 3.892053 1.459957 -17.374306 49.937765 2.054702 1.128920 -1.207884 3.756614 0.410474 2.054702
2000-04-01 1.717890 1.618981 2.708703 -1.759744 -3.703229 0.966251 0.250461 -0.416378 0.568916 0.925926 ... 3.708717 1.458717 -18.253343 33.193669 1.722454 1.259756 -1.094472 3.960396 0.382868 2.028562
2000-05-01 1.756739 2.026838 2.696296 -1.988354 -2.757282 1.161977 0.369539 -1.285418 0.607314 0.611878 ... 3.326599 1.499081 -18.313494 38.948598 1.816464 1.264173 -1.319059 3.734629 0.365247 2.050354

5 rows × 102 columns


In [11]:
print(df_infl_items.describe())


            76451      182759      182777      182831      182849      182741  \
count  196.000000  196.000000  196.000000  196.000000  196.000000  196.000000   
mean     1.814312    2.057310    2.485505    2.617403    2.081105    2.107576   
std      0.971035    1.984083    1.545281    3.952119    4.594963    2.113824   
min     -0.651324   -0.670488   -1.830638   -9.261530   -9.009637   -0.725875   
25%      1.471035    0.595766    1.434416   -0.344256   -0.770458    0.666500   
50%      2.043150    1.735726    2.379725    2.545525    2.189455    1.853340   
75%      2.420494    3.126366    3.744155    5.561125    5.145358    2.646639   
max      4.055123    9.083436    6.150938   11.422867   18.783945   10.272587   

           182795      182813      182867      182885     ...          184343  \
count  196.000000  196.000000  196.000000  196.000000     ...      196.000000   
mean     1.739714    2.201525    1.603659    1.134111     ...        1.844340   
std      3.441002    3.982286    1.478999    1.380952     ...        2.563309   
min     -4.390395   -7.174840   -0.711494   -1.146812     ...       -6.991995   
25%      0.038404   -0.728542    0.563178    0.085057     ...        0.601804   
50%      1.088888    2.365175    1.150364    0.919632     ...        1.601530   
75%      2.852254    4.815616    2.578132    2.139633     ...        3.223291   
max     14.431778   12.586099    5.007431    4.726287     ...        7.077041   

           184361         min         max        mean        mode        10th  \
count  196.000000  196.000000  196.000000  196.000000  196.000000  196.000000   
mean     2.133906  -15.664433   17.654005    1.637348    1.744360   -0.959812   
std      0.905060    6.387701   11.799842    0.783164    0.508936    0.922741   
min     -0.467569  -41.515323    3.238033   -0.332101    0.613806   -4.462967   
25%      1.685991  -19.629614    8.295702    1.342616    1.428807   -1.400077   
50%      2.194360  -14.355109   14.380168    1.796590    1.822980   -0.819098   
75%      2.641996  -10.899278   24.043291    2.139447    2.100926   -0.255091   
max      4.172082   -7.456915   59.267503    3.458749    2.676718    0.537287   

             90th        25th        75th  
count  196.000000  196.000000  196.000000  
mean     4.198388    0.734434    2.733777  
std      1.000184    0.569478    0.588020  
min      2.222576   -0.518146    1.408307  
25%      3.574396    0.307586    2.259585  
50%      4.222945    0.800833    2.909834  
75%      4.748288    1.182103    3.125744  
max      7.980168    1.670282    3.837905  

[8 rows x 102 columns]

Generate a bunch of histograms of the data to make sure that all of the data

is in an expected range.

with plt.style.context('https://gist.githubusercontent.com/rhiever/d0a7332fe0beebfdc3d5/raw/223d70799b48131d5ce2723cd5784f39d7a3a653/tableau10.mplstyle'): for column in df_infl_items.columns[:-2]:

    #if column in ['date']:
    #    continue
    plt.figure()
    plt.hist(df_infl_items[column].values)
    plt.title(column)
    #plt.savefig('{}.png'.format(column))

In [12]:
len(df_infl_items)


Out[12]:
196

In [13]:
df_infl_items.columns


Out[13]:
Index(['76451', '182759', '182777', '182831', '182849', '182741', '182795',
       '182813', '182867', '182885',
       ...
       '184343', '184361', 'min', 'max', 'mean', 'mode', '10th', '90th',
       '25th', '75th'],
      dtype='object', length=102)

In [14]:
df_infl_items['month_order'] = range(len(df_infl_items))
month_order = df_infl_items['month_order']
max_infl = df_infl_items['max'].values
min_infl = df_infl_items['min'].values
mean_infl = df_infl_items['mean'].values
mode_infl = df_infl_items['mode'].values
p25th = df_infl_items['25th'].values
p75th = df_infl_items['75th'].values
p10th = df_infl_items['10th'].values
p90th = df_infl_items['90th'].values
inflEA = df_infl_items['76451'].values

In [15]:
year_begin_df = df_infl_items[df_infl_items.index.month == 1]
year_begin_df;

In [16]:
year_beginning_indeces = list(year_begin_df['month_order'].values)
year_beginning_indeces


Out[16]:
[0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132, 144, 156, 168, 180, 192]

In [17]:
year_beginning_names = list(year_begin_df.index.year)
year_beginning_names


Out[17]:
[2000,
 2001,
 2002,
 2003,
 2004,
 2005,
 2006,
 2007,
 2008,
 2009,
 2010,
 2011,
 2012,
 2013,
 2014,
 2015,
 2016]

In [18]:
month_order;

In [19]:
blue3 = map(to_colors, (24, 116, 205))  # 1874CD
wheat2 = map(to_colors, (238, 216, 174))  # EED8AE
wheat3 = map(to_colors, (205, 186, 150))  # CDBA96
wheat4 = map(to_colors, (139, 126, 102))  # 8B7E66
firebrick3 = map(to_colors, (205, 38, 38))  # CD2626
gray30 = map(to_colors, (77, 77, 77))  # 4D4D4D

In [ ]:


In [20]:
fig, ax1 = plt.subplots(figsize=(15,7))

plt.bar(month_order, p90th - p10th, bottom=p10th,
            edgecolor='none', color='#C3BBA4', width=1);

# Create the bars showing average highs and lows
plt.bar(month_order, p75th - p25th, bottom=p25th,
            edgecolor='none', color='#9A9180', width=1);

#annotations={month_order[50]:'Dividends'}
plt.plot(month_order, inflEA, color='#5A3B49',linewidth=2 );


plt.plot(month_order, mode_infl, color='wheat',linewidth=2,alpha=.3);

plt.xticks(year_beginning_indeces,
               year_beginning_names,
               fontsize=10)

#ax2 = ax1.twiny()
    

plt.xlim(-5,200)
plt.grid(False)


##ax2 = ax1.twiny()
plt.ylim(-5, 10)

#ax3 = ax1.twinx()

plt.yticks(range(-4, 10, 2), [r'{}'.format(x)
                              for x in range(-4, 10, 2)], fontsize=10);



plt.grid(axis='both', color='wheat', linewidth=1.5, alpha = .5)

plt.title('HICP inflation, annual rate of change, Jan 2000 - March 2016\n\n', fontsize=20);



In [ ]: