notebook.community

Edit and run



In [1]:

    
# This is my notebook for exploring data about economic inequality in Cambodia.

%matplotlib inline
import pandas as pd
import matplotlib as mp
import matplotlib.pyplot as plt
import qgrid
from pylab import *
import seaborn as sb

# Hey, good news! We can remotely access the World Bank's World Development Indicators Database
# directly from pandas!

from pandas.io import wb



In [2]:

    
# Search the World Bank indicator catalogue for indicators whose name matches the pattern
# 'pov.*%' and keep only the first two columns of the result (shown below as id and name).
# qgrid is not used here because it would not display the id column correctly.

pov_matches = wb.search('pov.*%')
pov = pov_matches.iloc[:, :2]
pov









    Out[2]:






  
    
      
      id
      name
    
  
  
    
      5453 
               IN.POV.HCR.EST.RURL
                       Poverty HCR Estimates (%) - Rural
    
    
      5454 
               IN.POV.HCR.EST.TOTL
                       Poverty HCR Estimates (%) - Total
    
    
      5455 
               IN.POV.HCR.EST.URBN
                       Poverty HCR Estimates (%) - Urban
    
    
      7529 
                      SI.POV.25DAY
       Poverty headcount ratio at $2.5 a day (PPP) (%...
    
    
      7530 
                       SI.POV.2DAY
       Poverty headcount ratio at $2 a day (PPP) (% o...
    
    
      7531 
                       SI.POV.4DAY
       Poverty headcount ratio at $4 a day (PPP) (% o...
    
    
      7532 
                       SI.POV.5DAY
       Poverty headcount ratio at $5 a day (PPP) (% o...
    
    
      7533 
                       SI.POV.DDAY
       Poverty headcount ratio at $1.25 a day (PPP) (...
    
    
      7534 
                       SI.POV.GAP2
                       Poverty gap at $2 a day (PPP) (%)
    
    
      7535 
                      SI.POV.GAP25
                     Poverty gap at $2.5 a day (PPP) (%)
    
    
      7536 
                       SI.POV.GAP4
                       Poverty gap at $4 a day (PPP) (%)
    
    
      7537 
                       SI.POV.GAP5
                       Poverty gap at $5 a day (PPP) (%)
    
    
      7538 
                       SI.POV.GAPS
                    Poverty gap at $1.25 a day (PPP) (%)
    
    
      7540 
                       SI.POV.NAGP
               Poverty gap at national poverty lines (%)
    
    
      7541 
                       SI.POV.NAHC
       Poverty headcount ratio at national poverty li...
    
    
      7547 
                       SI.POV.RUGP
         Rural poverty gap at national poverty lines (%)
    
    
      7548 
                       SI.POV.RUHC
       Rural poverty headcount ratio at national pove...
    
    
      7549 
                       SI.POV.URGP
         Urban poverty gap at national poverty lines (%)
    
    
      7550 
                       SI.POV.URHC
       Urban poverty headcount ratio at national pove...
    
    
      10162
                 ccx_povchi_40_fem
       Poverty headcount of children (below bottom 40...
    
    
      10163
                 ccx_povchi_40_mal
       Poverty headcount of children (below bottom 40...
    
    
      10164
                 ccx_povchi_40_rur
       Poverty headcount of children (below bottom 40...
    
    
      10165
                 ccx_povchi_40_tot
        Poverty headcount of children (below bottom 40%)
    
    
      10166
                 ccx_povchi_40_urb
       Poverty headcount of children (below bottom 40...
    
    
      10167
                 ccx_poveld_40_fem
       Poverty headcount of the elderly (below bottom...
    
    
      10168
                 ccx_poveld_40_mal
       Poverty headcount of the elderly (below bottom...
    
    
      10169
                 ccx_poveld_40_rur
       Poverty headcount of the elderly (below bottom...
    
    
      10170
                 ccx_poveld_40_tot
       Poverty headcount of the elderly (below bottom...
    
    
      10171
                 ccx_poveld_40_urb
       Poverty headcount of the elderly (below bottom...
    
    
      10172
                 ccx_povwka_40_fem
       Poverty headcount of working age adults (below...
    
    
      ...
      ...
      ...
    
    
      12761
       per_si_allsi_p1_ep_preT_tot
       Poverty Gap reduction (%) -  All Social Insura...
    
    
      12762
            per_si_allsi_p1_ep_tot
       Poverty Gap reduction (%) -  All Social Insura...
    
    
      12763
          per_si_allsi_p1_preT_tot
       Poverty Gap reduction (%) -  All Social Insura...
    
    
      12764
               per_si_allsi_p1_rur
       Poverty Gap reduction (%) -  All Social Insura...
    
    
      12765
               per_si_allsi_p1_tot
       Poverty Gap reduction (%) -  All Social Insura...
    
    
      12766
               per_si_allsi_p1_urb
       Poverty Gap reduction (%) -  All Social Insura...
    
    
      12901
          per_si_oa_p0_ep_preT_tot
       Poverty Headcount reduction (%) -  Old Age Con...
    
    
      12902
               per_si_oa_p0_ep_tot
       Poverty Headcount reduction (%) -  Old Age Con...
    
    
      12903
             per_si_oa_p0_preT_tot
       Poverty Headcount reduction (%) -  Old Age Con...
    
    
      12904
                  per_si_oa_p0_rur
       Poverty Headcount reduction (%) -  Old Age Con...
    
    
      12905
                  per_si_oa_p0_tot
       Poverty Headcount reduction (%) -  Old Age Con...
    
    
      12906
                  per_si_oa_p0_urb
       Poverty Headcount reduction (%) -  Old Age Con...
    
    
      12907
          per_si_oa_p1_ep_preT_tot
       Poverty Gap reduction (%) -  Old Age Contribut...
    
    
      12908
               per_si_oa_p1_ep_tot
       Poverty Gap reduction (%) -  Old Age Contribut...
    
    
      12909
             per_si_oa_p1_preT_tot
       Poverty Gap reduction (%) -  Old Age Contribut...
    
    
      12910
                  per_si_oa_p1_rur
       Poverty Gap reduction (%) -  Old Age Contribut...
    
    
      12911
                  per_si_oa_p1_tot
       Poverty Gap reduction (%) -  Old Age Contribut...
    
    
      12912
                  per_si_oa_p1_urb
       Poverty Gap reduction (%) -  Old Age Contribut...
    
    
      13047
          per_si_ss_p0_ep_preT_tot
       Poverty Headcount reduction (%) -  Other Socia...
    
    
      13048
               per_si_ss_p0_ep_tot
       Poverty Headcount reduction (%) -  Other Socia...
    
    
      13049
             per_si_ss_p0_preT_tot
       Poverty Headcount reduction (%) -  Other Socia...
    
    
      13050
                  per_si_ss_p0_rur
       Poverty Headcount reduction (%) -  Other Socia...
    
    
      13051
                  per_si_ss_p0_tot
       Poverty Headcount reduction (%) -  Other Socia...
    
    
      13052
                  per_si_ss_p0_urb
       Poverty Headcount reduction (%) -  Other Socia...
    
    
      13053
          per_si_ss_p1_ep_preT_tot
       Poverty Gap reduction (%) -  Other Social Insu...
    
    
      13054
               per_si_ss_p1_ep_tot
       Poverty Gap reduction (%) -  Other Social Insu...
    
    
      13055
             per_si_ss_p1_preT_tot
       Poverty Gap reduction (%) -  Other Social Insu...
    
    
      13056
                  per_si_ss_p1_rur
       Poverty Gap reduction (%) -  Other Social Insu...
    
    
      13057
                  per_si_ss_p1_tot
       Poverty Gap reduction (%) -  Other Social Insu...
    
    
      13058
                  per_si_ss_p1_urb
       Poverty Gap reduction (%) -  Other Social Insu...
    
  

231 rows × 2 columns



In [3]:

    
# Split the SI.POV.* rows into two parallel lists: indicator ids and their English names.
# 7529 and 7550 are row labels taken from the search result above (.loc label slices include
# both end points), so they are tied to the catalogue as it looked when this was run.

si_pov = pov.loc[7529:7550]
povnames = si_pov['name'].tolist()

# From this point on `pov` is a plain list of ids rather than the search DataFrame

pov = si_pov['id'].tolist()

# Take a look

povnames









    Out[3]:





[u'Poverty headcount ratio at $2.5 a day (PPP) (% of population)',
 u'Poverty headcount ratio at $2 a day (PPP) (% of population)',
 u'Poverty headcount ratio at $4 a day (PPP) (% of population)',
 u'Poverty headcount ratio at $5 a day (PPP) (% of population)',
 u'Poverty headcount ratio at $1.25 a day (PPP) (% of population)',
 u'Poverty gap at $2 a day (PPP) (%)',
 u'Poverty gap at $2.5 a day (PPP) (%)',
 u'Poverty gap at $4 a day (PPP) (%)',
 u'Poverty gap at $5 a day (PPP) (%)',
 u'Poverty gap at $1.25 a day (PPP) (%)',
 u'Poverty gap at national poverty lines (%)',
 u'Poverty headcount ratio at national poverty lines (% of population)',
 u'Rural poverty gap at national poverty lines (%)',
 u'Rural poverty headcount ratio at national poverty lines (% of rural population)',
 u'Urban poverty gap at national poverty lines (%)',
 u'Urban poverty headcount ratio at national poverty lines (% of urban population)']



In [4]:

    
# Show the list of selected poverty indicator ids (pov was turned into a list in the cell above)
pov









    Out[4]:





[u'SI.POV.25DAY',
 u'SI.POV.2DAY',
 u'SI.POV.4DAY',
 u'SI.POV.5DAY',
 u'SI.POV.DDAY',
 u'SI.POV.GAP2',
 u'SI.POV.GAP25',
 u'SI.POV.GAP4',
 u'SI.POV.GAP5',
 u'SI.POV.GAPS',
 u'SI.POV.NAGP',
 u'SI.POV.NAHC',
 u'SI.POV.RUGP',
 u'SI.POV.RUHC',
 u'SI.POV.URGP',
 u'SI.POV.URHC']



In [5]:

    
# Lookup table: poverty indicator id -> English name (the two lists are in matching order)
povdict = {code: label for code, label in zip(pov, povnames)}
povdict









    Out[5]:





{u'SI.POV.25DAY': u'Poverty headcount ratio at $2.5 a day (PPP) (% of population)',
 u'SI.POV.2DAY': u'Poverty headcount ratio at $2 a day (PPP) (% of population)',
 u'SI.POV.4DAY': u'Poverty headcount ratio at $4 a day (PPP) (% of population)',
 u'SI.POV.5DAY': u'Poverty headcount ratio at $5 a day (PPP) (% of population)',
 u'SI.POV.DDAY': u'Poverty headcount ratio at $1.25 a day (PPP) (% of population)',
 u'SI.POV.GAP2': u'Poverty gap at $2 a day (PPP) (%)',
 u'SI.POV.GAP25': u'Poverty gap at $2.5 a day (PPP) (%)',
 u'SI.POV.GAP4': u'Poverty gap at $4 a day (PPP) (%)',
 u'SI.POV.GAP5': u'Poverty gap at $5 a day (PPP) (%)',
 u'SI.POV.GAPS': u'Poverty gap at $1.25 a day (PPP) (%)',
 u'SI.POV.NAGP': u'Poverty gap at national poverty lines (%)',
 u'SI.POV.NAHC': u'Poverty headcount ratio at national poverty lines (% of population)',
 u'SI.POV.RUGP': u'Rural poverty gap at national poverty lines (%)',
 u'SI.POV.RUHC': u'Rural poverty headcount ratio at national poverty lines (% of rural population)',
 u'SI.POV.URGP': u'Urban poverty gap at national poverty lines (%)',
 u'SI.POV.URHC': u'Urban poverty headcount ratio at national poverty lines (% of urban population)'}



In [6]:

    
# Next, find the income-share indicators the same way and keep the first two columns
# (shown below as id and name).

inc_matches = wb.search('income.*share.*%')
inc = inc_matches.iloc[:, :2]
inc









    Out[6]:






  
    
      
      id
      name
    
  
  
    
      7522
       SI.DST.02ND.20
        Income share held by second 20%
    
    
      7523
       SI.DST.03RD.20
         Income share held by third 20%
    
    
      7524
       SI.DST.04TH.20
        Income share held by fourth 20%
    
    
      7525
       SI.DST.05TH.20
       Income share held by highest 20%
    
    
      7526
       SI.DST.10TH.10
       Income share held by highest 10%
    
    
      7527
       SI.DST.FRST.10
        Income share held by lowest 10%
    
    
      7528
       SI.DST.FRST.20
        Income share held by lowest 20%



In [7]:

    
# Same split as for the poverty indicators, this time keeping every row:
# incnames holds the English names, and inc becomes a plain list of ids.

incnames = inc['name'].tolist()
inc = inc['id'].tolist()
incnames









    Out[7]:





[u'Income share held by second 20%',
 u'Income share held by third 20%',
 u'Income share held by fourth 20%',
 u'Income share held by highest 20%',
 u'Income share held by highest 10%',
 u'Income share held by lowest 10%',
 u'Income share held by lowest 20%']



In [8]:

    
# Show the list of income-share indicator ids (inc was turned into a list in the cell above)
inc









    Out[8]:





[u'SI.DST.02ND.20',
 u'SI.DST.03RD.20',
 u'SI.DST.04TH.20',
 u'SI.DST.05TH.20',
 u'SI.DST.10TH.10',
 u'SI.DST.FRST.10',
 u'SI.DST.FRST.20']



In [9]:

    
# Lookup table: income indicator id -> English name (the two lists are in matching order)

incdict = {code: label for code, label in zip(inc, incnames)}
incdict









    Out[9]:





{u'SI.DST.02ND.20': u'Income share held by second 20%',
 u'SI.DST.03RD.20': u'Income share held by third 20%',
 u'SI.DST.04TH.20': u'Income share held by fourth 20%',
 u'SI.DST.05TH.20': u'Income share held by highest 20%',
 u'SI.DST.10TH.10': u'Income share held by highest 10%',
 u'SI.DST.FRST.10': u'Income share held by lowest 10%',
 u'SI.DST.FRST.20': u'Income share held by lowest 20%'}



In [10]:

    
# Master list of every indicator id to download: poverty ids first, then income ids

idx = list(pov)
idx.extend(inc)
idx









    Out[10]:





[u'SI.POV.25DAY',
 u'SI.POV.2DAY',
 u'SI.POV.4DAY',
 u'SI.POV.5DAY',
 u'SI.POV.DDAY',
 u'SI.POV.GAP2',
 u'SI.POV.GAP25',
 u'SI.POV.GAP4',
 u'SI.POV.GAP5',
 u'SI.POV.GAPS',
 u'SI.POV.NAGP',
 u'SI.POV.NAHC',
 u'SI.POV.RUGP',
 u'SI.POV.RUHC',
 u'SI.POV.URGP',
 u'SI.POV.URHC',
 u'SI.DST.02ND.20',
 u'SI.DST.03RD.20',
 u'SI.DST.04TH.20',
 u'SI.DST.05TH.20',
 u'SI.DST.10TH.10',
 u'SI.DST.FRST.10',
 u'SI.DST.FRST.20']



In [11]:

    
# Fetch every indicator in idx for Cambodia (KHM), 2004 through 2012, as a DataFrame

khm = wb.download(
    indicator=idx,
    country='KHM',
    start=2004,
    end=2012
)
khm









    Out[11]:






  
    
      
      
      SI.POV.25DAY
      SI.POV.2DAY
      SI.POV.4DAY
      SI.POV.5DAY
      SI.POV.DDAY
      SI.POV.GAP2
      SI.POV.GAP25
      SI.POV.GAP4
      SI.POV.GAP5
      SI.POV.GAPS
      ...
      SI.POV.RUHC
      SI.POV.URGP
      SI.POV.URHC
      SI.DST.02ND.20
      SI.DST.03RD.20
      SI.DST.04TH.20
      SI.DST.05TH.20
      SI.DST.10TH.10
      SI.DST.FRST.10
      SI.DST.FRST.20
    
    
      country
      year
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Cambodia
      2012
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
        NaN
      ...
       20.8
      NaN
        6.4
         NaN
         NaN
         NaN
         NaN
         NaN
        NaN
        NaN
    
    
      2011
       59.00
       41.26
       85.70
       91.83
       10.05
       10.25
       18.29
       39.60
       49.51
       1.43
      ...
       23.6
      NaN
        8.7
       12.46
       16.11
       21.24
       41.20
       26.91
       4.04
       8.99
    
    
      2010
       57.59
       40.88
       84.06
       90.63
       11.25
       10.59
       18.37
       39.00
       48.74
       1.70
      ...
       25.3
      NaN
        8.5
       12.02
       15.80
       21.18
       42.49
       28.01
       3.80
       8.51
    
    
      2009
       56.25
       40.74
       82.20
       89.25
       12.93
       11.21
       18.71
       38.50
       48.02
       2.08
      ...
       27.5
      NaN
        8.0
       11.66
       15.68
       21.48
       43.15
       28.20
       3.55
       8.03
    
    
      2008
       65.90
       51.05
       87.33
       92.48
       20.89
       16.27
       24.78
       45.05
       54.08
       4.39
      ...
       38.5
      NaN
       15.1
       11.60
       15.67
       21.43
       43.45
       28.57
       3.44
       7.85
    
    
      2007
       71.05
       59.39
       87.65
       92.07
       30.82
       21.92
       30.64
       49.51
       57.63
       7.24
      ...
       51.4
      NaN
       18.3
       10.05
       13.95
       20.16
       48.89
       33.99
       3.12
       6.95
    
    
      2006
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
        NaN
      ...
        NaN
      NaN
        NaN
         NaN
         NaN
         NaN
         NaN
         NaN
        NaN
        NaN
    
    
      2005
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
         NaN
        NaN
      ...
        NaN
      NaN
        NaN
         NaN
         NaN
         NaN
         NaN
         NaN
        NaN
        NaN
    
    
      2004
       76.54
       64.43
       91.55
       94.95
       32.77
       23.64
       33.09
       52.85
       60.96
       7.79
      ...
       54.2
      NaN
       28.5
       11.43
       15.37
       21.22
       44.00
       29.09
       3.56
       7.98
    
  

9 rows × 23 columns



In [12]:

    
# Tidy the downloaded frame: drop the Cambodia index level, put the years in ascending order,
# and drop indicators that are NA for every year.
#
# The cell is written so it can be re-run without a fresh download:
#   * the country level is only dropped while the index still has more than one level
#     (a second droplevel(0) on the single-level year index would fail);
#   * sort_index() is used instead of reversing with iloc[::-1], which would flip the rows
#     back to descending order on every second run.
# NOTE(review): assumes the year labels sort chronologically (four-digit years) - confirm.

if khm.index.nlevels > 1:
    khm.index = khm.index.droplevel(0)
khm = khm.sort_index()
khm = khm.dropna(axis=1, how='all')
qgrid.show_grid(khm, remote_js=True)



In [13]:

    
# Compare the share of total income held by the highest 10% with the share held by the
# lowest 10%.

decile_ids = ['SI.DST.10TH.10', 'SI.DST.FRST.10']

# Keep the first 8 rows only (2004-2011): there is no data for 2012
incframe = khm[decile_ids].iloc[0:8]

# Replace the indicator ids with their English names from incdict
incframe.columns = [incdict[code] for code in decile_ids]
incframe









    Out[13]:






  
    
      
      Income share held by highest 10%
      Income share held by lowest 10%
    
    
      year
      
      
    
  
  
    
      2004
       29.09
       3.56
    
    
      2005
         NaN
        NaN
    
    
      2006
         NaN
        NaN
    
    
      2007
       33.99
       3.12
    
    
      2008
       28.57
       3.44
    
    
      2009
       28.20
       3.55
    
    
      2010
       28.01
       3.80
    
    
      2011
       26.91
       4.04



In [14]:

    
# Shorter column names make for a readable legend
incframe.columns = ['Highest 10% of Income Earners', 'Lowest 10% of Income Earners']

# Create the figure and axes once and hand the axes to pandas. DataFrame.plot() opens its own
# figure when no `ax` is given, so a bare plt.figure() beforehand is left behind as an empty
# extra figure.
fig, ax = plt.subplots()
incframe.plot(ax=ax, title='Income Share in Cambodia', style='o-')
ax.set_xlabel('Year')
# The trailing ';' keeps the Text object's repr out of the cell output
ax.set_ylabel('Share of National Income (%)');









    Out[14]:





<matplotlib.text.Text at 0x10d85eb10>






    





<matplotlib.figure.Figure at 0x10d9f4910>



In [15]:

    
# Income share of each fifth of the population, ordered from lowest to highest
quintile_ids = ['SI.DST.FRST.20', 'SI.DST.02ND.20', 'SI.DST.03RD.20', 'SI.DST.04TH.20',
                'SI.DST.05TH.20']
incframe1 = khm[quintile_ids]

# Look up each id in incdict to rename the columns. A list comprehension is used because
# assigning into the result of range() only works on Python 2; on Python 3 range() is not a
# list and item assignment raises TypeError.
incframe1.columns = [incdict[col] for col in incframe1.columns]

# Keep the first 8 rows only (2004-2011): there is no data for 2012
incframe1 = incframe1.iloc[:8]
incframe1









    Out[15]:






  
    
      
      Income share held by lowest 20%
      Income share held by second 20%
      Income share held by third 20%
      Income share held by fourth 20%
      Income share held by highest 20%
    
    
      year
      
      
      
      
      
    
  
  
    
      2004
       7.98
       11.43
       15.37
       21.22
       44.00
    
    
      2005
        NaN
         NaN
         NaN
         NaN
         NaN
    
    
      2006
        NaN
         NaN
         NaN
         NaN
         NaN
    
    
      2007
       6.95
       10.05
       13.95
       20.16
       48.89
    
    
      2008
       7.85
       11.60
       15.67
       21.43
       43.45
    
    
      2009
       8.03
       11.66
       15.68
       21.48
       43.15
    
    
      2010
       8.51
       12.02
       15.80
       21.18
       42.49
    
    
      2011
       8.99
       12.46
       16.11
       21.24
       41.20



In [16]:

    
# Change column names to short labels for the legend
incframe1.columns = ['Lowest 20%', 'Second Lowest 20%', 'Middle 20%', 'Second Highest 20%', 'Highest 20%']

# Draw on explicitly created axes: DataFrame.plot() opens its own figure when no `ax` is
# given, so a bare plt.figure() beforehand is left behind as an empty extra figure.
fig, ax = plt.subplots()
incframe1.plot(ax=ax, title='Income Share in Cambodia', style='o-')
ax.set_xlabel('Year')
ax.set_ylabel('Share of National Income (%)')
# Fixed 0-50 range on the y axis; the trailing ';' keeps the returned limits out of the output
ax.set_ylim([0, 50]);









    Out[16]:





(0, 50)






    





<matplotlib.figure.Figure at 0x10d9f4850>



In [17]:

    
# Stacked bar graph of the quintile income shares (incframe1)

# Draw on explicitly created axes: DataFrame.plot() opens its own figure when no `ax` is
# given, so a bare plt.figure() beforehand is left behind as an empty extra figure.
fig, ax = plt.subplots()
# `stacked` is a boolean flag. The string 'True' only worked because any non-empty string is
# truthy - the string 'False' would have stacked the bars as well.
incframe1.plot(ax=ax, title='Income Share in Cambodia', kind='bar', stacked=True)
ax.set_xlabel('Year')
# The trailing ';' keeps the Text object's repr out of the cell output
ax.set_ylabel('Share of National Income (%)');









    Out[17]:





<matplotlib.text.Text at 0x10d816c90>






    





<matplotlib.figure.Figure at 0x10d967250>



In [18]:

    
# How does this compare to the US? Download the same income-share indicators for the USA.

usa_ids = [
    'SI.DST.10TH.10', 'SI.DST.FRST.10',
    'SI.DST.FRST.20', 'SI.DST.02ND.20', 'SI.DST.03RD.20', 'SI.DST.04TH.20', 'SI.DST.05TH.20',
]
usa = wb.download(indicator=usa_ids, country='USA', start=2004, end=2011)

# Drop the first index level, then reverse the row order (same treatment as the Cambodia frame)
usa.index = usa.index.droplevel(0)
usa = usa.iloc[::-1]
qgrid.show_grid(usa, remote_js=True)



In [19]:

    
# So there isn't nearly as much data for the US during this time period, but it's still worth looking at.

usainc = usa[['SI.DST.10TH.10', 'SI.DST.FRST.10']]

# Short column names for the legend
usainc.columns = ['Highest 10% of Income Earners', 'Lowest 10% of Income Earners']

# Draw on explicitly created axes: DataFrame.plot() opens its own figure when no `ax` is
# given, so a bare plt.figure() beforehand is left behind as an empty extra figure.
fig, ax = plt.subplots()
usainc.plot(ax=ax, title='Income Share in the USA', style='o-')
ax.set_xlabel('Year')
# The trailing ';' keeps the Text object's repr out of the cell output
ax.set_ylabel('Share of National Income (%)');









    Out[19]:





<matplotlib.text.Text at 0x10aed8710>






    





<matplotlib.figure.Figure at 0x10d9dd210>



In [20]:

    
# Income share of each fifth of the US population, ordered from lowest to highest
usainc1 = usa[['SI.DST.FRST.20', 'SI.DST.02ND.20', 'SI.DST.03RD.20', 'SI.DST.04TH.20', 'SI.DST.05TH.20']]

# Short column names for the legend (same order as the ids above)
usainc1.columns = ['Lowest 20%', 'Second Lowest 20%', 'Middle 20%', 'Second Highest 20%', 'Highest 20%']

# Draw on explicitly created axes: DataFrame.plot() opens its own figure when no `ax` is
# given, so a bare plt.figure() beforehand is left behind as an empty extra figure.
fig, ax = plt.subplots()
usainc1.plot(ax=ax, title='Income Share in the USA', style='o-')
ax.set_xlabel('Year')
# The trailing ';' keeps the Text object's repr out of the cell output
ax.set_ylabel('Share of National Income (%)');









    Out[20]:





<matplotlib.text.Text at 0x10d876950>






    





<matplotlib.figure.Figure at 0x10d9ff550>



In [21]:

    
# Stacked bar graph of the US quintile income shares (usainc1)

# Draw on explicitly created axes: DataFrame.plot() opens its own figure when no `ax` is
# given, so a bare plt.figure() beforehand is left behind as an empty extra figure.
fig, ax = plt.subplots()
# `stacked` is a boolean flag. The string 'True' only worked because any non-empty string is
# truthy - the string 'False' would have stacked the bars as well.
usainc1.plot(ax=ax, title='Income Share in the USA', kind='bar', stacked=True)
ax.set_xlabel('Year')
# The trailing ';' keeps the Text object's repr out of the cell output
ax.set_ylabel('Share of National Income (%)');









    Out[21]:





<matplotlib.text.Text at 0x10d69ed50>






    





<matplotlib.figure.Figure at 0x10d926610>



In [27]:

    
# So... we aren't much better (perhaps worse). To put the shares in context, download the
# Gross National Income per capita for each country, starting with Cambodia.

gni_ids = ['NY.GNP.PCAP.CD']
khmgni = wb.download(indicator=gni_ids, country='KHM', start=2004, end=2011)

# Drop the first index level, then reverse the row order
khmgni.index = khmgni.index.droplevel(0)
khmgni = khmgni.iloc[::-1]

qgrid.show_grid(khmgni, remote_js=True)



In [28]:

    
# Line plot of Cambodia's GNI per capita by year.
# Draw on explicitly created axes: DataFrame.plot() opens its own figure when no `ax` is
# given, so a bare plt.figure() beforehand is left behind as an empty extra figure.
fig, ax = plt.subplots()
khmgni.plot(ax=ax, title='Gross National Income per Capita in Cambodia', style='o-')
ax.set_xlabel('Year')
# The trailing ';' keeps the Text object's repr out of the cell output
ax.set_ylabel('Income ($)');









    Out[28]:





<matplotlib.text.Text at 0x10a84bfd0>






    





<matplotlib.figure.Figure at 0x10d067850>



In [29]:

    
# Same GNI-per-capita download for the USA
gni_ids = ['NY.GNP.PCAP.CD']
usagni = wb.download(indicator=gni_ids, country='USA', start=2004, end=2011)

# Drop the first index level, then reverse the row order
usagni.index = usagni.index.droplevel(0)
usagni = usagni.iloc[::-1]

qgrid.show_grid(usagni, remote_js=True)



In [30]:

    
# Line plot of US GNI per capita by year.
# Draw on explicitly created axes: DataFrame.plot() opens its own figure when no `ax` is
# given, so a bare plt.figure() beforehand is left behind as an empty extra figure.
fig, ax = plt.subplots()
usagni.plot(ax=ax, title='Gross National Income per Capita in the USA', style='o-')
ax.set_xlabel('Year')
# The trailing ';' keeps the Text object's repr out of the cell output
ax.set_ylabel('Income ($)');









    Out[30]:





<matplotlib.text.Text at 0x109ca6810>






    





<matplotlib.figure.Figure at 0x109d0b310>



In [24]:

    
# Explore how close people are to the poverty boundary and how it is changing over time

povline = khm[['SI.POV.25DAY', 'SI.POV.2DAY', 'SI.POV.4DAY', 'SI.POV.5DAY', 'SI.POV.DDAY', 'SI.POV.NAHC']]

# Look up each id in povdict to rename the columns. A list comprehension is used because
# assigning into the result of range() only works on Python 2; on Python 3 range() is not a
# list and item assignment raises TypeError.
povline.columns = [povdict[col] for col in povline.columns]

povline









    Out[24]:






  
    
      
      Poverty headcount ratio at $2.5 a day (PPP) (% of population)
      Poverty headcount ratio at $2 a day (PPP) (% of population)
      Poverty headcount ratio at $4 a day (PPP) (% of population)
      Poverty headcount ratio at $5 a day (PPP) (% of population)
      Poverty headcount ratio at $1.25 a day (PPP) (% of population)
      Poverty headcount ratio at national poverty lines (% of population)
    
    
      year
      
      
      
      
      
      
    
  
  
    
      2004
       76.54
       64.43
       91.55
       94.95
       32.77
       50.2
    
    
      2005
         NaN
         NaN
         NaN
         NaN
         NaN
        NaN
    
    
      2006
         NaN
         NaN
         NaN
         NaN
         NaN
        NaN
    
    
      2007
       71.05
       59.39
       87.65
       92.07
       30.82
       45.0
    
    
      2008
       65.90
       51.05
       87.33
       92.48
       20.89
       34.0
    
    
      2009
       56.25
       40.74
       82.20
       89.25
       12.93
       23.9
    
    
      2010
       57.59
       40.88
       84.06
       90.63
       11.25
       22.1
    
    
      2011
       59.00
       41.26
       85.70
       91.83
       10.05
       20.5
    
    
      2012
         NaN
         NaN
         NaN
         NaN
         NaN
       17.7



In [31]:

    
# Change titles of columns for plotting, then plot
povline.columns = ['<$2.50 a day', '<$2 a day', '<$4 a day', '<$5 a day', '<$1.25 a day', '<National poverty lines']

# Draw on explicitly created axes: DataFrame.plot() opens its own figure when no `ax` is
# given, so a bare plt.figure() beforehand is left behind as an empty extra figure.
fig, ax = plt.subplots()
povline.plot(ax=ax, title='Poverty Headcount Ratio at Different Incomes in Cambodia', style='o-')
ax.set_xlabel('Year')
# The trailing ';' keeps the Text object's repr out of the cell output
ax.set_ylabel('% of population');









    Out[31]:





<matplotlib.text.Text at 0x10dfbee10>






    





<matplotlib.figure.Figure at 0x109cc6c90>



In [26]:

    
# The World Bank does not have most of this data for the USA, so I will find US Census data later.

	id	name
5453	IN.POV.HCR.EST.RURL	Poverty HCR Estimates (%) - Rural
5454	IN.POV.HCR.EST.TOTL	Poverty HCR Estimates (%) - Total
5455	IN.POV.HCR.EST.URBN	Poverty HCR Estimates (%) - Urban
7529	SI.POV.25DAY	Poverty headcount ratio at $2.5 a day (PPP) (%...
7530	SI.POV.2DAY	Poverty headcount ratio at $2 a day (PPP) (% o...
7531	SI.POV.4DAY	Poverty headcount ratio at $4 a day (PPP) (% o...
7532	SI.POV.5DAY	Poverty headcount ratio at $5 a day (PPP) (% o...
7533	SI.POV.DDAY	Poverty headcount ratio at $1.25 a day (PPP) (...
7534	SI.POV.GAP2	Poverty gap at $2 a day (PPP) (%)
7535	SI.POV.GAP25	Poverty gap at $2.5 a day (PPP) (%)
7536	SI.POV.GAP4	Poverty gap at $4 a day (PPP) (%)
7537	SI.POV.GAP5	Poverty gap at $5 a day (PPP) (%)
7538	SI.POV.GAPS	Poverty gap at $1.25 a day (PPP) (%)
7540	SI.POV.NAGP	Poverty gap at national poverty lines (%)
7541	SI.POV.NAHC	Poverty headcount ratio at national poverty li...
7547	SI.POV.RUGP	Rural poverty gap at national poverty lines (%)
7548	SI.POV.RUHC	Rural poverty headcount ratio at national pove...
7549	SI.POV.URGP	Urban poverty gap at national poverty lines (%)
7550	SI.POV.URHC	Urban poverty headcount ratio at national pove...
10162	ccx_povchi_40_fem	Poverty headcount of children (below bottom 40...
10163	ccx_povchi_40_mal	Poverty headcount of children (below bottom 40...
10164	ccx_povchi_40_rur	Poverty headcount of children (below bottom 40...
10165	ccx_povchi_40_tot	Poverty headcount of children (below bottom 40%)
10166	ccx_povchi_40_urb	Poverty headcount of children (below bottom 40...
10167	ccx_poveld_40_fem	Poverty headcount of the elderly (below bottom...
10168	ccx_poveld_40_mal	Poverty headcount of the elderly (below bottom...
10169	ccx_poveld_40_rur	Poverty headcount of the elderly (below bottom...
10170	ccx_poveld_40_tot	Poverty headcount of the elderly (below bottom...
10171	ccx_poveld_40_urb	Poverty headcount of the elderly (below bottom...
10172	ccx_povwka_40_fem	Poverty headcount of working age adults (below...
...	...	...
12761	per_si_allsi_p1_ep_preT_tot	Poverty Gap reduction (%) - All Social Insura...
12762	per_si_allsi_p1_ep_tot	Poverty Gap reduction (%) - All Social Insura...
12763	per_si_allsi_p1_preT_tot	Poverty Gap reduction (%) - All Social Insura...
12764	per_si_allsi_p1_rur	Poverty Gap reduction (%) - All Social Insura...
12765	per_si_allsi_p1_tot	Poverty Gap reduction (%) - All Social Insura...
12766	per_si_allsi_p1_urb	Poverty Gap reduction (%) - All Social Insura...
12901	per_si_oa_p0_ep_preT_tot	Poverty Headcount reduction (%) - Old Age Con...
12902	per_si_oa_p0_ep_tot	Poverty Headcount reduction (%) - Old Age Con...
12903	per_si_oa_p0_preT_tot	Poverty Headcount reduction (%) - Old Age Con...
12904	per_si_oa_p0_rur	Poverty Headcount reduction (%) - Old Age Con...
12905	per_si_oa_p0_tot	Poverty Headcount reduction (%) - Old Age Con...
12906	per_si_oa_p0_urb	Poverty Headcount reduction (%) - Old Age Con...
12907	per_si_oa_p1_ep_preT_tot	Poverty Gap reduction (%) - Old Age Contribut...
12908	per_si_oa_p1_ep_tot	Poverty Gap reduction (%) - Old Age Contribut...
12909	per_si_oa_p1_preT_tot	Poverty Gap reduction (%) - Old Age Contribut...
12910	per_si_oa_p1_rur	Poverty Gap reduction (%) - Old Age Contribut...
12911	per_si_oa_p1_tot	Poverty Gap reduction (%) - Old Age Contribut...
12912	per_si_oa_p1_urb	Poverty Gap reduction (%) - Old Age Contribut...
13047	per_si_ss_p0_ep_preT_tot	Poverty Headcount reduction (%) - Other Socia...
13048	per_si_ss_p0_ep_tot	Poverty Headcount reduction (%) - Other Socia...
13049	per_si_ss_p0_preT_tot	Poverty Headcount reduction (%) - Other Socia...
13050	per_si_ss_p0_rur	Poverty Headcount reduction (%) - Other Socia...
13051	per_si_ss_p0_tot	Poverty Headcount reduction (%) - Other Socia...
13052	per_si_ss_p0_urb	Poverty Headcount reduction (%) - Other Socia...
13053	per_si_ss_p1_ep_preT_tot	Poverty Gap reduction (%) - Other Social Insu...
13054	per_si_ss_p1_ep_tot	Poverty Gap reduction (%) - Other Social Insu...
13055	per_si_ss_p1_preT_tot	Poverty Gap reduction (%) - Other Social Insu...
13056	per_si_ss_p1_rur	Poverty Gap reduction (%) - Other Social Insu...
13057	per_si_ss_p1_tot	Poverty Gap reduction (%) - Other Social Insu...
13058	per_si_ss_p1_urb	Poverty Gap reduction (%) - Other Social Insu...

	id	name
7522	SI.DST.02ND.20	Income share held by second 20%
7523	SI.DST.03RD.20	Income share held by third 20%
7524	SI.DST.04TH.20	Income share held by fourth 20%
7525	SI.DST.05TH.20	Income share held by highest 20%
7526	SI.DST.10TH.10	Income share held by highest 10%
7527	SI.DST.FRST.10	Income share held by lowest 10%
7528	SI.DST.FRST.20	Income share held by lowest 20%

		SI.POV.25DAY	SI.POV.2DAY	SI.POV.4DAY	SI.POV.5DAY	SI.POV.DDAY	SI.POV.GAP2	SI.POV.GAP25	SI.POV.GAP4	SI.POV.GAP5	SI.POV.GAPS	...	SI.POV.RUHC	SI.POV.URGP	SI.POV.URHC	SI.DST.02ND.20	SI.DST.03RD.20	SI.DST.04TH.20	SI.DST.05TH.20	SI.DST.10TH.10	SI.DST.FRST.10	SI.DST.FRST.20
country	year
Cambodia	2012	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	20.8	NaN	6.4	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	2011	59.00	41.26	85.70	91.83	10.05	10.25	18.29	39.60	49.51	1.43	...	23.6	NaN	8.7	12.46	16.11	21.24	41.20	26.91	4.04	8.99
	2010	57.59	40.88	84.06	90.63	11.25	10.59	18.37	39.00	48.74	1.70	...	25.3	NaN	8.5	12.02	15.80	21.18	42.49	28.01	3.80	8.51
	2009	56.25	40.74	82.20	89.25	12.93	11.21	18.71	38.50	48.02	2.08	...	27.5	NaN	8.0	11.66	15.68	21.48	43.15	28.20	3.55	8.03
	2008	65.90	51.05	87.33	92.48	20.89	16.27	24.78	45.05	54.08	4.39	...	38.5	NaN	15.1	11.60	15.67	21.43	43.45	28.57	3.44	7.85
	2007	71.05	59.39	87.65	92.07	30.82	21.92	30.64	49.51	57.63	7.24	...	51.4	NaN	18.3	10.05	13.95	20.16	48.89	33.99	3.12	6.95
	2006	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	2005	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
	2004	76.54	64.43	91.55	94.95	32.77	23.64	33.09	52.85	60.96	7.79	...	54.2	NaN	28.5	11.43	15.37	21.22	44.00	29.09	3.56	7.98