In [1]:
import pandas as pd
import numpy as np
import statsmodels as sm
import matplotlib
import folium
import geocoder
%pylab inline
pylab.rcParams['figure.figsize'] = (10, 8)
# from pandas import Series, DataFrame


Populating the interactive namespace from numpy and matplotlib

In [2]:
# !ls -laSh

In [4]:
# from pd import Series, DataFrame
# from IPython.external.mathjax import install_mathjax
data = []
data = pd.read_csv('adi_data.csv', na_values=None)

In [5]:
data.head()


Out[5]:
Country_Name_attr Country__attrCode Indicator_Name Indicator_Code 1960 1961 1962 1963 1964 1965 ... 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
0 Africa AFR "Fiscal balance, cash surplus/deficit (current... GC.BAL.CASH.CD NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 Africa AFR 10th pillar: Market size GCI.10THPILLAR.XQ NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 Africa AFR 11th pillar: Business sophistication GCI.11THPILLAR.XQ NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 Africa AFR 12th pillar: Innovation GCI.12THPILLAR.XQ NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 Africa AFR 1st pillar: Institutions GCI.1STPILLAR.XQ NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 57 columns


In [6]:
data.columns


Out[6]:
Index(['Country_Name_attr', 'Country__attrCode', 'Indicator_Name',
       'Indicator_Code', '1960', '1961', '1962', '1963', '1964', '1965',
       '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974',
       '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983',
       '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992',
       '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001',
       '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010',
       '2011', '2012'],
      dtype='object')

In [7]:
type(data)


Out[7]:
pandas.core.frame.DataFrame

In [8]:
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104760 entries, 0 to 104759
Data columns (total 57 columns):
Country_Name_attr    104760 non-null object
Country__attrCode    104760 non-null object
Indicator_Name       104760 non-null object
Indicator_Code       104760 non-null object
1960                 10458 non-null float64
1961                 14011 non-null float64
1962                 14641 non-null float64
1963                 14666 non-null float64
1964                 14986 non-null float64
1965                 17986 non-null float64
1966                 17588 non-null float64
1967                 18048 non-null float64
1968                 18120 non-null float64
1969                 18227 non-null float64
1970                 26426 non-null float64
1971                 27110 non-null float64
1972                 27419 non-null float64
1973                 27231 non-null float64
1974                 27480 non-null float64
1975                 29182 non-null float64
1976                 29695 non-null float64
1977                 30226 non-null float64
1978                 30217 non-null float64
1979                 30456 non-null float64
1980                 33416 non-null float64
1981                 34095 non-null float64
1982                 34593 non-null float64
1983                 34618 non-null float64
1984                 35613 non-null float64
1985                 36854 non-null float64
1986                 37306 non-null float64
1987                 38104 non-null float64
1988                 38355 non-null float64
1989                 38807 non-null float64
1990                 46599 non-null float64
1991                 47061 non-null float64
1992                 47965 non-null float64
1993                 47752 non-null float64
1994                 48497 non-null float64
1995                 50633 non-null float64
1996                 51332 non-null float64
1997                 50967 non-null float64
1998                 51863 non-null float64
1999                 51804 non-null float64
2000                 55009 non-null float64
2001                 53990 non-null float64
2002                 58135 non-null float64
2003                 57781 non-null float64
2004                 58664 non-null float64
2005                 65084 non-null float64
2006                 65715 non-null float64
2007                 64897 non-null float64
2008                 62472 non-null float64
2009                 62854 non-null float64
2010                 58877 non-null float64
2011                 51970 non-null float64
2012                 5022 non-null float64
dtypes: float64(53), object(4)
memory usage: 45.6+ MB

In [9]:
data.describe()


/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)
Out[9]:
1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
count 1.045800e+04 1.401100e+04 1.464100e+04 1.466600e+04 1.498600e+04 1.798600e+04 1.758800e+04 1.804800e+04 1.812000e+04 1.822700e+04 ... 5.778100e+04 5.866400e+04 6.508400e+04 6.571500e+04 6.489700e+04 6.247200e+04 6.285400e+04 5.887700e+04 5.197000e+04 5.022000e+03
mean 9.698293e+09 7.435320e+09 7.321397e+09 7.608746e+09 8.437124e+09 9.466919e+09 1.039587e+10 1.138860e+10 1.224405e+10 1.327963e+10 ... 1.443773e+16 1.504945e+16 1.422888e+16 1.537146e+16 1.702430e+16 1.753084e+16 1.090952e+11 1.326100e+11 1.608894e+11 1.844105e+08
std 9.661116e+10 8.329482e+10 8.312047e+10 8.650995e+10 9.573042e+10 1.000417e+11 1.108124e+11 1.189969e+11 1.256872e+11 1.437140e+11 ... 3.470481e+18 3.645059e+18 3.629993e+18 3.940443e+18 4.336898e+18 4.381702e+18 1.518539e+12 1.789520e+12 2.121123e+12 1.447996e+09
min -7.142710e+11 -6.453520e+11 -7.333810e+11 -5.887420e+11 -3.996050e+11 -7.566140e+11 -1.308750e+12 -1.716620e+12 -1.841570e+12 -1.424600e+12 ... -1.247470e+13 -2.217360e+13 -1.357200e+13 -2.205640e+13 -2.716460e+13 -2.820880e+13 -3.996840e+13 -4.139820e+13 -5.797690e+13 0.000000e+00
25% NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
50% NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
75% NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
max 2.568620e+12 2.386980e+12 2.476740e+12 2.658270e+12 2.815490e+12 3.180670e+12 3.680690e+12 3.816560e+12 4.032870e+12 5.764460e+12 ... 8.342230e+20 8.828570e+20 9.260680e+20 1.010130e+21 1.104820e+21 1.095180e+21 9.739010e+13 1.066470e+14 1.241490e+14 3.345706e+10

8 rows × 53 columns


In [10]:
data.columns


Out[10]:
Index(['Country_Name_attr', 'Country__attrCode', 'Indicator_Name',
       'Indicator_Code', '1960', '1961', '1962', '1963', '1964', '1965',
       '1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974',
       '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983',
       '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992',
       '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001',
       '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010',
       '2011', '2012'],
      dtype='object')

In [13]:
df = data.groupby(['Country_Name_attr', 'Country__attrCode', 'Indicator_Name',
       'Indicator_Code'])
# df.head()

In [14]:
data = data.fillna(0)

In [15]:
data.head()


Out[15]:
Country_Name_attr Country__attrCode Indicator_Name Indicator_Code 1960 1961 1962 1963 1964 1965 ... 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
0 Africa AFR "Fiscal balance, cash surplus/deficit (current... GC.BAL.CASH.CD 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 Africa AFR 10th pillar: Market size GCI.10THPILLAR.XQ 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 Africa AFR 11th pillar: Business sophistication GCI.11THPILLAR.XQ 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 Africa AFR 12th pillar: Innovation GCI.12THPILLAR.XQ 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 Africa AFR 1st pillar: Institutions GCI.1STPILLAR.XQ 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 57 columns


In [16]:
data.index
count_nations = df.count()
count_nations.head()


Out[16]:
1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ... 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
Country_Name_attr Country__attrCode Indicator_Name Indicator_Code
Africa AFR "Fiscal balance, cash surplus/deficit (current US$)\r" GC.BAL.CASH.CD 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
10th pillar: Market size GCI.10THPILLAR.XQ 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
11th pillar: Business sophistication GCI.11THPILLAR.XQ 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
12th pillar: Innovation GCI.12THPILLAR.XQ 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1st pillar: Institutions GCI.1STPILLAR.XQ 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 53 columns


In [ ]:
data.shape

In [ ]:
data.sort_index(axis=1, ascending=False)
data[0:6]

In [ ]:


In [ ]: