In [1]:
import pandas as pd
import numpy as np
import statsmodels as sm
import matplotlib
import folium
import geocoder
%pylab inline
pylab.rcParams['figure.figsize'] = (10, 8)
# from pandas import Series, DataFrame
Populating the interactive namespace from numpy and matplotlib
In [2]:
# !ls -laSh
In [4]:
# from pd import Series, DataFrame
# from IPython.external.mathjax import install_mathjax
data = []
data = pd.read_csv('adi_data.csv', na_values=None)
In [5]:
data.head()
Out[5]:
Country_Name_attr
Country__attrCode
Indicator_Name
Indicator_Code
1960
1961
1962
1963
1964
1965
...
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
0
Africa
AFR
"Fiscal balance, cash surplus/deficit (current...
GC.BAL.CASH.CD
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
1
Africa
AFR
10th pillar: Market size
GCI.10THPILLAR.XQ
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
2
Africa
AFR
11th pillar: Business sophistication
GCI.11THPILLAR.XQ
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
3
Africa
AFR
12th pillar: Innovation
GCI.12THPILLAR.XQ
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
4
Africa
AFR
1st pillar: Institutions
GCI.1STPILLAR.XQ
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
5 rows × 57 columns
In [6]:
data.columns
Out[6]:
Index(['Country_Name_attr', 'Country__attrCode', 'Indicator_Name',
'Indicator_Code', '1960', '1961', '1962', '1963', '1964', '1965',
'1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974',
'1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983',
'1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992',
'1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001',
'2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010',
'2011', '2012'],
dtype='object')
In [7]:
type(data)
Out[7]:
pandas.core.frame.DataFrame
In [8]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104760 entries, 0 to 104759
Data columns (total 57 columns):
Country_Name_attr 104760 non-null object
Country__attrCode 104760 non-null object
Indicator_Name 104760 non-null object
Indicator_Code 104760 non-null object
1960 10458 non-null float64
1961 14011 non-null float64
1962 14641 non-null float64
1963 14666 non-null float64
1964 14986 non-null float64
1965 17986 non-null float64
1966 17588 non-null float64
1967 18048 non-null float64
1968 18120 non-null float64
1969 18227 non-null float64
1970 26426 non-null float64
1971 27110 non-null float64
1972 27419 non-null float64
1973 27231 non-null float64
1974 27480 non-null float64
1975 29182 non-null float64
1976 29695 non-null float64
1977 30226 non-null float64
1978 30217 non-null float64
1979 30456 non-null float64
1980 33416 non-null float64
1981 34095 non-null float64
1982 34593 non-null float64
1983 34618 non-null float64
1984 35613 non-null float64
1985 36854 non-null float64
1986 37306 non-null float64
1987 38104 non-null float64
1988 38355 non-null float64
1989 38807 non-null float64
1990 46599 non-null float64
1991 47061 non-null float64
1992 47965 non-null float64
1993 47752 non-null float64
1994 48497 non-null float64
1995 50633 non-null float64
1996 51332 non-null float64
1997 50967 non-null float64
1998 51863 non-null float64
1999 51804 non-null float64
2000 55009 non-null float64
2001 53990 non-null float64
2002 58135 non-null float64
2003 57781 non-null float64
2004 58664 non-null float64
2005 65084 non-null float64
2006 65715 non-null float64
2007 64897 non-null float64
2008 62472 non-null float64
2009 62854 non-null float64
2010 58877 non-null float64
2011 51970 non-null float64
2012 5022 non-null float64
dtypes: float64(53), object(4)
memory usage: 45.6+ MB
In [9]:
data.describe()
/usr/local/lib/python3.5/dist-packages/numpy/lib/function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
RuntimeWarning)
Out[9]:
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
...
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
count
1.045800e+04
1.401100e+04
1.464100e+04
1.466600e+04
1.498600e+04
1.798600e+04
1.758800e+04
1.804800e+04
1.812000e+04
1.822700e+04
...
5.778100e+04
5.866400e+04
6.508400e+04
6.571500e+04
6.489700e+04
6.247200e+04
6.285400e+04
5.887700e+04
5.197000e+04
5.022000e+03
mean
9.698293e+09
7.435320e+09
7.321397e+09
7.608746e+09
8.437124e+09
9.466919e+09
1.039587e+10
1.138860e+10
1.224405e+10
1.327963e+10
...
1.443773e+16
1.504945e+16
1.422888e+16
1.537146e+16
1.702430e+16
1.753084e+16
1.090952e+11
1.326100e+11
1.608894e+11
1.844105e+08
std
9.661116e+10
8.329482e+10
8.312047e+10
8.650995e+10
9.573042e+10
1.000417e+11
1.108124e+11
1.189969e+11
1.256872e+11
1.437140e+11
...
3.470481e+18
3.645059e+18
3.629993e+18
3.940443e+18
4.336898e+18
4.381702e+18
1.518539e+12
1.789520e+12
2.121123e+12
1.447996e+09
min
-7.142710e+11
-6.453520e+11
-7.333810e+11
-5.887420e+11
-3.996050e+11
-7.566140e+11
-1.308750e+12
-1.716620e+12
-1.841570e+12
-1.424600e+12
...
-1.247470e+13
-2.217360e+13
-1.357200e+13
-2.205640e+13
-2.716460e+13
-2.820880e+13
-3.996840e+13
-4.139820e+13
-5.797690e+13
0.000000e+00
25%
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
50%
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
75%
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
...
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
max
2.568620e+12
2.386980e+12
2.476740e+12
2.658270e+12
2.815490e+12
3.180670e+12
3.680690e+12
3.816560e+12
4.032870e+12
5.764460e+12
...
8.342230e+20
8.828570e+20
9.260680e+20
1.010130e+21
1.104820e+21
1.095180e+21
9.739010e+13
1.066470e+14
1.241490e+14
3.345706e+10
8 rows × 53 columns
In [10]:
data.columns
Out[10]:
Index(['Country_Name_attr', 'Country__attrCode', 'Indicator_Name',
'Indicator_Code', '1960', '1961', '1962', '1963', '1964', '1965',
'1966', '1967', '1968', '1969', '1970', '1971', '1972', '1973', '1974',
'1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983',
'1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992',
'1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001',
'2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010',
'2011', '2012'],
dtype='object')
In [13]:
df = data.groupby(['Country_Name_attr', 'Country__attrCode', 'Indicator_Name',
'Indicator_Code'])
# df.head()
In [14]:
data = data.fillna(0)
In [15]:
data.head()
Out[15]:
Country_Name_attr
Country__attrCode
Indicator_Name
Indicator_Code
1960
1961
1962
1963
1964
1965
...
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
0
Africa
AFR
"Fiscal balance, cash surplus/deficit (current...
GC.BAL.CASH.CD
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1
Africa
AFR
10th pillar: Market size
GCI.10THPILLAR.XQ
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
2
Africa
AFR
11th pillar: Business sophistication
GCI.11THPILLAR.XQ
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
3
Africa
AFR
12th pillar: Innovation
GCI.12THPILLAR.XQ
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
4
Africa
AFR
1st pillar: Institutions
GCI.1STPILLAR.XQ
0.0
0.0
0.0
0.0
0.0
0.0
...
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
5 rows × 57 columns
In [16]:
data.index
count_nations = df.count()
count_nations.head()
Out[16]:
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
...
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
Country_Name_attr
Country__attrCode
Indicator_Name
Indicator_Code
Africa
AFR
"Fiscal balance, cash surplus/deficit (current US$)\r"
GC.BAL.CASH.CD
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
10th pillar: Market size
GCI.10THPILLAR.XQ
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
11th pillar: Business sophistication
GCI.11THPILLAR.XQ
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
12th pillar: Innovation
GCI.12THPILLAR.XQ
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
1st pillar: Institutions
GCI.1STPILLAR.XQ
0
0
0
0
0
0
0
0
0
0
...
0
0
0
0
0
0
0
0
0
0
5 rows × 53 columns
In [ ]:
data.shape
In [ ]:
data.sort_index(axis=1, ascending=False)
data[0:6]
In [ ]:
In [ ]:
Content source: searchs/bigdatabox
Similar notebooks: