Extracting information from World Bank Data

Data cleansing, processing and visualization. A dive into Data Science.


In [19]:
# Optional one-time setup: uncomment the lines below to install or upgrade the dependencies.
# !pip3 install --upgrade pandas
# !pip3 install --upgrade numpy
# !pip3 install --upgrade matplotlib
# !pip3 install --upgrade statsmodels
# !pip3 install --upgrade scikit-learn
# !pip3 install --upgrade csvkit
# Ubuntu (Debian) fix
# !apt-get install libfreetype6-dev libpng-dev
# !pip3 install --upgrade pip

In [20]:
# Update local machine python dependencies
#!pip3 freeze --local | grep -v '^\-e' | cut -d = -f 1  | xargs -n1 pip3 install -U
#!pip freeze --local | grep -v '^\-e' | cut -d = -f 1  | xargs -n1 pip install -U

In [21]:
# List the working directory: all entries (-a), long format (-l), largest first (-S), human-readable sizes (-h).
!ls -laSh


total 30M
-rw-rw-r-- 1 devola devola  30M Jul 27 12:31 adi_data.csv
-rw-rw-r-- 1 devola devola 8.0K Jul 27 13:47 dev_countries.ipynb
drwxrwxr-x 4 devola devola 4.0K Jul 27 13:47 .
drwxrwxr-x 5 devola devola 4.0K Jul 27 12:30 ..
drwxrwxr-x 8 devola devola 4.0K Jul 27 12:31 .git
drwxr-xr-x 2 devola devola 4.0K Jul 27 12:34 .ipynb_checkpoints
-rw-rw-r-- 1 devola devola 1.1K Jul 27 12:31 LICENSE
-rw-rw-r-- 1 devola devola  764 Jul 27 12:31 .gitignore
-rw-rw-r-- 1 devola devola   27 Jul 27 12:31 README.md

In [22]:
# Core analysis stack used by the cells below.
import pandas as pd
import numpy as np
import statsmodels as sm
# The plotting interface is the `pyplot` submodule; `matplotlib.plot` does not exist.
import matplotlib.pyplot as plt

from pandas import Series, DataFrame
# from IPython.external.mathjax import install_mathjax

In [17]:
# Quick plotting check: draw the integers 0-9 as a line chart.
# `arange` is a NumPy function (np.arange), and pandas plots through Series.plot();
# `pd.plot_params` is not a callable plotting function.
# NOTE(review): the original intent is inferred -- confirm a simple line plot is what was wanted.
pd.Series(np.arange(10)).plot();

In [ ]:
# Load the CSV that sits next to this notebook into a DataFrame.
# pandas' default missing-value detection applies (na_values=None is already the default).
data = pd.read_csv('adi_data.csv')

In [ ]:
# Column labels of the loaded frame.
data.columns

In [ ]:
# Preview the first seven rows; a bare last expression gets the notebook's rich table display,
# whereas print() falls back to a plain-text dump.
data.head(7)

In [ ]:
# Check the Python type of the loaded object.
type(data)

In [ ]:
# Get the row and column count of data, as a (rows, columns) tuple
data.shape

In [ ]:
# Clean up the NaNs: replace every missing value with 0.
# NOTE(review): zero-filling makes missing observations indistinguishable from real zeros
# in any statistic computed from `data` afterwards -- confirm this is intended.
data = data.fillna(0)

In [ ]:
# Preview the first three rows; a bare last expression gets the notebook's rich table display,
# whereas print() falls back to a plain-text dump.
data.head(3)

In [ ]:
# Column labels again, after the missing-value fill.
data.columns

In [ ]:
# First five rows (the default row count for head()).
data.head()

In [ ]:
# Row index of the frame.
data.index

In [ ]:
# Underlying values of the frame as a NumPy array.
data.values

In [ ]:
# Summary statistics of the frame; by default pandas reports on the numeric columns.
data.describe()

In [ ]:
# View the frame with its columns ordered by label, descending.
# The result is not assigned, so `data` itself keeps its original column order.
data.sort_index(axis=1, ascending=False)

In [ ]:
# First five rows, selected by explicit positional indexing.
data.iloc[:5]

In [ ]:
# Plot the frame: pandas has no top-level `plot` function, plotting is a DataFrame method.
# NOTE(review): this draws every numeric column against the row index -- narrow the
# selection to the columns of interest for a readable chart.
data.plot();

In [ ]:
import matplotlib

In [ ]: