In [1]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn

In [2]:
# Download the GDP-per-capita table and parse it as tab-separated, Latin-1
# encoded text; ',' is treated as a thousands separator inside numbers and the
# literal "n/a" is read as a missing value.
gdp_per_capita = pd.read_csv(
    "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/lifesat/gdp_per_capita.csv",
    sep='\t',
    thousands=',',
    encoding='latin1',
    na_values="n/a",
)

In [3]:
# Reshape the loaded table in one chained expression rather than mutating it
# with inplace=True. The name is rebound only after every step has succeeded,
# so a failure (for example a missing column) leaves gdp_per_capita untouched
# instead of half-transformed.
# Note: set_index("Country") needs "Country" to still be a regular column, so
# this cell expects the frame as produced by the load cell.
gdp_per_capita = (
    gdp_per_capita
    .rename(columns={"2015": "GDP per capita"})  # give the "2015" column a descriptive name
    .set_index("Country")                        # label rows by country
    .loc[:, ['GDP per capita', 'Estimates Start After']]  # keep only these two columns
)
gdp_per_capita.head(2)


Out[3]:
GDP per capita Estimates Start After
Country
Afghanistan 599.994 2013.0
Albania 3995.383 2010.0

In [4]:
# Show the three rows with the largest "GDP per capita": order the table from
# highest to lowest, then take the first three positions.
(
    gdp_per_capita
    .sort_values('GDP per capita', ascending=False)
    .iloc[:3]
)


Out[4]:
GDP per capita Estimates Start After
Country
Luxembourg 101994.093 2014.0
Switzerland 80675.308 2015.0
Qatar 76576.080 2014.0

In [5]:
# Discard the extra non-country row, identified by its index label
# (presumably the data-source attribution line that was read in as a record).
gdp_per_capita = gdp_per_capita.drop(
    index='International Monetary Fund, World Economic Outlook Database, April 2016'
)

In [6]:
# Show the last five rows of the table ordered from highest to lowest
# "GDP per capita"; rows with a missing value sort to the end, so they appear here.
(
    gdp_per_capita
    .sort_values('GDP per capita', ascending=False)
    .iloc[-5:]
)


Out[6]:
GDP per capita Estimates Start After
Country
Central African Republic 334.870 2004.0
Burundi 305.783 0.0
South Sudan 220.860 2008.0
Kosovo NaN NaN
Syria NaN 2010.0