In [70]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Notebook-wide plot styling.
# set_style() only applies seaborn *style* keys; the label size is configured
# through matplotlib's rcParams below (the valid key is 'axes.labelsize').
sns.set_style(rc={'font.family': ['sans-serif']})
sns.set_context("notebook")
# Applied after set_context(), which assigns its own font-size parameters.
plt.rcParams['figure.figsize'] = (8, 6)
plt.rcParams['axes.labelsize'] = 18
This notebook deals with downloading and processing data on GDP by country.
We will download the data from the World Bank. A Makefile
to automate this process is located in this repo at `../data_sets/GDP_by_Country_WorldBank/Makefile`. For convenience, here is the file:
In [ ]:
# %load ../data_sets/GDP_by_Country_WorldBank/Makefile
# Fetches the World Bank indicator ny.gdp.mktp.cd in CSV format.
DOWNLOAD = data.zip
OUT = ny.gdp.mktp.cd_Indicator_en_csv_v2.csv
URL = http://api.worldbank.org/v2/en/indicator/ny.gdp.mktp.cd?downloadformat=csv

.PHONY: download clean

# Download the zip archive, extract it, then remove the archive and the
# metadata/XML files that were unpacked alongside the data.
download:
	rm -f $(DOWNLOAD)
	# Quote the URL: it contains '?', which the shell treats as a glob character.
	wget "$(URL)" -O $(DOWNLOAD)
	# -o overwrites existing files without prompting, so re-runs stay non-interactive.
	unzip -o $(DOWNLOAD)
	rm -f $(DOWNLOAD) Metadata*csv *xml

# Remove the extracted data file.
clean:
	rm -f $(OUT)
In [81]:
import pandas as pd

# Read the World Bank GDP export. The first two lines of the file are skipped
# so that parsing starts at the header row.
gdp_csv_path = "../data_sets/GDP_by_Country_WorldBank/ny.gdp.mktp.cd_Indicator_en_csv_v2.csv"
df = pd.read_csv(gdp_csv_path, skiprows=2, quotechar='"')
df.head()
Out[81]:
In [82]:
# Drop the columns at positions 2 and 3 plus the last two columns, selected by
# position and removed in a single call.
# NOTE(review): presumably these are the indicator name/code columns and
# trailing empty columns -- confirm against the df.head() output above.
# A plain list indexes the column Index directly, so numpy is not needed here.
# Re-running this cell on its own drops four more columns; re-run the loading
# cell first.
colnames_to_drop = df.columns[[2, 3, -2, -1]]
df = df.drop(columns=colnames_to_drop)
df.head()
Out[82]:
In [109]:
# Plot one GDP time series per row of `df`, labelling only the five rows with
# the largest 2013 value so the legend stays readable.
year_columns = df.columns[2:]  # every column after the first two is a year
years = [int(col) for col in year_columns]

# Rows with a missing 2013 value sort last and therefore never rank in the top five.
top_5 = df.sort_values('2013', na_position='last', ascending=False).head(5)
top_5_names = set(top_5['Country Name'])

fig, ax = plt.subplots()
# Walk the rows once instead of re-filtering the whole frame for every country.
for _, row in df.iterrows():
    country = row['Country Name']
    # A label of None keeps the line out of the legend.
    label = country if country in top_5_names else None
    ax.plot(years, row[year_columns].values, label=label)

ax.legend(loc='best')
ax.set_title("GDP by country")
ax.set_xlabel("Year")
ax.set_ylabel("US dollars")
plt.show()
In [ ]: