In [1]:
# Data Source: https://www.kaggle.com/worldbank/world-development-indicators
# Folder: 'world-development-indicators'
In [2]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
In [3]:
# Read the indicators table from the local Kaggle download and show its shape.
indicators_csv = './world-development-indicators/Indicators.csv'
data = pd.read_csv(indicators_csv)
data.shape
Out[3]:
In [4]:
# Preview the first five rows of the raw table.
data.head(n=5)
Out[4]:
The dataset contains different indicators for different countries, each with the year and the value of the indicator.
In [5]:
# Number of distinct years present in the dataset
# (distinct values are kept in order of first appearance).
years = data.Year.drop_duplicates().tolist()
len(years)
Out[5]:
In [6]:
# Show the first and last year covered.
first_year, last_year = min(years), max(years)
print(first_year, " to ", last_year)
In [7]:
# Same preview, but with Year as the row index (does not modify `data`).
data.set_index('Year').head()
Out[7]:
In [8]:
# Boolean row masks used to slice `data`.
# The export series is indicator 'TX.VAL.MRCH.CD.WT'; 'NE.EXP.GNFS.CD' was an
# alternative indicator code that is left disabled here.
mask_export = data['IndicatorCode'].eq('TX.VAL.MRCH.CD.WT')
mask_usa = data['CountryCode'].eq('USA')
mask_chn = data['CountryCode'].eq('CHN')
Let's apply the mask for USA:
In [9]:
# Rows that are both the export indicator and the USA, indexed by year.
usa_export_rows = mask_export & mask_usa
data.loc[usa_export_rows].set_index('Year').head()
Out[9]:
In [10]:
# Export columns for China and the USA, indexed on Year, values in billion dollars.
# The year index is sorted explicitly: later steps (diff, rolling mean, line plots)
# treat consecutive rows as consecutive years, so the frame must not depend on
# the row order of the CSV file.
exports = pd.DataFrame(index=np.sort(data.Year.unique()))
# Assignment aligns on the Year index; years without a value for a country become NaN.
exports['ExportChn'] = data[mask_export & mask_chn].set_index('Year').Value / 1e9
exports['ExportUsa'] = data[mask_export & mask_usa].set_index('Year').Value / 1e9
exports.head()
Out[10]:
In [11]:
# Exports of China relative to the exports of the USA (USA is the baseline, fixed at 1)
exports['ChnWrtUsa'] = exports.ExportChn / exports.ExportUsa
exports['UsaWrtUsa'] = 1
# Absolute change of the exports versus the previous row (i.e. the previous year)
exports['deltaChn'] = exports.ExportChn.diff()
exports['deltaUsa'] = exports.ExportUsa.diff()
# Year-on-year growth in percent. The change is divided by the PREVIOUS year's
# value (shift()), not the current one: dividing by the current value understates
# growth (a doubling would read as +50 % instead of +100 %).
exports['percentDeltaChn'] = exports.deltaChn / exports.ExportChn.shift() * 100
exports['percentDeltaUsa'] = exports.deltaUsa / exports.ExportUsa.shift() * 100
# Smooth the yearly growth with a centered moving average over 10 years;
# windows that are incomplete or contain NaN yield NaN.
exports['percentDeltaChnMA'] = exports.percentDeltaChn.rolling(window=10, center=True).mean()
exports['percentDeltaUsaMA'] = exports.percentDeltaUsa.rolling(window=10, center=True).mean()
exports.loc[2000:2005, :]
Out[11]:
In [12]:
# Export levels and yearly growth from 2000 onwards; rows with any missing value are dropped.
columns_of_interest = ['ExportUsa', 'ExportChn', 'percentDeltaUsa', 'percentDeltaChn']
exports.loc[2000:, columns_of_interest].dropna()
Out[12]:
In 2007, China took over the leading role as merchandise exporter.
In [13]:
# 2x2 overview figure comparing the exports of the USA and China.
# Uses the explicit Figure/Axes interface so every panel is addressed by name.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
(ax_levels, ax_ratio), (ax_growth, ax_hist) = axes

# Yearly evolution of the exports of China and the USA
ax_levels.plot(exports['ExportUsa'], label="USA")
ax_levels.plot(exports['ExportChn'], label="China")
ax_levels.set_title('Merchandise Exports: USA vs China')
ax_levels.set_ylabel('export (billion USD)')
ax_levels.legend()

# How China caught up: both series expressed relative to the USA (USA == 1)
ax_ratio.plot(exports['UsaWrtUsa'], label="USA")
ax_ratio.plot(exports['ChnWrtUsa'], label="China")
ax_ratio.set_title('China export relative to USA export')
ax_ratio.legend()  # labels were set but never shown

# Evolution of the year on year export growth (smoothed)
ax_growth.plot(exports['percentDeltaUsaMA'], label="USA")
ax_growth.plot(exports['percentDeltaChnMA'], label="China")
ax_growth.set_title('Year on year export growth (%) 10 years moving average')
ax_growth.legend()  # labels were set but never shown

# Distribution of the yearly growth percentages for both countries.
# The `normed` keyword was removed from Matplotlib; plain counts are the default
# (density=False), which matches the 'number of years' axis label.
ax_hist.hist(exports['percentDeltaUsa'].dropna(), bins=10, label='USA', alpha=0.5)
ax_hist.hist(exports['percentDeltaChn'].dropna(), bins=10, label='China', alpha=0.5)
ax_hist.set_xlabel('Yearly growth (%) of export')
ax_hist.set_ylabel('number of years')
ax_hist.set_title('Distribution of yearly export growths')
ax_hist.legend()  # needed to tell the two overlaid histograms apart

plt.show()