In [173]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

In [174]:
path = r'C:\Users\hrao\Documents\Personal\HK\Python\world-development-indicators\Indicators.csv'
data = pd.read_csv(path)
data.shape


Out[174]:
(5656458, 6)

In [175]:
countries = data['CountryName'].unique().tolist()
indicators = data['IndicatorName'].unique().tolist()

In [176]:
data.head(2)


Out[176]:
CountryName CountryCode IndicatorName IndicatorCode Year Value
0 Arab World ARB Adolescent fertility rate (births per 1,000 wo... SP.ADO.TFRT 1960 133.560907
1 Arab World ARB Age dependency ratio (% of working-age populat... SP.POP.DPND 1960 87.797601

In [177]:
yearsFilter = [2010, 2011, 2012, 2013, 2014]

In [178]:
yearsFilter


Out[178]:
[2010, 2011, 2012, 2013, 2014]

In [179]:
countryFilter = random.sample(countries, 2)
countryFilter


Out[179]:
['Ireland', 'Congo, Dem. Rep.']

In [180]:
indicatorsFilter = random.sample(indicators,1)
indicatorsFilter


Out[180]:
['Gross intake ratio to Grade 1 of primary education, both sexes (%)']

In [181]:
filterMesh = (data['IndicatorName'].isin(indicatorsFilter)) & (data['CountryName'] == countryFilter[0]) & (data['Year'].isin(yearsFilter))

In [182]:
country1_data = data.loc[filterMesh]

In [183]:
len(country1_data)


Out[183]:
3

In [184]:
filterMesh = (data['IndicatorName'].isin(indicatorsFilter)) & (data['CountryName'] == countryFilter[1]) & (data['Year'].isin(yearsFilter))

In [185]:
country2_data = data.loc[filterMesh]

In [186]:
len(country2_data)


Out[186]:
4

In [187]:
filteredData1 = []
filteredData2 = []

In [188]:
while len(filteredData1) < len(yearsFilter)-1:
    indicatorsFilter = random.sample(indicators, 1)
    countryFilter = random.sample(countries, 2)
    
    filterMesh = (data['CountryName'] == countryFilter[0]) & (data['IndicatorName'].isin(indicatorsFilter)) & (data['Year'].isin(yearsFilter))
    filteredData1 = data.loc[filterMesh]
    filteredData1 = filteredData1[['CountryName','IndicatorName','Year','Value']]
    
    if len(filteredData1) < len(yearsFilter)-1:
        print('Skipping ... %s since very few rows (%d) found' % (indicatorsFilter, len(filteredData1)))


Skipping ... ['Merchandise exports to developing economies within region (% of total merchandise exports)'] since very few rows (0) found
Skipping ... ['Lead time to import, median case (days)'] since very few rows (3) found

In [189]:
indicatorsFilter


Out[189]:
['Gross value added at factor cost (constant 2005 US$)']

In [190]:
len(filteredData1)


Out[190]:
5

In [191]:
while len(filteredData2) < len(yearsFilter)-1:
    indicatorsFilter = random.sample(indicators, 1)
    countryFilter = random.sample(countries, 2)
    
    filterMesh = (data['IndicatorName'].isin(indicatorsFilter)) & (data['CountryName']==countryFilter[1]) & (data['Year'].isin(yearsFilter))
    filteredData2 = data.loc[filterMesh]
    filteredData2 = filteredData2[['CountryName','IndicatorName','Year','Value']]
    
    old = countryFilter[1]
    countryFilter = random.sample(countries, 1)[0]
    
    if len(filteredData2) < len(yearsFilter)-1:
        print('Skipping .... %s since very few rows (%d) found' % (old, len(filteredData2)))


Skipping .... Iran, Islamic Rep. since very few rows (0) found

In [192]:
len(filteredData2)


Out[192]:
5

In [193]:
if len(filteredData1) < len(filteredData2):
    small = len(filteredData1)
else:
    small = len(filteredData2)

In [194]:
filteredData1 = filteredData1[0:small]
filteredData2 = filteredData2[0:small]

In [195]:
filteredData1


Out[195]:
CountryName IndicatorName Year Value
4850013 Least developed countries: UN classification Gross value added at factor cost (constant 200... 2010 4.094362e+11
5036441 Least developed countries: UN classification Gross value added at factor cost (constant 200... 2011 4.335242e+11
5212544 Least developed countries: UN classification Gross value added at factor cost (constant 200... 2012 4.583464e+11
5386778 Least developed countries: UN classification Gross value added at factor cost (constant 200... 2013 4.833953e+11
5539926 Least developed countries: UN classification Gross value added at factor cost (constant 200... 2014 5.120469e+11

In [196]:
filteredData2


Out[196]:
CountryName IndicatorName Year Value
5000556 St. Vincent and the Grenadines PPG, multilateral (AMT, current US$) 2010 11692000.0
5178554 St. Vincent and the Grenadines PPG, multilateral (AMT, current US$) 2011 11354000.0
5353977 St. Vincent and the Grenadines PPG, multilateral (AMT, current US$) 2012 13728000.0
5512881 St. Vincent and the Grenadines PPG, multilateral (AMT, current US$) 2013 13837000.0
5627315 St. Vincent and the Grenadines PPG, multilateral (AMT, current US$) 2014 12614000.0

In [197]:
%matplotlib inline
import matplotlib.pyplot as plt

fig, axis = plt.subplots()

axis.yaxis.grid(True)
axis.set_title(indicatorsFilter[0], fontsize=10)
axis.set_xlabel(filteredData1['CountryName'].iloc[0], fontsize=10)
axis.set_ylabel(filteredData2['CountryName'].iloc[0], fontsize=10)

X = filteredData1['Value']
Y = filteredData2['Value']

axis.scatter(X, Y)


Out[197]:
<matplotlib.collections.PathCollection at 0x21b013270f0>

In [198]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(20, 10))

ax.set_ylim(min(0, filteredData1['Value'].min()), 2*filteredData1['Value'].max())
ax.set_title('Indicator Name : ' + indicatorsFilter[0])
ax.plot(filteredData1['Year'], filteredData1['Value'], 'r--', label=filteredData1['CountryName'].unique())

legend = plt.legend(loc='upper center', 
                    shadow=True, 
                    prop={'weight':'roman', 'size':'xx-large'})

frame = legend.get_frame()
frame.set_facecolor('.95')
plt.show()



In [199]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(20, 10))

ax.set_ylim(min(0,filteredData2['Value'].min()), 2*filteredData2['Value'].max())

ax.set_title('Indicator Name : ' + indicatorsFilter[0])
ax.plot(filteredData2['Year'], filteredData2['Value'],
       label = filteredData2['CountryName'].unique(),
       color = 'purple', lw=1, ls='-',
       marker='s', markersize=20,
       markerfacecolor='yellow', markeredgewidth=4, markeredgecolor='blue')

legend = plt.legend(loc = 'upper left', 
                   shadow=True, 
                   prop={'weight':'roman','size':'xx-large'})

frame = legend.get_frame()
frame.set_facecolor('.95')

plt.show()



In [203]:
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np

countof_angles = 36
countof_radii = 8

array_rad = np.linspace(0.125, 1.0, countof_radii)

array_ang = np.linspace(0, 2*np.pi, countof_angles, endpoint=False)

array_ang = np.repeat(array_ang[...,np.newaxis], countof_radii, axis=1)

x = np.append(0, (array_rad*np.cos(array_ang)).flatten())
y = np.append(0, (array_rad*np.sin(array_ang)).flatten())

z = np.sin(-x*y)

fig = plt.figure(figsize=(20, 10))
ax = fig.gca(projection='3d')

ax.plot_trisurf(x, y, z, cmap=cm.autumn, linewidth=0.2)

plt.show()
fig.savefig('vis_3d.png')



In [209]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

n_points = 1000

radius = 2*np.random.rand(n_points)
angles = 2 * (np.pi) * np.random.rand(n_points)
area = 400 * (radius**2) * np.random.rand(n_points)

colors = angles

fig = plt.figure(figsize=(20, 10))
ax = plt.subplot(111, polar=True)

c = plt.scatter(angles, radius, c=colors, s=area, cmap=plt.cm.hsv)
c.set_alpha(1.95)

plt.show()
fig.savefig('vis_bubbleplot.png')



In [211]:
np.random.seed(452)

A1 = np.random.normal(0, 1, 100)
A2 = np.random.normal(0, 2, 100)
A3 = np.random.normal(0, 1.5, 100)

data = [A1, A2, A3]

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))

bplot1 = axes[1].boxplot(data, 
                        notch=True,
                        vert=True,
                        patch_artist=True)

bplot2 = axes[0].boxplot(data,
                        vert=True,
                        patch_artist=True)

colors = ['tomato', 'darkorchid', 'lime']

for bplot in (bplot1, bplot2):
    for patch, color in zip(bplot['boxes'], colors):
        patch.set_facecolor(color)
        
for axis in axes:
    axis.yaxis.grid(True)
    axis.set_xticks([y for y in range(len(data))],)
    axis.set_xlabel('Sample X-Label', fontsize=20)
    axis.set_ylabel('Sample Y-Label', fontsize=20)

plt.setp(axes, xticks = [y for y in range(len(data))], 
         xticklabels=['X1','X2','X3'])

plt.show()
fig.savefig('vis_boxplot.png')



In [ ]: