In [ ]:
# The goal of this coding project is to analyze various statistics about the nations of the world.
# There are approximately 20 information about each country.
# I have used pairwise correlation, mapping, linear regression model, ranking function, 
# distance function, and K nearest neighbor function to find meaningful patterns.
# If you want to contribute to the project, contact Kris Pan via kriskwpan@gmail.com

In [1]:
import numpy as np
import pandas as pd

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
fort = pd.read_csv('fort.csv')

In [4]:
# Checking if csv was loaded correctly
fort


Out[4]:
Country Name Country Code Government Type Capital City Date of Founding/Independence Latitude of Capital Longitude of Capital Population Life Expectancy GDP (PPP) in US $ ... Literacy Rate (%) Health Expenditure/GDP Military Expenditure/GDP Renewable Water Resources (cu km) Net Migration Rate/1000 Population Labor Force Internet Users (circa 2009) Commercial Prime Lending Rate Unnamed: 21 Gov Type Code
0 Afghanistan AFG Islamic republic Kabul 8/19/1919 34 31 N 69 11 E 31822848 50.49 4.530000e+10 ... 28.1 9.6 4.74 65.33 -1.83 7512000.0 1000000.0 15.00 NaN 1
1 Albania ALB parliamentary democracy Tirana 11/28/1912 41 19 N 19 49 E 3020209 77.96 2.834000e+10 ... 96.8 6.3 1.47 41.70 -3.31 1098000.0 1300000.0 9.52 NaN 0
2 Algeria DZA republic Algiers 7/5/1962 36 45 N 3 03 E 38813722 76.39 2.850000e+11 ... 72.6 3.9 4.48 11.67 -0.93 11150000.0 4700000.0 8.00 NaN 1
3 Andorra AND parliamentary democracy Andorra la Vella 1278 42 30 N 1 31 E 85458 82.65 3.163000e+09 ... 100.0 7.2 NaN NaN 0.00 36060.0 67100.0 NaN NaN 0
4 Angola AGO republic Luanda 11/11/1975 8 50 S 13 13 E 19088106 55.29 1.320000e+11 ... 70.4 3.5 3.63 148.00 0.47 9018000.0 606700.0 15.00 NaN 1
5 Antigua and Barbuda ATG constitutional monarchy Saint John's 11/1/1981 17 07 N 61 51 W 91295 76.12 1.610000e+09 ... 99.0 5.9 NaN 0.05 2.23 30000.0 65000.0 10.30 NaN 0
6 Argentina ARG republic Buenos Aires 7/9/1816 34 35 S 58 40 W 43024374 77.51 7.710000e+11 ... 97.9 8.1 0.91 814.00 0.00 17320000.0 13694000.0 16.40 NaN 1
7 Armenia ARM republic Yerevan 9/21/1991 40 10 N 44 30 E 3060631 74.12 2.061000e+10 ... 99.6 4.3 3.92 7.77 -5.88 1394000.0 208200.0 16.50 NaN 1
8 Australia AUS federal parliamentary democracy Canberra 1/1/1901 35 16 S 149 08 E 22507617 82.07 9.980000e+11 ... 99.0 9.0 1.71 492.00 5.74 12440000.0 15810000.0 6.20 NaN 0
9 Austria AUT federal republic Vienna 11/12/1918 48 12 N 16 22 E 8223062 80.17 3.610000e+11 ... 98.0 10.6 0.81 77.70 1.76 3737000.0 6143000.0 2.20 NaN 1
10 Azerbaijan AZE republic Baku 8/30/1991 40 23 N 49 52 E 9686210 71.91 1.030000e+11 ... 99.8 5.2 4.64 34.68 0.00 4680000.0 2420000.0 17.00 NaN 1
11 Bahamas, The BHS constitutional parliamentary democracy Nassau 7/10/1973 25 05 N 77 21 W 321834 71.93 1.140000e+10 ... 95.6 7.7 NaN 0.02 0.00 196900.0 115800.0 4.75 NaN 0
12 Bahrain BHR constitutional monarchy Manama 8/15/1971 26 14 N 50 34 E 1314089 78.58 3.496000e+10 ... 94.6 3.8 3.14 0.12 13.60 716500.0 419500.0 6.80 NaN 0
13 Bangladesh BGD parliamentary democracy Dhaka 12/16/1971 23 43 N 90 24 E 166280712 70.65 3.250000e+11 ... 57.7 3.7 1.35 1227.00 -0.02 78620000.0 617300.0 13.00 NaN 0
14 Barbados BRB parliamentary democracy Bridgetown 11/30/1966 13 06 N 59 37 W 289680 74.99 7.004000e+09 ... 99.7 7.7 NaN 0.08 -0.30 141800.0 188000.0 8.50 NaN 0
15 Belarus BLR republic Minsk 8/25/1991 53 54 N 27 34 E 9608058 72.15 1.500000e+11 ... 99.6 5.3 1.20 58.00 0.78 5000000.0 2643000.0 10.00 NaN 1
16 Belgium BEL federal parliamentary democracy under a consti... Brussels 10/4/1830 50 50 N 4 20 E 10449361 79.92 4.220000e+11 ... 99.0 10.6 1.05 18.30 1.22 5150000.0 8113000.0 3.50 NaN 0
17 Belize BLZ parliamentary democracy Belmopan 9/21/1981 17 15 N 88 46 W 340844 68.49 3.083000e+09 ... 76.9 5.7 1.08 18.55 0.00 120500.0 36000.0 11.80 NaN 0
18 Benin BEN republic Porto-Novo 8/1/1960 6 29 N 2 37 E 10160556 61.07 1.665000e+10 ... 42.4 4.6 1.03 26.39 0.00 3662000.0 200100.0 NaN NaN 1
19 Bhutan BTN constitutional monarchy Thimphu 1907 27 28 N 89 38 E 733643 68.98 5.235000e+09 ... 52.8 4.1 NaN 78.00 0.00 336400.0 50000.0 14.00 NaN 0
20 Bolivia BOL republic La Paz 8/6/1825 16 30 S 68 09 W 10631486 68.55 5.911000e+10 ... 91.2 4.9 1.47 622.50 -0.69 4922000.0 1103000.0 11.41 NaN 1
21 Bosnia and Herzegovina BIH federal democratic republic Sarajevo 3/1/1992 43 52 N 18 25 E 3871643 76.33 3.216000e+10 ... 98.0 10.2 1.35 37.50 -0.38 1490000.0 1422000.0 6.73 NaN 1
22 Botswana BWA parliamentary republic Gaborone 9/30/1966 24 38 S 25 54 E 2155784 54.06 3.400000e+10 ... 85.1 5.1 2.31 12.24 4.62 1308000.0 120000.0 10.00 NaN 0
23 Brazil BRA federal republic Brasilia 9/7/1822 15 47 S 47 55 W 202656788 73.28 2.420000e+12 ... 90.4 8.9 1.47 8233.00 -0.15 107300000.0 75982000.0 26.90 NaN 1
24 Brunei VGB constitutional sultanate Bandar Seri Begawan 1/1/1984 4 53 N 114 56 E 422675 76.77 2.225000e+10 ... 95.4 2.5 2.43 8.50 2.47 205800.0 314900.0 5.50 NaN 0
25 Bulgaria BGR parliamentary democracy Sofia 3/3/1878 42 41 N 23 19 E 6924716 74.33 1.050000e+11 ... 98.4 7.6 1.46 21.30 -2.89 2551000.0 3395000.0 9.10 NaN 0
26 Burkina Faso BFA parliamentary republic Ouagadougou 8/5/1960 12 22 N 1 31 W 18365123 54.78 2.651000e+10 ... 28.7 6.5 1.39 12.50 0.00 6668000.0 178100.0 NaN NaN 0
27 Burundi BDI republic Bujumbura 7/1/1962 3 22 S 29 21 E 10395931 59.55 5.750000e+09 ... 67.2 8.7 2.39 12.54 0.00 4245000.0 157800.0 13.70 NaN 1
28 Cabo Verde BDI republic Praia 7/5/1975 14 55 N 23 31 W 538535 71.57 2.222000e+09 ... 84.9 4.8 0.51 0.30 -0.64 196100.0 150000.0 10.10 NaN 1
29 Cambodia KHM multiparty democracy under a constitutional mo... Phnom Penh 11/9/1953 11 33 N 104 55 E 15458332 63.78 3.964000e+10 ... 73.9 5.7 1.54 476.10 -0.32 7900000.0 78500.0 13.00 NaN 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
167 Swaziland SWZ monarchy Mbabane 9/6/1968 26 19 S 31 08 E 1419623 50.54 6.259000e+09 ... 87.8 8.0 3.17 4.51 0.00 424100.0 90100.0 8.50 NaN 1
168 Sweden SWE constitutional monarchy Stockholm 6/26/1523 59 20 N 18 03 E 9723809 81.89 3.940000e+11 ... 99.0 9.4 1.18 174.00 5.46 5107000.0 8398000.0 3.30 NaN 0
169 Switzerland CHE formally a confederation but similar in struct... Bern 8/15/1291 46 55 N 7 28 E 8061516 82.39 3.710000e+11 ... 99.0 10.9 0.76 53.50 5.43 4976000.0 6152000.0 2.70 NaN 1
170 Syria CHE republic under an authoritarian regime Damascus 4/17/1946 33 30 N 36 18 E 17951639 68.41 1.080000e+11 ... 84.1 3.7 NaN 16.80 -113.51 5014000.0 4469000.0 10.50 NaN 1
171 Taiwan SYR multiparty democracy Taipei 1949 25 02 N 121 31 E 23359928 79.84 9.260000e+11 ... 96.1 NaN NaN 67.00 0.90 11550000.0 16147000.0 2.90 NaN 0
172 Tajikistan TJK republic Dushanbe 9/9/1991 38 33 N 68 46 E 8051512 67.06 1.920000e+10 ... 99.7 5.8 NaN 21.91 -1.17 2209000.0 700000.0 22.00 NaN 1
173 Tanzania TJK republic Dar es Salaam 4/26/1964 6 48 S 39 17 E 49639138 61.24 7.929000e+10 ... 67.8 7.3 1.13 96.27 -0.57 25590000.0 678000.0 13.60 NaN 1
174 Thailand THA constitutional monarchy Bangkok 1238 13 45 N 100 31 E 67741401 74.18 6.730000e+11 ... 93.5 4.1 1.47 438.60 0.00 39380000.0 17483000.0 6.90 NaN 0
175 Timor-Leste TLS republic Dili 11/28/1975 8 35 S 125 36 E 1201542 67.39 2.541000e+10 ... 58.3 5.1 2.92 NaN -3.87 418200.0 2100.0 12.30 NaN 1
176 Togo TGO republic under transition to multiparty democr... Lome 4/27/1960 6 07 N 1 13 E 7351374 64.06 7.348000e+09 ... 60.4 8.0 1.60 14.70 0.00 2595000.0 356300.0 NaN NaN 1
177 Tonga TON constitutional monarchy Nuku'alofa 6/4/1970 21 08 S 175 12 W 106440 75.82 8.460000e+08 ... 99.0 5.3 NaN NaN -17.85 39960.0 8400.0 10.90 NaN 0
178 Trinidad and Tobago TTO parliamentary democracy Port of Spain 8/31/1962 10 39 N 61 31 W 1223916 72.29 2.714000e+10 ... 98.8 5.7 NaN 3.84 -6.42 621000.0 593000.0 7.50 NaN 0
179 Tunisia TUN republic Tunis 3/20/1956 36 48 N 10 11 E 10937521 75.68 1.080000e+11 ... 79.1 6.2 1.55 4.60 -1.74 3974000.0 3500000.0 7.31 NaN 1
180 Turkey TUR republican parliamentary democracy Ankara 10/29/1923 39 56 N 32 52 E 81619392 73.29 1.170000e+12 ... 94.1 6.7 2.31 211.60 0.46 27910000.0 27233000.0 18.50 NaN 0
181 Turkmenistan TKM secular democracy Ashgabat 10/27/1991 37 57 N 58 23 E 5171943 69.47 5.516000e+10 ... 99.6 2.7 NaN 24.77 -1.86 2300000.0 80400.0 NaN NaN 0
182 Tuvalu TUV parliamentary democracy Funafuti 10/1/1978 8 31 S 179 13 E 10782 65.81 4.000000e+07 ... NaN 17.3 NaN NaN -6.86 3615.0 NaN NaN NaN 0
183 Uganda UGA republic Kampala 10/9/1962 0 19 N 32 33 E 35918915 54.46 5.437000e+10 ... 73.2 9.5 1.45 66.00 -0.76 17400000.0 3200000.0 23.70 NaN 1
184 Ukraine UKR republic Kyiv 8/24/1991 50 26 N 30 31 E 44291413 69.14 3.370000e+11 ... 99.7 7.3 2.77 139.60 -0.06 22170000.0 7770000.0 16.00 NaN 1
185 United Arab Emirates ARE federation with specified powers delegated to ... Abu Dhabi 12/2/1971 24 28 N 54 22 E 5628805 77.09 2.700000e+11 ... 90.0 3.3 5.50 0.15 13.58 4588000.0 3449000.0 NaN NaN 1
186 United Kingdom ARE constitutional monarchy London 4/12/1927 51 30 N 0 05 W 63742977 80.42 2.390000e+12 ... 99.0 9.3 2.49 147.00 2.56 30150000.0 51444000.0 4.40 NaN 0
187 United States TZA constitution-based federal republic; strong de... Washington, DC 7/4/1776 38 53 N 77 02 W 318892103 79.56 1.670000e+13 ... 99.0 17.9 4.35 3069.00 2.45 155400000.0 245000000.0 3.30 NaN 0
188 Uruguay URY constitutional republic Montevideo 8/25/1825 34 51 S 56 10 W 3332972 76.81 5.627000e+10 ... 98.1 8.0 1.95 139.00 -1.08 1700000.0 1405000.0 11.30 NaN 0
189 Uzbekistan UZB republic Tashkent 9/1/1991 41 19 N 69 15 E 28929716 73.29 1.130000e+11 ... 99.4 5.4 NaN 48.87 -2.46 16990000.0 4689000.0 NaN NaN 1
190 Vanuatu VUT parliamentary republic Port-Vila 7/30/1980 17 44 S 168 19 E 266937 72.72 1.270000e+09 ... 83.2 4.1 NaN NaN -1.50 115900.0 17000.0 6.70 NaN 0
191 Venezuela VUT federal republic Caracas 7/5/1811 10 29 N 66 52 W 28868486 74.39 4.070000e+11 ... 95.5 5.2 1.05 1233.00 0.00 14010000.0 8918000.0 18.00 NaN 1
192 Vietnam VNM Communist state Hanoi 9/2/1945 21 02 N 105 51 E 93421835 72.91 3.590000e+11 ... 93.4 6.8 2.37 884.10 -0.32 52930000.0 23382000.0 10.50 NaN 1
193 Western Sahara ESH legal status of territory and issue of soverei... Laayoune 2/27/1976 27 09 N 13 12 W 554795 62.27 9.065000e+08 ... NaN NaN NaN NaN NaN 144000.0 NaN NaN NaN 1
194 Yemen YEM republic Sanaa 5/22/1990 15 21 N 44 12 E 26052966 64.83 6.163000e+10 ... 65.3 5.5 4.02 2.10 2.61 7100000.0 2349000.0 22.00 NaN 1
195 Zambia ZMB republic Lusaka 10/24/1964 15 25 S 28 17 E 14638505 51.83 2.547000e+10 ... 61.4 6.1 1.55 105.20 -0.72 6275000.0 816200.0 10.40 NaN 1
196 Zimbabwe ZWE parliamentary democracy Harare 4/18/1980 17 49 S 31 02 E 13771721 55.68 7.496000e+09 ... 83.6 NaN 2.94 20.00 21.78 3939000.0 1423000.0 28.00 NaN 0

197 rows × 23 columns


In [5]:
# Determining characteristics of objects within the matrix for data cleansing
fort.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197 entries, 0 to 196
Data columns (total 23 columns):
Country Name                          197 non-null object
Country Code                          197 non-null object
Government Type                       197 non-null object
Capital City                          197 non-null object
Date of Founding/Independence         195 non-null object
Latitude of Capital                   196 non-null object
Longitude of Capital                  196 non-null object
Population                            197 non-null int64
Life Expectancy                       197 non-null float64
GDP (PPP) in US $                     197 non-null float64
Area (sq km)                          197 non-null float64
Land Boundaries (km)                  197 non-null float64
Coastline (km)                        197 non-null float64
Literacy Rate (%)                     193 non-null float64
Health Expenditure/GDP                189 non-null float64
Military Expenditure/GDP              140 non-null float64
Renewable Water Resources (cu km)     172 non-null float64
Net Migration Rate/1000 Population    193 non-null float64
Labor Force                           195 non-null float64
Internet Users (circa 2009)           189 non-null float64
Commercial Prime Lending Rate         166 non-null float64
Unnamed: 21                           0 non-null float64
Gov Type Code                         197 non-null int64
dtypes: float64(14), int64(2), object(7)
memory usage: 35.5+ KB

In [7]:
# Basic Statstical imformation for each data
fort.describe()


Out[7]:
Population Life Expectancy GDP (PPP) in US $ Area (sq km) Land Boundaries (km) Coastline (km) Literacy Rate (%) Health Expenditure/GDP Military Expenditure/GDP Renewable Water Resources (cu km) Net Migration Rate/1000 Population Labor Force Internet Users (circa 2009) Commercial Prime Lending Rate Unnamed: 21 Gov Type Code
count 1.970000e+02 197.000000 1.970000e+02 1.970000e+02 197.000000 197.000000 193.000000 189.000000 140.000000 172.000000 193.000000 1.950000e+02 1.890000e+02 166.000000 0.0 197.000000
mean 3.632940e+07 70.668071 4.401998e+11 6.911638e+05 2746.610152 3870.391878 84.856995 6.989947 1.934429 313.004651 -0.355751 1.692596e+07 9.553316e+06 11.113735 NaN 0.553299
std 1.353657e+08 8.923301 1.642386e+12 1.910397e+06 3502.942654 15946.066456 18.457233 3.035474 1.554276 861.174188 11.537366 6.883974e+07 3.553092e+07 6.794550 NaN 0.498418
min 9.488000e+03 49.440000 4.000000e+07 2.000000e+00 0.000000 0.000000 27.000000 1.900000 0.000000 0.020000 -113.510000 3.615000e+03 2.100000e+03 0.250000 NaN 0.000000
25% 1.693398e+06 64.830000 1.140000e+10 2.571300e+04 266.000000 37.000000 73.900000 4.900000 1.050000 13.065000 -1.930000 7.468000e+05 1.500000e+05 6.050000 NaN 0.000000
50% 7.351374e+06 73.230000 4.267000e+10 1.205380e+05 1744.000000 491.000000 93.500000 6.600000 1.470000 59.800000 -0.130000 3.373000e+06 1.000000e+06 9.940000 NaN 1.000000
75% 2.469214e+07 76.800000 2.490000e+11 5.279680e+05 4158.000000 2413.000000 98.900000 8.800000 2.375000 200.000000 0.900000 9.477500e+06 4.700000e+06 14.000000 NaN 1.000000
max 1.355693e+09 89.570000 1.670000e+13 1.709824e+07 22457.000000 202080.000000 100.000000 19.500000 10.320000 8233.000000 83.820000 7.976000e+08 3.890000e+08 44.000000 NaN 1.000000

In [9]:
import plotly 
plotly.tools.set_credentials_file(username='pandamic', api_key='cg2kpwsuW7kMWub4tUhW')
import plotly.plotly as py

In [10]:
# Data map reveal that renewable water is concentrated on few countries.
# Coastline,land boundaries, or location 
# does not seem to have meaningful correlation with renewable water resource.

data = [ dict(
        type = 'choropleth',
        locations = fort['Country Code'],
        z = fort['Renewable Water Resources (cu km)'],
        text = fort['Country Name'],
        colorscale = [[0,"rgb(5, 10, 172)"],[0.35,"rgb(40, 60, 190)"],[0.5,"rgb(70, 100, 245)"],\
            [0.6,"rgb(90, 120, 245)"],[0.7,"rgb(106, 137, 247)"],[1,"rgb(220, 220, 220)"]],
        autocolorscale = False,
        reversescale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '(cu km)',
            title = 'Renewable Water Resources'),
      ) ]

layout = dict(
    title = 'Global Renewable Water Resources',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            type = 'Mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
py.iplot( fig, validate=False, filename='d3-world-map' )


High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~pandamic/0 or inside your plot.ly account where it is named 'd3-world-map'
Out[10]:

In [209]:
# Top 10 Countries highest life expectancy. 
# Joint plot will be used to determine if GDP (PPP) in USD by country have correlation.
fort.nlargest(10, 'Life Expectancy')


Out[209]:
Country Name Country Code Government Type Capital City Date of Founding/Independence Latitude of Capital Longitude of Capital Population Life Expectancy GDP (PPP) in US $ ... Literacy Rate (%) Health Expenditure/GDP Military Expenditure/GDP Renewable Water Resources (cu km) Net Migration Rate/1000 Population Labor Force Internet Users (circa 2009) Commercial Prime Lending Rate Unnamed: 21 Gov Type Code
116 Monaco MCO constitutional monarchy Monaco 1419 43 44 N 7 25 E 30508 89.57 6.213000e+09 ... 99.0 4.3 NaN NaN 2.85 52490.0 23000.0 NaN NaN 0
84 Japan JPN a parliamentary government with a constitution... Tokyo 5/3/1947 35 41 N 139 45 E 127103388 84.46 4.730000e+12 ... 99.0 9.3 0.99 430.0 0.00 65620000.0 99182000.0 1.50 NaN 0
156 Singapore SGP parliamentary republic Singapore 8/9/1965 1 17 N 103 51 E 5567301 84.38 3.390000e+11 ... 95.9 4.6 3.52 0.6 14.55 3444000.0 3235000.0 5.38 NaN 0
149 San Marino SMR republic San Marino 9/5/0301 43 56 N 12 25 E 32742 83.18 1.306000e+09 ... 96.0 7.2 NaN NaN 8.31 21960.0 17000.0 5.92 NaN 1
3 Andorra AND parliamentary democracy Andorra la Vella 1278 42 30 N 1 31 E 85458 82.65 3.163000e+09 ... 100.0 7.2 NaN NaN 0.00 36060.0 67100.0 NaN NaN 0
169 Switzerland CHE formally a confederation but similar in struct... Bern 8/15/1291 46 55 N 7 28 E 8061516 82.39 3.710000e+11 ... 99.0 10.9 0.76 53.5 5.43 4976000.0 6152000.0 2.70 NaN 1
8 Australia AUS federal parliamentary democracy Canberra 1/1/1901 35 16 S 149 08 E 22507617 82.07 9.980000e+11 ... 99.0 9.0 1.71 492.0 5.74 12440000.0 15810000.0 6.20 NaN 0
82 Italy ITA republic Rome 3/17/1861 41 54 N 12 29 E 61680122 82.03 1.810000e+12 ... 99.0 9.5 1.69 191.3 4.29 25740000.0 29235000.0 5.20 NaN 1
168 Sweden SWE constitutional monarchy Stockholm 6/26/1523 59 20 N 18 03 E 9723809 81.89 3.940000e+11 ... 99.0 9.4 1.18 174.0 5.46 5107000.0 8398000.0 3.30 NaN 0
100 Liechtenstein LIE hereditary constitutional monarchy Vaduz 1/23/1719 47 08 N 9 31 E 37313 81.68 3.200000e+09 ... 100.0 NaN NaN NaN 4.72 35830.0 23000.0 NaN NaN 0

10 rows × 23 columns


In [211]:
# Histogram and Scatterplot reveal there is no meaningful correlation between 
# the two data. Correlation must be tested with other data to determine 
# which factor influence Life Expectancy/GDP PPP in US$
sns.jointplot(x='GDP (PPP) in US $',y='Life Expectancy',data=fort,marginal_kws=dict(bins=20, rug=True))


Out[211]:
<seaborn.axisgrid.JointGrid at 0x1f291a20>

In [212]:
# Heatmap of correlation reveal strong correlation between Life Expectancy/Literacy Rate, Internet Users/GDP (PPP) in USD,
# Internet Users/Population, Labor Force/Population, GDP (PPP) in USD/Population, Land Area/Renewable Water Resources, Land Area/Land Boundaries
# The weakest correlation was between Commercial Prime Lending Rate/Life Expectancy

In [213]:
sns.heatmap(fort.corr())


Out[213]:
<matplotlib.axes._subplots.AxesSubplot at 0x1aa03cf8>

In [282]:
fort.corr(method='pearson')


Out[282]:
Population Life Expectancy GDP (PPP) in US $ Area (sq km) Land Boundaries (km) Coastline (km) Literacy Rate (%) Health Expenditure/GDP Military Expenditure/GDP Renewable Water Resources (cu km) ... Internet Users (circa 2009) Commercial Prime Lending Rate Unnamed: 21 Gov Type Code GDP per capita Literacy Rank Health Expenditure Rank Renewable Water Rank Social Welfare Score Social Welfare Rank
Population 1.000000 0.014249 0.697699 0.453228 0.575146 0.120474 -0.042873 -0.070402 0.032931 0.430337 ... 0.762276 -0.048858 NaN 0.129869 -0.052047 0.062101 0.109791 -0.320585 -0.065251 -0.060485
Life Expectancy 0.014249 1.000000 0.175169 0.033022 -0.219494 0.162933 0.727242 0.122042 -0.061292 0.078324 ... 0.153988 -0.479638 NaN -0.359589 0.575330 -0.699306 -0.193162 -0.005842 -0.490299 -0.484873
GDP (PPP) in US $ 0.697699 0.175169 1.000000 0.592458 0.500612 0.204549 0.124609 0.194036 0.102590 0.474667 ... 0.940299 -0.184486 NaN 0.004847 0.136734 -0.131456 -0.101648 -0.318491 -0.292657 -0.258074
Area (sq km) 0.453228 0.033022 0.592458 1.000000 0.749098 0.521336 0.051421 0.064359 0.142666 0.760594 ... 0.566731 -0.031999 NaN 0.089310 0.046559 -0.083081 -0.028464 -0.383731 -0.256742 -0.223684
Land Boundaries (km) 0.575146 -0.219494 0.500612 0.749098 1.000000 0.195977 -0.149202 -0.111201 0.149657 0.677464 ... 0.533452 0.140457 NaN 0.296049 -0.174938 0.103850 0.137635 -0.506141 -0.139963 -0.128999
Coastline (km) 0.120474 0.162933 0.204549 0.521336 0.195977 1.000000 0.134840 0.108545 -0.039816 0.382807 ... 0.175666 -0.143983 NaN -0.094501 0.133669 -0.167531 -0.119055 -0.281680 -0.289821 -0.248436
Literacy Rate (%) -0.042873 0.727242 0.124609 0.051421 -0.149202 0.134840 1.000000 0.131959 -0.109809 0.082846 ... 0.119230 -0.370510 NaN -0.286413 0.424846 -0.876825 -0.202281 0.023214 -0.561547 -0.570807
Health Expenditure/GDP -0.070402 0.122042 0.194036 0.064359 -0.111201 0.108545 0.131959 1.000000 -0.174352 0.061954 ... 0.145360 -0.132482 NaN -0.187269 0.041453 -0.218742 -0.924852 -0.129730 -0.679191 -0.669180
Military Expenditure/GDP 0.032931 -0.061292 0.102590 0.142666 0.149657 -0.039816 -0.109809 -0.174352 1.000000 0.037427 ... 0.083766 -0.054926 NaN 0.194757 0.010822 0.090270 0.201836 0.246741 0.229951 0.233644
Renewable Water Resources (cu km) 0.430337 0.078324 0.474667 0.760594 0.677464 0.382807 0.082846 0.061954 0.037427 1.000000 ... 0.479867 0.115384 NaN 0.072272 0.012284 -0.041024 -0.024720 -0.505508 -0.277424 -0.260199
Net Migration Rate/1000 Population 0.004439 0.132568 0.033270 0.038358 0.001740 0.041053 0.047339 0.001094 0.238040 0.004368 ... 0.019851 -0.089734 NaN -0.031265 0.189918 -0.058778 -0.038447 0.060945 -0.051360 -0.044354
Labor Force 0.980439 0.032306 0.730507 0.477155 0.583571 0.123848 -0.011220 -0.056131 0.038110 0.441737 ... 0.832356 -0.058825 NaN 0.118655 -0.040202 0.031610 0.092565 -0.309049 -0.086169 -0.082467
Internet Users (circa 2009) 0.762276 0.153988 0.940299 0.566731 0.533452 0.175666 0.119230 0.145360 0.083766 0.479867 ... 1.000000 -0.154649 NaN 0.041041 0.086114 -0.113688 -0.078505 -0.315760 -0.262868 -0.240856
Commercial Prime Lending Rate -0.048858 -0.479638 -0.184486 -0.031999 0.140457 -0.143983 -0.370510 -0.132482 -0.054926 0.115384 ... -0.154649 1.000000 NaN 0.193859 -0.559802 0.405585 0.173698 -0.144660 0.236185 0.224637
Unnamed: 21 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Gov Type Code 0.129869 -0.359589 0.004847 0.089310 0.296049 -0.094501 -0.286413 -0.187269 0.194757 0.072272 ... 0.041041 0.193859 NaN 1.000000 -0.284565 0.288920 0.174506 -0.080670 0.217223 0.211369
GDP per capita -0.052047 0.575330 0.136734 0.046559 -0.174938 0.133669 0.424846 0.041453 0.010822 0.012284 ... 0.086114 -0.559802 NaN -0.284565 1.000000 -0.488638 -0.062112 0.121273 -0.342859 -0.316913
Literacy Rank 0.062101 -0.699306 -0.131456 -0.083081 0.103850 -0.167531 -0.876825 -0.218742 0.090270 -0.041024 ... -0.113688 0.405585 NaN 0.288920 -0.488638 1.000000 0.300976 -0.013587 0.689448 0.692934
Health Expenditure Rank 0.109791 -0.193162 -0.101648 -0.028464 0.137635 -0.119055 -0.202281 -0.924852 0.201836 -0.024720 ... -0.078505 0.173698 NaN 0.174506 -0.062112 0.300976 1.000000 0.089874 0.737573 0.733078
Renewable Water Rank -0.320585 -0.005842 -0.318491 -0.383731 -0.506141 -0.281680 0.023214 -0.129730 0.246741 -0.505508 ... -0.315760 -0.144660 NaN -0.080670 0.121273 -0.013587 0.089874 1.000000 0.513369 0.504461
Social Welfare Score -0.065251 -0.490299 -0.292657 -0.256742 -0.139963 -0.289821 -0.561547 -0.679191 0.229951 -0.277424 ... -0.262868 0.236185 NaN 0.217223 -0.342859 0.689448 0.737573 0.513369 1.000000 0.995232
Social Welfare Rank -0.060485 -0.484873 -0.258074 -0.223684 -0.128999 -0.248436 -0.570807 -0.669180 0.233644 -0.260199 ... -0.240856 0.224637 NaN 0.211369 -0.316913 0.692934 0.733078 0.504461 0.995232 1.000000

22 rows × 22 columns


In [216]:
# Permutation test of 60 samples were used to validate the correlation found above
fort_perm=fort.take(np.random.permutation(len(fort))[:60])
fort_perm.corr(method='pearson')


Out[216]:
Population Life Expectancy GDP (PPP) in US $ Area (sq km) Land Boundaries (km) Coastline (km) Literacy Rate (%) Health Expenditure/GDP Military Expenditure/GDP Renewable Water Resources (cu km) Net Migration Rate/1000 Population Labor Force Internet Users (circa 2009) Commercial Prime Lending Rate Unnamed: 21 Gov Type Code
Population 1.000000 0.073371 0.678303 0.481506 0.485153 0.295005 -0.023282 -0.096185 0.003101 0.628854 -0.038535 0.965717 0.744480 0.061038 NaN 0.206372
Life Expectancy 0.073371 1.000000 0.278956 0.072750 -0.275362 0.267647 0.701233 0.150831 -0.255679 0.100094 0.011001 0.110962 0.264321 -0.380523 NaN -0.463124
GDP (PPP) in US $ 0.678303 0.278956 1.000000 0.417775 0.212644 0.433368 0.195628 0.113652 -0.049732 0.449695 0.014912 0.767470 0.965093 -0.165427 NaN -0.070106
Area (sq km) 0.481506 0.072750 0.417775 1.000000 0.570606 0.320697 0.062639 -0.013319 0.058998 0.709736 0.115500 0.565550 0.462981 0.180929 NaN 0.107584
Land Boundaries (km) 0.485153 -0.275362 0.212644 0.570606 1.000000 -0.147022 -0.246420 -0.199839 0.152603 0.554119 0.083545 0.508359 0.289262 0.442942 NaN 0.413913
Coastline (km) 0.295005 0.267647 0.433368 0.320697 -0.147022 1.000000 0.260593 0.065886 -0.122229 0.160519 0.007130 0.324618 0.381959 -0.284688 NaN -0.216913
Literacy Rate (%) -0.023282 0.701233 0.195628 0.062639 -0.246420 0.260593 1.000000 0.115022 -0.414542 0.072863 -0.013466 0.031367 0.166050 -0.352465 NaN -0.400402
Health Expenditure/GDP -0.096185 0.150831 0.113652 -0.013319 -0.199839 0.065886 0.115022 1.000000 -0.379837 0.105148 -0.136168 -0.029983 0.129646 0.009699 NaN -0.463199
Military Expenditure/GDP 0.003101 -0.255679 -0.049732 0.058998 0.152603 -0.122229 -0.414542 -0.379837 1.000000 -0.054890 0.373954 -0.002789 -0.056874 -0.001606 NaN 0.221994
Renewable Water Resources (cu km) 0.628854 0.100094 0.449695 0.709736 0.554119 0.160519 0.072863 0.105148 -0.054890 1.000000 -0.000106 0.761974 0.614996 0.355366 NaN 0.097443
Net Migration Rate/1000 Population -0.038535 0.011001 0.014912 0.115500 0.083545 0.007130 -0.013466 -0.136168 0.373954 -0.000106 1.000000 -0.024644 0.026421 0.088334 NaN -0.159831
Labor Force 0.965717 0.110962 0.767470 0.565550 0.508359 0.324618 0.031367 -0.029983 -0.002789 0.761974 -0.024644 1.000000 0.849505 0.097105 NaN 0.167708
Internet Users (circa 2009) 0.744480 0.264321 0.965093 0.462981 0.289262 0.381959 0.166050 0.129646 -0.056874 0.614996 0.026421 0.849505 1.000000 -0.061772 NaN -0.058420
Commercial Prime Lending Rate 0.061038 -0.380523 -0.165427 0.180929 0.442942 -0.284688 -0.352465 0.009699 -0.001606 0.355366 0.088334 0.097105 -0.061772 1.000000 NaN 0.127874
Unnamed: 21 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Gov Type Code 0.206372 -0.463124 -0.070106 0.107584 0.413913 -0.216913 -0.400402 -0.463199 0.221994 0.097443 -0.159831 0.167708 -0.058420 0.127874 NaN 1.000000

In [217]:
# Linear Regression model is used to predict GDP per capita, with Life Expectancy and Litereacy Rate.
# The two data was used because they have the most correlation with GDP per capita

In [218]:
fort.columns


Out[218]:
Index(['Country Name', 'Country Code', 'Government Type', 'Capital City',
       'Date of Founding/Independence', 'Latitude of Capital',
       'Longitude of Capital', 'Population', 'Life Expectancy',
       'GDP (PPP) in US $', 'Area (sq km)', 'Land Boundaries (km)',
       'Coastline (km)', 'Literacy Rate (%)', 'Health Expenditure/GDP',
       'Military Expenditure/GDP', 'Renewable Water Resources (cu km)',
       'Net Migration Rate/1000 Population', 'Labor Force',
       'Internet Users (circa 2009)', 'Commercial Prime Lending Rate',
       'Unnamed: 21', 'Gov Type Code'],
      dtype='object')

In [281]:
# Creating data for GDP per capita
fort['GDP per capita'] = fort['GDP (PPP) in US $'] / fort['Population']

In [220]:
# Training Linear Regression Model

In [283]:
LRsample = fort[['GDP per capita','Internet Users (circa 2009)','Life Expectancy']].copy()
LR = LRsample.dropna(axis=0)

In [284]:
X = LR[['Internet Users (circa 2009)','Life Expectancy']]

In [285]:
y = LR['GDP per capita']

In [286]:
from sklearn.model_selection import train_test_split

In [287]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

In [288]:
from sklearn.linear_model import LinearRegression

In [289]:
lm = LinearRegression()

In [290]:
lm.fit(X_train,y_train)


Out[290]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [291]:
# The coefficients
print('Coefficients: \n', lm.coef_)


Coefficients: 
 [  1.65023348e-05   1.22251354e+03]

In [292]:
# Predicting Test Data

In [293]:
predictions = lm.predict( X_test)

In [294]:
plt.scatter(y_test,predictions)
plt.xlabel('Y Test')
plt.ylabel('Predicted Y')


Out[294]:
<matplotlib.text.Text at 0x2ae5fbe0>

In [295]:
# Evaluation show that the Linear regression model explain only about 27% of the variance in forecast for GDP par capita
# Other factors must be included to create more precise model
## need to find a method to label each point with country name

In [300]:
from sklearn import metrics
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', metrics.mean_squared_error(y_test, predictions))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))


MAE: 12469.8099676
MSE: 658005771.73
RMSE: 25651.6231792

In [301]:
metrics.explained_variance_score(y_test,predictions)


Out[301]:
0.27081493129115541

In [234]:
# Ranking General Social Welfare

In [235]:
# Creating Rankings for data that influence social welfare
fort['Literacy Rank'] = fort['Literacy Rate (%)'].rank(ascending=False,numeric_only=float)
fort['Health Expenditure Rank'] = fort['Health Expenditure/GDP'].rank(ascending=False,numeric_only=float)
fort['Renewable Water Rank'] = fort['Renewable Water Resources (cu km)'].rank(ascending=False,numeric_only=float)

In [236]:
# Social Welfare Ranking based on three rankings that contribute
fort['Social Welfare Score']=fort['Literacy Rank'] + fort['Health Expenditure Rank'] + fort['Renewable Water Rank']
fort['Social Welfare Rank'] = fort['Social Welfare Score'].rank(ascending=True,numeric_only=float)

In [237]:
SocialWelfare = fort[['Country Name', 'Social Welfare Rank', 'Literacy Rank', 'Health Expenditure Rank', 'Renewable Water Rank']].copy()

In [238]:
# Based on the Social Welfare Ranking Model below, the best country for social welfare is United States and the worst
# is Eritrea. This evaluation is terrible due to multiple reasons. First of all, the data given for this model
# is too limited to compare multiple social welfares. Factors such as quality of environment, level of crime, availibility
# of essential social services and many more are omitted. Secondly. the the data for certain criterias are unknown.
# The model was not able to rank countries that had unknown data. Thirdly, each criteria used to evaluate Social
# Welfare Rank have different level of impact. For example, United States Rank third on Health Expenditure/GDP
# but life Expectancy does not even rank top 20. It is uncertain how much each criteria contribute to general social welfare.

In [239]:
SocialWelfare.nsmallest(20, 'Social Welfare Rank')


Out[239]:
Country Name Social Welfare Rank Literacy Rank Health Expenditure Rank Renewable Water Rank
187 United States 1.0 37.5 3.0 3.0
31 Canada 2.0 37.5 11.5 4.0
130 Norway 3.0 4.0 40.0 28.0
59 France 4.0 37.5 9.0 42.0
126 New Zealand 5.0 37.5 24.0 32.0
84 Japan 6.0 37.5 37.0 26.0
63 Germany 7.0 37.5 13.0 51.0
8 Australia 8.0 37.5 43.5 21.0
82 Italy 9.0 37.5 31.5 45.0
58 Finland 10.0 4.0 46.5 64.0
125 Netherlands 11.5 37.5 8.0 72.0
168 Sweden 11.5 37.5 34.0 46.0
143 Russia 13.0 16.0 102.5 2.0
75 Iceland 14.0 37.5 40.0 47.0
153 Serbia 15.5 59.0 20.5 49.0
186 United Kingdom 15.5 37.5 37.0 54.0
62 Georgia 17.0 16.0 34.0 85.0
43 Cuba 18.0 9.5 26.0 100.0
6 Argentina 19.0 61.5 59.0 17.0
34 Chile 20.0 53.0 73.0 14.0

In [240]:
SocialWelfare.nlargest(20, 'Social Welfare Rank')


Out[240]:
Country Name Social Welfare Rank Literacy Rank Health Expenditure Rank Renewable Water Rank
54 Eritrea 168.0 156.0 183.0 143.0
61 Gambia, The 167.0 180.0 152.0 140.0
131 Oman 166.0 127.0 187.0 155.0
185 United Arab Emirates 165.0 114.0 177.0 163.0
151 Saudi Arabia 164.0 125.0 172.0 152.0
2 Algeria 163.0 149.0 165.0 134.0
18 Benin 161.5 184.0 148.5 111.0
92 Kuwait 161.5 91.5 181.0 171.0
33 Chad 160.0 188.0 155.5 97.0
28 Cabo Verde 159.0 131.0 145.0 161.5
194 Yemen 158.0 162.0 121.0 153.0
170 Syria 157.0 133.5 172.0 125.0
111 Mauritania 156.0 170.0 123.5 136.0
99 Libya 155.0 118.0 152.0 159.0
37 Comoros 154.0 143.0 129.5 156.0
141 Qatar 153.0 71.5 189.0 166.0
128 Niger 152.0 190.5 129.5 105.0
12 Bahrain 150.5 86.0 168.5 164.0
26 Burkina Faso 150.5 190.5 96.0 132.0
19 Bhutan 149.0 179.0 160.0 75.0

In [241]:
# K Nearest Neighbors Model will be used to determine 
# Countries based on Democratic, Parliamentary, or Consitutional system are assigned with code 0
# Countries based on Republic, Communist, or Military system are assigned with code 1
# Model show that Higher GDP per capita and Life Expectancy will tend to be code 0

In [242]:
KNN=fort.dropna(axis=1)

In [243]:
df=KNN[['Life Expectancy','GDP per capita','Population','Gov Type Code']]

In [244]:
df


Out[244]:
Life Expectancy GDP per capita Population Gov Type Code
0 50.49 1423.505527 31822848 1
1 77.96 9383.456575 3020209 0
2 76.39 7342.763984 38813722 1
3 82.65 37012.333544 85458 0
4 55.29 6915.301078 19088106 1
5 76.12 17635.138836 91295 0
6 77.51 17920.074793 43024374 1
7 74.12 6733.905525 3060631 1
8 82.07 44340.544803 22507617 0
9 80.17 43900.921579 8223062 1
10 71.91 10633.674058 9686210 1
11 71.93 35421.987733 321834 0
12 78.58 26603.981922 1314089 0
13 70.65 1954.526151 166280712 0
14 74.99 24178.403756 289680 0
15 72.15 15611.895765 9608058 1
16 79.92 40385.244610 10449361 0
17 68.49 9045.193696 340844 0
18 61.07 1638.689851 10160556 1
19 68.98 7135.623185 733643 0
20 68.55 5559.900093 10631486 1
21 76.33 8306.550991 3871643 1
22 54.06 15771.524420 2155784 0
23 73.28 11941.371537 202656788 1
24 76.77 52640.917963 422675 0
25 74.33 15163.076724 6924716 0
26 54.78 1443.497002 18365123 0
27 59.55 553.101016 10395931 1
28 71.57 4126.008523 538535 1
29 63.78 2564.312890 15458332 0
... ... ... ... ...
167 50.54 4408.917015 1419623 1
168 81.89 40519.101105 9723809 0
169 82.39 46021.120593 8061516 1
170 68.41 6016.163761 17951639 1
171 79.84 39640.533139 23359928 0
172 67.06 2384.645269 8051512 1
173 61.24 1597.328302 49639138 1
174 74.18 9934.840291 67741401 0
175 67.39 21147.825045 1201542 1
176 64.06 999.541038 7351374 1
177 75.82 7948.139797 106440 0
178 72.29 22174.724409 1223916 0
179 75.68 9874.266756 10937521 1
180 73.29 14334.828664 81619392 0
181 69.47 10665.237417 5171943 0
182 65.81 3709.886848 10782 0
183 54.46 1513.687148 35918915 1
184 69.14 7608.698327 44291413 1
185 77.09 47967.552616 5628805 1
186 80.42 37494.326630 63742977 0
187 79.56 52368.810149 318892103 0
188 76.81 16882.830099 3332972 0
189 73.29 3906.018296 28929716 1
190 72.72 4757.676905 266937 0
191 74.39 14098.418601 28868486 1
192 72.91 3842.784719 93421835 1
193 62.27 1633.936860 554795 1
194 64.83 2365.565594 26052966 1
195 51.83 1739.931776 14638505 1
196 55.68 544.303795 13771721 0

197 rows × 4 columns


In [245]:
sns.pairplot(df,hue='Gov Type Code',palette='coolwarm')


Out[245]:
<seaborn.axisgrid.PairGrid at 0x165d20f0>

In [246]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [247]:
scaler.fit(df.drop('Gov Type Code',axis=1))


Out[247]:
StandardScaler(copy=True, with_mean=True, with_std=True)

In [ ]:
# normalize variables

In [248]:
scaled_features = scaler.transform(df.drop('Gov Type Code',axis=1))

In [249]:
df_feat = pd.DataFrame(scaled_features,columns=df.columns[:-1])
df_feat.head()


Out[249]:
Life Expectancy GDP per capita Population
0 -2.267040 -0.694011 -0.033376
1 0.819260 -0.321395 -0.246695
2 0.642868 -0.416923 0.018399
3 1.346190 0.971948 -0.268431
4 -1.727752 -0.436933 -0.127693

In [250]:
# Train Test Split

In [251]:
from sklearn.model_selection import train_test_split

In [252]:
X_train, X_test, y_train, y_test = train_test_split(scaled_features,df['Gov Type Code'],
                                                    test_size=0.30)

In [253]:
# Using KNN

In [254]:
from sklearn.neighbors import KNeighborsClassifier

In [255]:
knn = KNeighborsClassifier(n_neighbors=1)

In [256]:
knn.fit(X_train,y_train)


Out[256]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')

In [257]:
# Predictions and Evaluaions

In [258]:
pred = knn.predict(X_test)

In [259]:
from sklearn.metrics import classification_report,confusion_matrix
print(confusion_matrix(y_test,pred))


[[17  7]
 [17 19]]

In [260]:
print(classification_report(y_test,pred))


             precision    recall  f1-score   support

          0       0.50      0.71      0.59        24
          1       0.73      0.53      0.61        36

avg / total       0.64      0.60      0.60        60


In [261]:
error_rate = []
for i in range(1,80):
    
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train,y_train)
    pred_i = knn.predict(X_test)
    error_rate.append(np.mean(pred_i != y_test))

In [262]:
plt.figure(figsize=(10,6))
plt.plot(range(1,80),error_rate,color='blue', linestyle='dashed', marker='o',
         markerfacecolor='red', markersize=10)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')


Out[262]:
<matplotlib.text.Text at 0x259b11d0>

In [263]:
# Now with K=30, which has the lowest error rate
knn = KNeighborsClassifier(n_neighbors=30)

knn.fit(X_train,y_train)
pred = knn.predict(X_test)

print('WITH K=30')
print('\n')
print(confusion_matrix(y_test,pred))
print('\n')
print(classification_report(y_test,pred))


WITH K=30


[[23  1]
 [16 20]]


             precision    recall  f1-score   support

          0       0.59      0.96      0.73        24
          1       0.95      0.56      0.70        36

avg / total       0.81      0.72      0.71        60


In [307]:
fort.columns


Out[307]:
Index(['Country Name', 'Country Code', 'Government Type', 'Capital City',
       'Date of Founding/Independence', 'Latitude of Capital',
       'Longitude of Capital', 'Population', 'Life Expectancy',
       'GDP (PPP) in US $', 'Area (sq km)', 'Land Boundaries (km)',
       'Coastline (km)', 'Literacy Rate (%)', 'Health Expenditure/GDP',
       'Military Expenditure/GDP', 'Renewable Water Resources (cu km)',
       'Net Migration Rate/1000 Population', 'Labor Force',
       'Internet Users (circa 2009)', 'Commercial Prime Lending Rate',
       'Unnamed: 21', 'Gov Type Code', 'GDP per capita', 'Literacy Rank',
       'Health Expenditure Rank', 'Renewable Water Rank',
       'Social Welfare Score', 'Social Welfare Rank'],
      dtype='object')

In [314]:
fort2=fort[['Country Name','Population', 'Life Expectancy',
       'Area (sq km)', 'Land Boundaries (km)',
       'Coastline (km)', 'Literacy Rate (%)', 'Health Expenditure/GDP',
       'Military Expenditure/GDP', 'Renewable Water Resources (cu km)',
       'Net Migration Rate/1000 Population', 'Labor Force',
       'Internet Users (circa 2009)', 'Commercial Prime Lending Rate',
       ]].copy()
fort3=fort[['Population', 'Life Expectancy',
       'Area (sq km)', 'Land Boundaries (km)',
       'Coastline (km)', 'Literacy Rate (%)', 'Health Expenditure/GDP',
       'Military Expenditure/GDP', 'Renewable Water Resources (cu km)',
       'Net Migration Rate/1000 Population', 'Labor Force',
       'Internet Users (circa 2009)', 'Commercial Prime Lending Rate',
       ]].copy()

In [ ]:
# Distance Functions were used to evaluate relationship between countries
# Further research is required to label each point by country name

In [320]:
from sklearn.metrics.pairwise import euclidean_distances
zero_data = fort3.fillna(0)
pairwise_dist_mat = euclidean_distances(zero_data)
pairwise_dist_mat


Out[320]:
array([[        0.        ,  29516269.53653513,   8876292.75895193, ...,
          5941089.73287505,  17230080.48553707,  18408063.98599282],
       [ 29516269.53653513,         0.        ,  37407417.57810977, ...,
         23830264.87146997,  12749282.72266758,  11127105.52555638],
       [  8876292.75895192,  37407417.57810977,         0.        , ...,
         13718713.35802696,  25018886.61929344,  26340142.26717158],
       ..., 
       [  5941089.73287505,  23830264.87146997,  13718713.35802696, ...,
                0.        ,  11548615.14464289,  12716021.74003095],
       [ 17230080.48553707,  12749282.72266758,  25018886.61929344, ...,
         11548615.14464288,         0.        ,   2589858.69378191],
       [ 18408063.98599282,  11127105.52555638,  26340142.26717158, ...,
         12716021.74003095,   2589858.69378191,         0.        ]])