In [1]:
# numerical python package to do fast computations.
import numpy as np
import pandas as pd
# plotting library to visualise the data.
import matplotlib.pyplot as plt
# Instruct matplotlib to render plots inline in the Jupyter notebook itself.
%matplotlib inline
In [ ]:
# Synthetic data: a noisy straight line (y) and a noisy non-linear curve (y2),
# both sampled on an evenly spaced grid over [0, 10].
data_points_count = 50
# Derive the grid from data_points_count so the two can never disagree; the
# previously pasted 50-value literal was this same grid rounded to 8 decimals
# and broke the noise terms below as soon as the count was changed.
x = np.linspace(0, 10, data_points_count)

# Seed the global RNG so slope, intercept and noise are identical on every run.
np.random.seed(42)
m = np.random.random(1) * 5   # slope, drawn from [0, 5)
c = np.random.random(1) * 10  # intercept, drawn from [0, 10)
y = m * x + c + 3.5 * np.random.random(data_points_count)
y2 = 5 * np.sin(x) + 1 * x + 0.1 * x * x + 12 * np.random.random(data_points_count)
print(m, c)

plt.figure(figsize=(15, 8))
# Trailing semicolon keeps the notebook from echoing the Line2D repr.
plt.plot(x, y, '*r');
In [ ]:
%%sh
# Disabled downloads of two PNG images (a logo from mospi.nic.in and a G20 image
# from Wikimedia Commons); uncomment a line to fetch the file into the working directory.
# wget http://mospi.nic.in/sites/default/files/logo.png
# wget https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/G20.svg/863px-G20.svg.png
Navigation Path: Home > Statistical Year Book India 2016 > INDIA, G-20 AND THE WORLD
The G20 (or G-20 or Group of Twenty) is an international forum for the governments and central bank governors from 20 major economies. It was founded in 1999 with the aim of studying, reviewing, and promoting high-level discussion of policy issues pertaining to the promotion of international financial stability.[3] It seeks to address issues that go beyond the responsibilities of any one organization.[3] The G20 heads of government or heads of state have periodically conferred at summits since their initial meeting in 2008, and the group also hosts separate meetings of finance ministers and central bank governors.
The members include 19 individual countries along with the European Union (EU). The EU is represented by the European Commission and by the European Central Bank. Collectively, the G20 economies account for around 85% of the gross world product (GWP), 80% of world trade (or, if excluding EU intra-trade, 75%), and two-thirds of the world population.[2]
Data Source:
References:
In [ ]:
%%sh
# Disabled helper: list the G20 CSV file(s) in ~/Downloads and move the match
# into the working directory under the name G20.csv.
# ls -l ~/Downloads/G20*csv
# mv ~/Downloads/G20*csv G20.csv
In [93]:
# Load the G20 table and coerce every numeric column to float.
data = pd.read_csv('G20.csv')
cols = ['Area', 'Population_2010', 'Population_2011',
        'Population_2012', 'Population_2013', 'Population_2014',
        'Population_2015', 'GDP_2010', 'GDP_2011', 'GDP_2012', 'GDP_2013',
        'GDP_2014', 'GDP_2015', 'GDP_PCI_2010', 'GDP_PCI_2011', 'GDP_PCI_2012',
        'GDP_PCI_2013', 'GDP_PCI_2014', 'GDP_PCI_2015', 'GDP_PPP_2010',
        'GDP_PPP_2011', 'GDP_PPP_2012', 'GDP_PPP_2013', 'GDP_PPP_2014',
        'GDP_PPP_2015']
# Strip the comma separators and convert, one whole column at a time.
# This replaces the per-cell DataFrame.applymap call (deprecated in recent
# pandas) with string operations that behave the same on old and new versions;
# missing values still come out as NaN ("nan" -> float nan).
data[cols] = data[cols].apply(
    lambda column: column.astype(str).str.replace(',', '').astype(float)
)

# Alphabetical list of the distinct countries; `country_labler(name)` returns a
# country's position in that list (it is the bound list.index method).
all_countries = sorted(data.Country.unique())
country_labler = all_countries.index
# country_labler('India')
# data.Country = data.Country.map(country_labler)
In [103]:
# Slice the wide table into one frame per metric family. Every frame keeps the
# 'Area' and 'Country' columns followed by the six yearly columns 2010-2015.
sorted(data.columns.tolist())  # result is discarded; kept from the original cell

_years = range(2010, 2016)
_id_cols = ['Area', 'Country']

cols1 = ['GDP_%d' % year for year in _years]
cols2 = ['GDP_PPP_%d' % year for year in _years]
cols3 = []  # NOTE(review): left empty in the original - confirm whether GDP_PCI columns were intended

data1 = data[_id_cols + ['GDP_%d' % year for year in _years]].copy()
data2 = data[_id_cols + ['GDP_PPP_%d' % year for year in _years]].copy()
data3 = data[_id_cols + ['GDP_PCI_%d' % year for year in _years]].copy()
data4 = data[_id_cols + ['Population_%d' % year for year in _years]].copy()
In [95]:
import sklearn.cluster

# Group the rows of data1 into 5 agglomerative clusters using the six yearly
# GDP columns as features, then display the label assigned to each row.
gdp_columns = ['GDP_%d' % year for year in range(2010, 2016)]
clf = sklearn.cluster.AgglomerativeClustering(n_clusters=5)
pred = clf.fit_predict(data1[gdp_columns])
pred
Out[95]:
In [ ]:
# Show the distinct values of the 'metric' column.
# NOTE(review): `new_data` is not created in any visible cell - presumably a
# long-format reshape of `data`; confirm this survives Restart & Run All.
new_data.metric.unique()
In [ ]:
# Preview the first 20 rows. The frame is only displayed, so the deep copy the
# cell used to make was redundant and has been dropped.
new_data.head(20)
In [ ]:
# Split each metric label of the form "<NAME>_<YEAR>" into its two parts.
def _year_of(metric_name):
    """Return the last underscore-separated token of the label as an int."""
    return int(metric_name.split('_')[-1])


def _param_of(metric_name):
    """Return all tokens before the last one, joined with no separator.

    Multi-token names therefore lose their underscores, e.g. 'GDP_PCI_2010'
    becomes 'GDPPCI'.
    """
    return ''.join(metric_name.split('_')[:-1])


new_data['year'] = new_data.metric.map(_year_of)
new_data['param'] = new_data.metric.map(_param_of)
# The combined label is removed in place once it has been split.
new_data.drop('metric', axis=1, inplace=True)
# converting data into integers
In [ ]:
# Key values available for filtering: the distinct countries and parameters.
print('Country', new_data.country.unique())
# The label used to read 'Country' here as well, although params are printed.
print('Param', new_data.param.unique())
In [ ]:
# Independent copy of the rows holding the GDP parameter for the USA.
is_usa = new_data.country == 'USA'
is_gdp = new_data.param == 'GDP'
temp = new_data[is_usa & is_gdp].copy(deep=True)
temp
In [ ]:
# Plot the yearly GDP series for the USA and for the European Union, one figure
# each. The two hand-copied plotting stanzas are folded into a single loop; the
# globals they left behind (X_Label, Y_Label, temp, _x, _y) end with the same
# values as before, i.e. those of the European Union.
Y_Label = 'GDP'
for X_Label in ('USA', 'European Union'):
    plt.figure(figsize=(15, 5))
    temp = new_data[(new_data.country == X_Label) & (new_data.param == Y_Label)].copy(deep=True)
    _x, _y = temp.year.values, temp.value.values
    plt.plot(_x, _y)
    # Pass a real list: on Python 3 `map` is a one-shot iterator.
    plt.xticks(_x, list(map(str, _x)))
    # Label each figure so it can be read on its own.
    plt.title('{} - {}'.format(X_Label, Y_Label))
    plt.xlabel('Year')
    plt.ylabel(Y_Label)
In [ ]:
# Plot the yearly GDP series for the USA.
# X_Label holds the country being plotted and Y_Label the parameter.
X_Label = 'USA'
Y_Label = 'GDP'
plt.figure(figsize=(15, 5))
temp = new_data[(new_data.country == X_Label) & (new_data.param == Y_Label)].copy(deep=True)
_x, _y = temp.year.values, temp.value.values
plt.plot(_x, _y)
# Pass a real list: on Python 3 `map` is a one-shot iterator.
plt.xticks(_x, list(map(str, _x)))
# Label the figure so it can be read on its own.
plt.title('{} - {}'.format(X_Label, Y_Label))
plt.xlabel('Year')
plt.ylabel(Y_Label);
In [ ]:
# Display the y-values left behind by the most recently run plotting cell.
_y
In [ ]:
# Same values shifted so that the smallest one becomes zero.
_y - _y.min()
In [ ]:
# Population change relative to each country's own minimum, for the first five
# countries found in new_data, drawn on one shared figure.
Y_Label = 'Population'
plt.figure(figsize=(15, 8))
# NOTE(review): this rebinds `all_countries`, which the data-loading cell uses
# for the full sorted country list.
all_countries = new_data.country.unique()[:5]
for X_Label in all_countries:
    temp = new_data[(new_data.country == X_Label) & (new_data.param == Y_Label)].copy(deep=True)
    _x, _y = temp.year.values, temp.value.values
    # Shift every series so its minimum is zero; the curves then share one axis.
    _y = _y - _y.min()
    # Label each line explicitly so the legend cannot fall out of step with the plotted lines.
    plt.plot(_x, _y, label=X_Label)
    plt.xticks(_x, list(map(str, _x)))
plt.xlabel('Year')
plt.ylabel('{} (offset from minimum)'.format(Y_Label))
plt.legend();
In [164]:
# Three-letter location code for every G20 member, used as map locations.
country_codes = {
    'Argentina': 'ARG',
    'Australia': 'AUS',
    'Brazil': 'BRA',
    'Canada': 'CAN',
    'China': 'CHN',
    # The EU is not a country and has no ISO 3166-1 alpha-3 code. It was mapped
    # to 'USA', which collided with the real USA entry; 'EUU' (the World Bank
    # aggregate code) is used as a non-colliding placeholder instead.
    'European Union': 'EUU',
    'France': 'FRA',
    'Germany': 'DEU',
    'India': 'IND',
    'Indonesia': 'IDN',
    'Italy': 'ITA',
    'Japan': 'JPN',
    'Mexico': 'MEX',
    # Was 'USA'; the ISO 3166-1 alpha-3 code for the Republic of Korea is 'KOR'.
    'Republic of Korea': 'KOR',
    'Russia': 'RUS',
    'Saudi Arabia': 'SAU',
    'South Africa': 'ZAF',
    'Turkey': 'TUR',
    'USA': 'USA',
    'United Kingdom': 'GBR',
}

# 16 base colours, repeated and then trimmed to one entry per country code.
chart_colors = ["rgb(0,0,0)",
                "rgb(255,255,255)",
                "rgb(255,0,0)",
                "rgb(0,255,0)",
                "rgb(0,0,255)",
                "rgb(255,255,0)",
                "rgb(0,255,255)",
                "rgb(255,0,255)",
                "rgb(192,192,192)",
                "rgb(128,128,128)",
                "rgb(128,0,0)",
                "rgb(128,128,0)",
                "rgb(0,128,0)",
                "rgb(128,0,128)",
                "rgb(0,128,128)",
                "rgb(0,0,128)"]
chart_colors += chart_colors
chart_colors = chart_colors[:len(country_codes)]

# A country missing from the table raises KeyError rather than producing NaN.
data1['Country_Codes'] = data1['Country'].map(lambda x: country_codes[x])
In [140]:
import sklearn.cluster

# Agglomerative clustering (5 clusters) of the data1 rows on the 2010-2015 GDP
# columns; the last line displays the resulting label array.
gdp_columns = ['GDP_%d' % year for year in range(2010, 2016)]
clf = sklearn.cluster.AgglomerativeClustering(n_clusters=5)
pred = clf.fit_predict(data1[gdp_columns])
pred
Out[140]:
In [141]:
# Store the label array held in `pred` as a new 'cluster' column of data1.
data1['cluster'] = pred
In [112]:
# Human-readable label per row. The column is named 'cluster' (the cell used to
# read the misspelt 'cluser'), and its values must be cast to str before they
# can be concatenated with the prefix.
data1['text'] = 'Cluster ID ' + data1['cluster'].astype(str)
In [142]:
# Preview the first rows of data1.
data1.head()
Out[142]:
In [121]:
# NOTE(review): plotly.plotly moved to the separate chart_studio package in
# plotly 4; `py` is only referenced by the commented-out iplot call below.
import plotly.plotly as py
import pandas as pd
# `plot` was called without being imported in any visible cell; the offline
# renderer is assumed here because it needs no account - confirm.
from plotly.offline import plot

# World map coloured by the GDP cluster each G20 member was assigned to.
# NOTE(review): `data` is rebound from the G20 DataFrame to a list of traces
# here; cells that expect the DataFrame must not be re-run afterwards.
# df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
data = [ dict(
    type = 'choropleth',
    locations = data1['Country_Codes'],
    # Column is 'cluster'; the original read the misspelt 'cluser'.
    z = data1['cluster'],
    text = data1['Country_Codes'],
    # colorscale = [[0,"rgb(5, 10, 172)"],[0.35,"rgb(40, 60, 190)"],[0.5,"rgb(70, 100, 245)"],\
    # [0.6,"rgb(90, 120, 245)"],[0.7,"rgb(106, 137, 247)"],[1,"rgb(220, 220, 220)"]],
    # autocolorscale = True,
    # reversescale = True,
    # marker = dict(
    # line = dict (
    # color = 'rgb(180,180,180)',
    # width = 0.5
    # ) ),
    # z holds cluster ids, not dollar amounts, so the colour bar no longer
    # carries a '$' tick prefix or a "Billions US$" title.
    colorbar = dict(
        autotick = False,
        title = 'GDP cluster'),
) ]
layout = dict(
    title = "G-20's GDP",
    geo = dict(
        showframe = False,
        showcoastlines = False,
        projection = dict(
            # plotly projection names are lower-case.
            type = 'mercator'
        )
    )
)
fig = dict(data=data, layout=layout)
# py.iplot( fig, validate=False, filename='d3-world-map' )
plot( fig, validate=False, filename='d3-world-map')
Out[121]:
In [144]:
# Figure spec with one marker trace per year: GDP per capita on a log x-axis
# against life expectancy.
# NOTE(review): df2007 and df1952 are not defined in any visible cell - confirm
# where they come from before relying on this cell.
_year_frames = ((df2007, '2007'), (df1952, '1952'))
_traces = [
    {
        'x': frame.gdpPercap,
        'y': frame.lifeExp,
        'text': frame.country,
        'mode': 'markers',
        'name': label,
    }
    for frame, label in _year_frames
]
_axes = {
    'xaxis': {'title': 'GDP per Capita', 'type': 'log'},
    'yaxis': {'title': "Life Expectancy"},
}
fig = {'data': _traces, 'layout': _axes}
In [165]:
# Single marker trace: 2015 GDP on x against the assigned cluster id on y.
# NOTE(review): `data` is rebound to a list of traces here; cells that expect
# the G20 DataFrame must not be re-run afterwards.
data = []
year = 'GDP_2015'
data.append({
    'x': data1[year],
    'y': data1['cluster'],
    'mode': 'markers',
    'text': data1['Country'],
    'name': year,
    # Scatter traces have no top-level 'colors' attribute, so the palette was
    # silently ignored; per-point colours belong under marker.color.
    'marker': {'color': chart_colors},
})
In [166]:
# `plot` was called without being imported in any visible cell; the offline
# renderer is assumed here because it needs no account - confirm.
from plotly.offline import plot

# Combine the current traces with the existing layout and render the figure.
# NOTE(review): the filename is the same one the choropleth cell writes to.
fig = dict(data=data, layout=layout)
# py.iplot( fig, validate=False, filename='d3-world-map' )
plot( fig, validate=False, filename='d3-world-map')
Out[166]:
In [174]:
from sklearn import datasets

# Load the bundled iris dataset; X is the complete feature matrix and Y the
# target array.
iris = datasets.load_iris()
X, Y = iris.data, iris.target
In [175]:
# Peek at the first five rows of the feature matrix.
X[:5]
Out[175]:
In [190]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the iris samples for testing; random_state fixes the shuffle.
_split = train_test_split(iris.data, iris.target, test_size=0.25, random_state=0)
X_train, X_test, y_train, y_test = _split
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
In [199]:
from sklearn.metrics import accuracy_score
In [205]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the training split. The forest is randomised, so the
# seed is pinned (matching the random_state=0 used for the split) to keep the
# accuracy figures below reproducible between runs.
clf = RandomForestClassifier(random_state=0)
clf = clf.fit(X_train, y_train)
In [206]:
# Accuracy on the training split. Arguments follow the documented
# (y_true, y_pred) order; the score itself is symmetric, so the value is unchanged.
accuracy_score(y_train, clf.predict(X_train))
Out[206]:
In [207]:
# Accuracy on the held-out test split. Arguments follow the documented
# (y_true, y_pred) order; the score itself is symmetric, so the value is unchanged.
accuracy_score(y_test, clf.predict(X_test))
Out[207]:
In [208]:
# Accuracy over X and Y as a whole; the train/test split cell draws both its
# training and test rows from this same data. Arguments follow the documented
# (y_true, y_pred) order; the score is symmetric, so the value is unchanged.
accuracy_score(Y, clf.predict(X))
Out[208]:
In [210]:
from sklearn import svm

# Linear-kernel support vector classifier with C=2, fitted on the training
# split; fit() returns the estimator itself, so the call is chained.
clf = svm.SVC(kernel='linear', C=2).fit(X_train, y_train)
In [211]:
# Accuracy on the training split. Arguments follow the documented
# (y_true, y_pred) order; the score itself is symmetric, so the value is unchanged.
accuracy_score(y_train, clf.predict(X_train))
Out[211]:
In [212]:
# Accuracy on the held-out test split. Arguments follow the documented
# (y_true, y_pred) order; the score itself is symmetric, so the value is unchanged.
accuracy_score(y_test, clf.predict(X_test))
Out[212]:
In [213]:
# Accuracy over X and Y as a whole; the train/test split cell draws both its
# training and test rows from this same data. Arguments follow the documented
# (y_true, y_pred) order; the score is symmetric, so the value is unchanged.
accuracy_score(Y, clf.predict(X))
Out[213]: