In [1]:
import numpy as np, requests, pandas as pd, zipfile, StringIO
In [2]:
# Download the zipped CSV export of World Bank indicator ny.gdp.pcap.pp.kd
# (presumably GDP per capita, PPP -- confirm against the indicator catalogue)
# and load it into `gdp`.
url='http://api.worldbank.org/v2/en/indicator/ny.gdp.pcap.pp.kd?downloadformat=csv'
filename='ny.gdp.pcap.pp.kd_Indicator_en_csv_v2.csv'
r = requests.get(url)
# Fail here on an HTTP error; otherwise ZipFile would be handed an error page
# and complain that it is not a zip file, hiding the real cause.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))
# NOTE(review): the member name and the two skipped leading rows assume the
# archive layout this notebook was written against -- confirm for new downloads.
gdp_raw = pd.read_csv(z.open(filename), skiprows=[0,1])
# Drop every auto-named "Unnamed: N" column instead of hard-coding
# 'Unnamed: 58': N is positional, so it changes whenever the export gains or
# loses a column, and dropping a missing label raises.
unnamed_cols = [c for c in gdp_raw.columns if str(c).startswith('Unnamed')]
gdp = gdp_raw.drop(unnamed_cols, axis=1).drop('Indicator Code', axis=1)
gdp.head(2)
In [3]:
# Download the zipped CSV export of World Bank indicator ny.gnp.pcap.pp.kd
# (presumably GNI/GNP per capita, PPP -- confirm against the indicator
# catalogue) and load it into `gnp`.
url='http://api.worldbank.org/v2/en/indicator/ny.gnp.pcap.pp.kd?downloadformat=csv'
filename='ny.gnp.pcap.pp.kd_Indicator_en_csv_v2.csv'
r = requests.get(url)
# Fail here on an HTTP error; otherwise ZipFile would be handed an error page
# and complain that it is not a zip file, hiding the real cause.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))
# NOTE(review): the member name and the two skipped leading rows assume the
# archive layout this notebook was written against -- confirm for new downloads.
gnp_raw = pd.read_csv(z.open(filename), skiprows=[0,1])
# Drop every auto-named "Unnamed: N" column instead of hard-coding
# 'Unnamed: 58': N is positional, so it changes whenever the export gains or
# loses a column, and dropping a missing label raises.
unnamed_cols = [c for c in gnp_raw.columns if str(c).startswith('Unnamed')]
gnp = gnp_raw.drop(unnamed_cols, axis=1).drop('Indicator Code', axis=1)
gnp.head(2)
In [51]:
# Download the zipped CSV export of World Bank indicator sp.dyn.le00.in
# (presumably life expectancy at birth -- confirm against the indicator
# catalogue) and load it into `le`.
url='http://api.worldbank.org/v2/en/indicator/sp.dyn.le00.in?downloadformat=csv'
filename='sp.dyn.le00.in_Indicator_en_csv_v2.csv'
r = requests.get(url)
# Fail here on an HTTP error; otherwise ZipFile would be handed an error page
# and complain that it is not a zip file, hiding the real cause.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))
# NOTE(review): the member name and the two skipped leading rows assume the
# archive layout this notebook was written against -- confirm for new downloads.
le_raw = pd.read_csv(z.open(filename), skiprows=[0,1])
# Drop every auto-named "Unnamed: N" column instead of hard-coding
# 'Unnamed: 58': N is positional, so it changes whenever the export gains or
# loses a column, and dropping a missing label raises.
unnamed_cols = [c for c in le_raw.columns if str(c).startswith('Unnamed')]
le = le_raw.drop(unnamed_cols, axis=1).drop('Indicator Code', axis=1)
le.head(2)
Out[51]:
In [53]:
# Download the zipped CSV export of World Bank indicator se.adt.litr.zs
# (presumably the adult literacy rate -- confirm against the indicator
# catalogue) and load it into `alr`.
url='http://api.worldbank.org/v2/en/indicator/se.adt.litr.zs?downloadformat=csv'
filename='se.adt.litr.zs_Indicator_en_csv_v2.csv'
r = requests.get(url)
# Fail here on an HTTP error; otherwise ZipFile would be handed an error page
# and complain that it is not a zip file, hiding the real cause.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))
# NOTE(review): the member name and the two skipped leading rows assume the
# archive layout this notebook was written against -- confirm for new downloads.
alr_raw = pd.read_csv(z.open(filename), skiprows=[0,1])
# Drop every auto-named "Unnamed: N" column instead of hard-coding
# 'Unnamed: 58': N is positional, so it changes whenever the export gains or
# loses a column, and dropping a missing label raises.
unnamed_cols = [c for c in alr_raw.columns if str(c).startswith('Unnamed')]
alr = alr_raw.drop(unnamed_cols, axis=1).drop('Indicator Code', axis=1)
alr.head(2)
Out[53]:
In [54]:
# Download the zipped CSV export of World Bank indicator se.prm.enrr
# (presumably the primary gross enrolment ratio -- confirm against the
# indicator catalogue) and load it into `ger1`.
url='http://api.worldbank.org/v2/en/indicator/se.prm.enrr?downloadformat=csv'
filename='se.prm.enrr_Indicator_en_csv_v2.csv'
r = requests.get(url)
# Fail here on an HTTP error; otherwise ZipFile would be handed an error page
# and complain that it is not a zip file, hiding the real cause.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))
# NOTE(review): the member name and the two skipped leading rows assume the
# archive layout this notebook was written against -- confirm for new downloads.
ger1_raw = pd.read_csv(z.open(filename), skiprows=[0,1])
# Drop every auto-named "Unnamed: N" column instead of hard-coding
# 'Unnamed: 58': N is positional, so it changes whenever the export gains or
# loses a column, and dropping a missing label raises.
unnamed_cols = [c for c in ger1_raw.columns if str(c).startswith('Unnamed')]
ger1 = ger1_raw.drop(unnamed_cols, axis=1).drop('Indicator Code', axis=1)
ger1.head(2)
Out[54]:
In [55]:
# Download the zipped CSV export of World Bank indicator se.sec.enrr
# (presumably the secondary gross enrolment ratio -- confirm against the
# indicator catalogue) and load it into `ger2`.
url='http://api.worldbank.org/v2/en/indicator/se.sec.enrr?downloadformat=csv'
filename='se.sec.enrr_Indicator_en_csv_v2.csv'
r = requests.get(url)
# Fail here on an HTTP error; otherwise ZipFile would be handed an error page
# and complain that it is not a zip file, hiding the real cause.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))
# NOTE(review): the member name and the two skipped leading rows assume the
# archive layout this notebook was written against -- confirm for new downloads.
ger2_raw = pd.read_csv(z.open(filename), skiprows=[0,1])
# Drop every auto-named "Unnamed: N" column instead of hard-coding
# 'Unnamed: 58': N is positional, so it changes whenever the export gains or
# loses a column, and dropping a missing label raises.
unnamed_cols = [c for c in ger2_raw.columns if str(c).startswith('Unnamed')]
ger2 = ger2_raw.drop(unnamed_cols, axis=1).drop('Indicator Code', axis=1)
ger2.head(2)
Out[55]:
In [56]:
# Download the zipped CSV export of World Bank indicator se.ter.enrr
# (presumably the tertiary gross enrolment ratio -- confirm against the
# indicator catalogue) and load it into `ger3`.
url='http://api.worldbank.org/v2/en/indicator/se.ter.enrr?downloadformat=csv'
filename='se.ter.enrr_Indicator_en_csv_v2.csv'
r = requests.get(url)
# Fail here on an HTTP error; otherwise ZipFile would be handed an error page
# and complain that it is not a zip file, hiding the real cause.
r.raise_for_status()
z = zipfile.ZipFile(StringIO.StringIO(r.content))
# NOTE(review): the member name and the two skipped leading rows assume the
# archive layout this notebook was written against -- confirm for new downloads.
ger3_raw = pd.read_csv(z.open(filename), skiprows=[0,1])
# Drop every auto-named "Unnamed: N" column instead of hard-coding
# 'Unnamed: 58': N is positional, so it changes whenever the export gains or
# loses a column, and dropping a missing label raises.
unnamed_cols = [c for c in ger3_raw.columns if str(c).startswith('Unnamed')]
ger3 = ger3_raw.drop(unnamed_cols, axis=1).drop('Indicator Code', axis=1)
ger3.head(2)
Out[56]:
In [ ]:
In [3]:
# Country lookup table: read the tab-separated world-country-names file and
# index it by its 'name' column, so rows can be fetched with ids.loc[<name>].
ids = (
    pd.read_csv(
        'http://bl.ocks.org/d/4090846/world-country-names.tsv',
        sep='\t',
    )
    .set_index(['name'], drop=True)
)
ids.head()
Out[3]:
In [ ]:
In [4]:
def country_name_converter(country):
    """Map a country name as spelled in `ids` to the spelling used to index `hdi`.

    A name found in the left column of the table below is replaced by the
    entry on its right; every other value is returned unchanged.
    """
    # (name to match, replacement) pairs, scanned top to bottom.
    renames = (
        ("Venezuela, Bolivarian Republic of", "Venezuela (Bolivarian Republic of)"),
        ("Tanzania, United Republic of", "Tanzania (United Republic of)"),
        ("Moldova, Republic of", "Moldova (Republic of)"),
        ("Micronesia, Federated States of", "Micronesia (Federated States of)"),
        ("Macedonia, the former Yugoslav Republic of", "The former Yugoslav Republic of Macedonia"),
        ("Korea, Republic of", "Korea (Republic of)"),
        ("Korea, Democratic People's Republic of", "Korea (Democratic People's Rep. of)"),
        # The replacement is written with explicit \xc3\xb4 escapes (the UTF-8
        # bytes of the accented o); it is kept exactly as authored.
        ("Côte d'Ivoire", "C\xc3\xb4te d'Ivoire"),
        ("Iran, Islamic Republic of", "Iran (Islamic Republic of)"),
        ("Hong Kong", "Hong Kong, China (SAR)"),
        ("Palestinian Territory, Occupied", "Palestine, State of"),
        ("Congo, the Democratic Republic of the", "Congo (Democratic Republic of the)"),
        ("Bolivia, Plurinational State of", "Bolivia (Plurinational State of)"),
    )
    for source_name, target_name in renames:
        if country == source_name:
            return target_name
    # Not in the table: the name is passed through as-is.
    return country
In [6]:
import re

def _hdi_number(row, column):
    """Return row[column] as a float after removing every character that is not a digit or '.'."""
    return float(re.sub(r'[^\d.]+', '', row[column]))

# Build `codes`: one entry per country in `ids` that can be matched in `hdi`.
#   key   -- repr() of the first column of the country's `ids` row
#   value -- [name, reported HDI, life-expectancy index, education index,
#             income index, HDI recomputed as the geometric mean of the three
#             (already rounded) indices]
# NOTE(review): `hdi` is not created by any cell visible in this notebook; it
# must already be loaded and indexed by country name before this cell runs.
# NOTE(review): positional `[0]` on a label-indexed Series is deprecated in
# recent pandas (`.iloc[0]` is the forward-compatible spelling) -- left as is
# so the set of skipped rows does not change.
codes={}
skipped_countries=[]  # names for which no usable HDI row was found
for i in ids.index:
    try:
        # One lookup per country instead of repeating it for every column.
        row = hdi.loc[country_name_converter(i)]
        a=[i]
        a.append(round(_hdi_number(row, u"Human Development Index (HDI) Value, 2013"),3))
        # Life-expectancy index: (years - 20) / (85 - 20).
        a.append(round((_hdi_number(row, u"Life expectancy at birth (years), 2013")-20)/(85-20),3))
        # Education index: mean of (mean years / 15) and (expected years / 18).
        a.append(round((_hdi_number(row, u"Mean years of schooling (years), 2012 a")/15+
                        _hdi_number(row, u"Expected years of schooling (years), 2012 a")/18)/2,3))
        # Income index: log GNI rescaled between log(100) and log(75000).
        a.append(round((np.log(_hdi_number(row, u"Gross national income (GNI) per capita (2011 PPP $), 2013"))-np.log(100))
                       /(np.log(75000)-np.log(100)),3))
        a.append(round((a[2]*a[3]*a[4])**(1.0/3.0),3))
        codes[repr(ids.loc[i][0])]=a
    except (KeyError, ValueError, TypeError, IndexError):
        # Best-effort: a country with no matching row or an unparsable value is
        # skipped. Only data-related errors are caught, so a programming error
        # (e.g. NameError because `hdi` was never loaded) is no longer hidden
        # behind an empty result.
        skipped_countries.append(i)
        continue
In [8]:
import json
# Serialise `codes` to hdi2.json. `open` replaces the Python-2-only `file`
# builtin, and the `with` block closes (and therefore flushes) the handle
# deterministically instead of leaving that to garbage collection.
with open('hdi2.json','w') as f:
    f.write(json.dumps(codes))