In [18]:
import numpy as np, requests, pandas as pd
In [24]:
# Download the country table from GeoNames. read_html returns a list of every
# <table> it finds on the page; the table at position 1 is the one used here,
# with its first row taken as the header.
# NOTE(review): `infer_types` is only accepted by old pandas releases -- confirm
# the pandas version this notebook is pinned to before upgrading.
geonames_tables = pd.read_html(
    'http://www.geonames.org/countries/',
    header=0,
    infer_types=False,
)
countries = geonames_tables[1]

# Replace the scraped header with short, fixed column names.
countries.columns = [
    'ISO2', 'ISO3', 'ISONUM', 'FIPS',
    'Country', 'Capital', 'Area', 'Population', 'Continent',
]

# Index the rows by country name; the 'Country' column is dropped from the data.
countries = countries.set_index('Country', drop=True)
countries.head(5)
Out[24]:
In [35]:
# Build a lookup keyed by the numeric ISO code:
#     {ISONUM as int: [Population as float, ISO3 as str]}
codes = {}
for i in countries.index:
    try:
        # One row lookup per country instead of one per field.
        row = countries.loc[i]
        iso_num = int(row['ISONUM'])
        population = float(row['Population'])
        iso3 = str(row['ISO3'])
    except (KeyError, TypeError, ValueError):
        # Best-effort: rows whose fields cannot be looked up or converted are
        # skipped. Only conversion/lookup errors are caught so that
        # KeyboardInterrupt and SystemExit still propagate.
        continue
    codes[iso_num] = [population, iso3]
In [94]:
import re
def _parse_number(raw):
    """Return ``raw`` as a float after removing every character that is not a digit or '.'.

    Note that the pattern also removes a leading minus sign, so the result is
    never negative.
    """
    return float(re.sub(r'[^\d.]+', '', raw))


# Build, per entry of `ids.index`, the list
#     [i, HDI value, life index, education index, income index, composite, composite]
# keyed by repr(ids.loc[i][0]). Every component is rounded to 3 decimals.
# `ids`, `hdi` and `country_name_converter` are not defined in this cell; they
# must already exist in the kernel.
# NOTE(review): the fixed bounds used below (20/85 years, 15 and 18 years of
# schooling, 100/75000 GNI) look like the UNDP HDI goalposts -- TODO confirm.
codes = {}
for i in ids.index:
    try:
        # Look the HDI row up once; every indicator below is read from it.
        row = hdi.loc[country_name_converter(i)]

        hdi_value = round(_parse_number(row["Human Development Index (HDI) Value, 2013"]), 3)

        life_index = round(
            (_parse_number(row["Life expectancy at birth (years), 2013"]) - 20) / (85 - 20), 3)

        # Mean of the two schooling ratios.
        education_index = round(
            (_parse_number(row["Mean years of schooling (years), 2012 a"]) / 15 +
             _parse_number(row["Expected years of schooling (years), 2012 a"]) / 18) / 2, 3)

        # Income is normalised on a log scale.
        income_index = round(
            (np.log(_parse_number(row["Gross national income (GNI) per capita (2011 PPP $), 2013"]))
             - np.log(100)) / (np.log(75000) - np.log(100)), 3)

        # Geometric mean of the three (already rounded) component indices.
        composite = round((life_index * education_index * income_index) ** (1.0 / 3.0), 3)

        # NOTE(review): the composite is stored twice, exactly as before, so the
        # output keeps its 7-element shape -- confirm whether the last slot was
        # meant to hold a different value.
        codes[repr(ids.loc[i][0])] = [
            i, hdi_value, life_index, education_index, income_index, composite, composite,
        ]
    except Exception:
        # Best-effort: entries with a missing country or an unparsable value are
        # skipped. `Exception` (not a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating while still tolerating whatever the lookup
        # helper raises.
        continue
In [37]:
import json
# Serialise `codes` to ../pop.json. `open` inside a `with` block replaces the
# Python-2-only `file(...)` builtin and guarantees the handle is flushed and closed.
# NOTE(review): `codes` is assigned by more than one cell in this notebook, so
# the content written here depends on which of them ran last -- confirm the
# intended execution order.
with open('../pop.json', 'w') as pop_file:
    pop_file.write(json.dumps(codes))