In [2]:
import numpy as np, requests, pandas as pd, zipfile, StringIO
In [3]:
countries=pd.read_html('http://www.geonames.org/countries/',header=0,infer_types=False)[1]
countries.columns=['ISO2','ISO3','ISONUM','FIPS','Country','Capital','Area','Population','Continent']
countries.set_index('Country',drop=True,inplace=True)
countries.head(5)
Out[3]:
In [4]:
hdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'LEx, EDUx, INCx',header=1).dropna(axis=1, how='all').set_index('country')
hdi.head()
Out[4]:
In [5]:
ihdi=pd.read_excel('ihdi_series_cartagena.xlsx',sheetname=u'iLEx, iEDUx,iINCx',header=1).dropna(axis=1, how='all').set_index('country')
ihdi.head()
Out[5]:
In [6]:
gii=pd.read_excel('gii_series_cartagena.xlsx',sheetname=u'GII',header=2).dropna(axis=1, how='all').set_index('Country')
gii.head()
Out[6]:
In [7]:
cc={
'Bolivia (Plurinational State of)':'Bolivia',
'Brunei Darussalam':'Brunei',
'Congo':'Republic of the Congo',
'Congo (Democratic Republic of the)':'Democratic Republic of the Congo',
"Cote d'Ivoire":'Ivory Coast',
'Hong Kong, China (SAR)':'Hong Kong',
'Iran (Islamic Republic of)':'Iran',
"Korea (Democratic People's Republic of)":'North Korea',
'Korea (Republic of)':'South Korea',
"Lao People's Democratic Republic":'Laos',
'Libyan Arab Jamahiriya':'Libya',
'Micronesia (Federated States of)':'Micronesia',
'Moldova (Republic of)':'Moldova',
'Myanmar':'Myanmar [Burma]',
'Palestine (State of)':'Palestine',
'Russian Federation':'Russia',
'Sao Tome and Principe':u'São Tomé and PrÃncipe',
'Syrian Arab Republic':'Syria',
'Tanzania (United Republic of)':'Tanzania',
'The former Yugoslav Republic of Macedonia':'Macedonia',
'Timor-Leste':'East Timor',
'Venezuela (Bolivarian Republic of)':'Venezuela'
}
cc2={
"Cote d'Ivoire":u"Côte d'Ivoire",
"Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)",
'Palestine (State of)':'Palestine, State of',
'Vietnam':'Viet Nam',
'Libyan Arab Jamahiriya':'Libya'
}
cc3={
'Libyan Arab Jamahiriya':'Libya'
}
cc4={
"Korea (Democratic People's Republic of)":"Korea (Democratic People's Rep. of)",
'Libyan Arab Jamahiriya':'Libya'
}
continent_converter={
'EU':'Europe',
'AS':'Asia',
'SA':'South America',
'nan':'North America',
'AN':'Antarctica',
'AF':'Africa',
'OC':'Oceania'
}
def country_name_converter(country):
if country in cc: return cc[country]
else: return country
def country_name_converter2(country):
if country in cc2: return cc2[country]
else: return country
def country_name_converter3(country):
if country in cc3: return cc3[country]
else: return country
def country_name_converter4(country):
if country in cc4: return cc4[country]
else: return country
In [8]:
data={}
for country in ihdi.index:
if country_name_converter(country) in countries.index:
icountry=country_name_converter(country)
data[countries.loc[icountry]['ISONUM']]={}
data[countries.loc[icountry]['ISONUM']]['name']=icountry
data[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3']
data[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population']
data[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area']
data[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']]
data[countries.loc[icountry]['ISONUM']]['hdi']={}
data[countries.loc[icountry]['ISONUM']]['ihdi']={}
data[countries.loc[icountry]['ISONUM']]['gii']={}
for year in range(2010,2014):
try:
data[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
if np.isnan(hdi.loc[country]['edux'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=round(hdi.loc[country]['edux'+repr(year)],3)
if np.isnan(hdi.loc[country]['lex'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=round(hdi.loc[country]['lex'+repr(year)],3)
if np.isnan(hdi.loc[country]['gnix'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=round(hdi.loc[country]['gnix'+repr(year)],3)
except: print country,'hdi'
try:
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={}
if np.isnan(ihdi.loc[country]['iedux'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['edu']=round(ihdi.loc[country]['iedux'+repr(year)],3)
if np.isnan(ihdi.loc[country]['ilex'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['le']=round(ihdi.loc[country]['ilex'+repr(year)],3)
if np.isnan(ihdi.loc[country]['ignix'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']="NaN"
else: data[countries.loc[icountry]['ISONUM']]['ihdi'][year]['gni']=round(ihdi.loc[country]['ignix'+repr(year)],3)
except:
print country,'ihdi'
try:
gcountry=country_name_converter2(country)
if np.isnan(gii.loc[gcountry]['GII'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3)
except:
print country,'gii'
In [33]:
import json
file('data.json','w').write(json.dumps(data))
In [9]:
GNI=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Gross National Income',header=2).set_index('Country')
GNI.head()
Out[9]:
In [10]:
LE=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Life Expectency',header=2).set_index('Country')
LE.head()
Out[10]:
In [11]:
EYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Expected Years of Schooling',header=2).set_index('Country')
EYS.head()
Out[11]:
In [63]:
MYS=pd.read_excel('hdi_series_cartagena.xlsx',sheetname=u'Mean Years of Schooling',header=2).set_index('Country')
MYS.head()
#fux db
MYS.loc["Vanuatu","MYS1980"]=np.NaN
In [16]:
data2={}
for country in GNI.index:
if country_name_converter(country) in countries.index:
icountry=country_name_converter(country)
data2[countries.loc[icountry]['ISONUM']]={}
data2[countries.loc[icountry]['ISONUM']]['name']=icountry
data2[countries.loc[icountry]['ISONUM']]['code']=countries.loc[icountry]['ISO3']
data2[countries.loc[icountry]['ISONUM']]['population']=countries.loc[icountry]['Population']
data2[countries.loc[icountry]['ISONUM']]['area']=countries.loc[icountry]['Area']
data2[countries.loc[icountry]['ISONUM']]['continent']=continent_converter[countries.loc[icountry]['Continent']]
data2[countries.loc[icountry]['ISONUM']]['hdi']={}
for year in range(1980,2014):
try:
data2[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\
round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\
EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3)
data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\
round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3)
data2[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\
round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3)
except: print country,'hdi',year
In [231]:
import json
file('data2.json','w').write(json.dumps(data2))
In [64]:
for country in GNI.index:
if country_name_converter(country) in countries.index:
icountry=country_name_converter(country)
for year in range(1980,2010):
try:
data[countries.loc[icountry]['ISONUM']]['hdi'][year]={}
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']=\
round((MYS.loc[country_name_converter3(country)]['MYS'+repr(year)]/15+\
EYS.loc[country_name_converter3(country)]['EYS'+repr(year)]/18)/2,3)
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']=\
round((LE.loc[country_name_converter4(country)]['Life_Exp'+repr(year)]-20)/(85-20),3)
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']=\
round((np.log(GNI.loc[country]['GNI'+repr(year)])-np.log(100))/(np.log(75000)-np.log(100)),3)
if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['edu']="NaN"
if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['le']="NaN"
if np.isnan(data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']):
data[countries.loc[icountry]['ISONUM']]['hdi'][year]['gni']="NaN"
data[countries.loc[icountry]['ISONUM']]['ihdi'][year]={'edu':"NaN",'le':"NaN",'gni':"NaN"}
data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
if year in [1995,2000,2005]:
try:
gcountry=country_name_converter2(country)
if np.isnan(gii.loc[gcountry]['GII'+repr(year)]):
data[countries.loc[icountry]['ISONUM']]['gii'][year]="NaN"
else: data[countries.loc[icountry]['ISONUM']]['gii'][year]=round(gii.loc[gcountry]['GII'+repr(year)],3)
except:
print country,'gii'
except: print country,'error',year
In [18]:
import json
file('data3.json','w').write(json.dumps(data))
In [65]:
pop=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheet="ESTIMATES",header=16)
In [66]:
pop=pop.set_index(['Country code'])
In [67]:
for i in data:
data[i]['population']={}
for year in range(1980,2011):
data[i]['population'][year]=\
pop.loc[(int)(i)].values[::-1][2010-year]*1000
In [68]:
pop2=pd.read_excel('http://esa.un.org/unpd/wpp/Excel-Data/EXCEL_FILES/1_Population/WPP2012_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.XLS',sheetname="NO CHANGE",header=16)
In [69]:
pop2=pop2.set_index(['Country code'])
In [70]:
for i in data:
for year in range(2011,2014):
data[i]['population'][year]=\
pop2.loc[(int)(i)].values[::-1][2100-year]*1000
In [71]:
import json
file('data3.json','w').write(json.dumps(data))