In [1]:
import pandas as pd
import numpy as np
import folium
# ***************************** WHO DATASET ************************************
# The following abbreviations are used to give the dataset's columns shorter,
# more convenient names than the originals
# ******************************************************************************
#ALRI Acute lower respiratory infections
#BABT Birth asphyxia and birth trauma
#OCPNC Other communicable, perinatal and nutritional conditions
#SOICN Sepsis and other infectious conditions of the newborn
#CA Congenital anomalies
#DD Diarrhoeal diseases
#AIDS HIV/AIDS
#INJ Injuries
#MAL Malaria
#MEA Measles
#MEN Meningitis/encephalitis
#OND Other noncommunicable diseases
#PER Pertussis
#PRE Prematurity
#
#N0-27D from 0 to 27 days of life
#N1-59M from 1 to 59 months of life
#N0-4Y from 0 to 4 years of life (N0-27D + N1-59M)
# ******************************************************************************
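# The legend above can also be captured as a dict, e.g. to expand the short
# column names into readable labels later on (an illustrative helper, not used below):
CAUSE_LABELS = {
    'ALRI': 'Acute lower respiratory infections',
    'BABT': 'Birth asphyxia and birth trauma',
    'OCPNC': 'Other communicable, perinatal and nutritional conditions',
    'SOICN': 'Sepsis and other infectious conditions of the newborn',
    'CA': 'Congenital anomalies',
    'DD': 'Diarrhoeal diseases',
    'AIDS': 'HIV/AIDS',
    'INJ': 'Injuries',
    'MAL': 'Malaria',
    'MEA': 'Measles',
    'MEN': 'Meningitis/encephalitis',
    'OND': 'Other noncommunicable diseases',
    'PER': 'Pertussis',
    'PRE': 'Prematurity',
}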
df_who = pd.read_csv('./mort_child_ds.csv', sep=';', encoding = "ISO-8859-1")
In [2]:
# The list of WHO countries is saved to a file so that its names can be matched
# against the geo dataset and the ISO codes can then be added to the WHO dataset
df_who_countries = df_who[['Country']].drop_duplicates(keep='first')
# Let's (re)create a temp directory
!if [ -d "./tmp" ]; then rm -fr ./tmp; fi
!mkdir tmp
# ... and now we can write there the file
df_who_countries.to_csv('tmp/df_who_countries.txt', sep=';')
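# The same setup could be done portably from Python instead of the shell
# (a sketch using the standard library; left commented so it does not delete
# the file just written above):
# import os, shutil
# shutil.rmtree('./tmp', ignore_errors=True)
# os.makedirs('tmp', exist_ok=True)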
In [3]:
df_who.head(5)
Out[3]:
In [4]:
df_who.columns
Out[4]:
In [5]:
# Adding a column with the sum of each cause's contribution to child mortality
causes = ['ALRI', 'BABT', 'OCPNC', 'SOICN', 'CA', 'DD', 'AIDS',
          'INJ', 'MAL', 'MEA', 'MEN', 'OND', 'PER', 'PRE']
# skipna=False keeps the behaviour of chained '+': a NaN in any cause propagates
df_who['TOT'] = df_who[['N0-4Y-' + c for c in causes]].sum(axis=1, skipna=False)
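# Per the legend, N0-4Y = N0-27D + N1-59M for each cause. A quick sanity check,
# assuming the age-band columns follow the same naming scheme (e.g. 'N0-27D-ALRI'
# and 'N1-59M-ALRI'; adjust if the actual names differ):
for c in causes:
    band_sum = df_who['N0-27D-' + c] + df_who['N1-59M-' + c]
    print(c, '- rows where the bands do not add up:', (band_sum != df_who['N0-4Y-' + c]).sum())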
In [6]:
# ***************************** GEOGRAPHICAL INFORMATION ******************************************
df_geo_ds = pd.read_csv('./geo_ds.csv', sep=';', encoding = "ISO-8859-1")
In [7]:
df_geo_ds.head(10)
Out[7]:
In [8]:
df_geo_ds_codes = df_geo_ds[['ISO3','Country','latitude','longitude','Population']].copy()
In [9]:
# Creating a file with the GEO dataset's list of country names (duplicates are
# present because the same country appears once per time zone)
df_geo_ds_codes = df_geo_ds_codes.drop_duplicates(keep='first')
df_geo_ds_codes.to_csv('tmp/df_geo_ds_codes.csv', sep=';')
In [10]:
df_geo_ds_codes.head(10)
Out[10]:
In [11]:
# Create a file containing only the WHO countries' names (column 2 of the ';'-separated file)
!awk 'BEGIN {FS=";"}; {print $2}' tmp/df_who_countries.txt > tmp/df_who_countries_names.txt
# Create a file containing only the GEO countries' names (column 3 of the ';'-separated file)
!awk 'BEGIN {FS=";"}; {print $3}' tmp/df_geo_ds_codes.csv > tmp/df_geo_ds_codes_names.csv
# Write the WHO country names that are missing from the GEO list to not_in_geo.txt
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/df_geo_ds_codes_names.csv tmp/df_who_countries_names.txt > tmp/not_in_geo.txt
# Write the GEO country names that are missing from the WHO list to not_in_who.txt
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/df_who_countries_names.txt tmp/df_geo_ds_codes_names.csv > tmp/not_in_who.txt
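# The same comparison can be done directly in pandas without shelling out
# (equivalent to the awk anti-joins above):
who_names = set(df_who_countries['Country'])
geo_names = set(df_geo_ds_codes['Country'])
print('In WHO but not in GEO:', sorted(who_names - geo_names))
print('In GEO but not in WHO:', sorted(geo_names - who_names))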
In [12]:
!cat tmp/not_in_geo.txt
In [13]:
!cat tmp/not_in_who.txt
In [14]:
# Align the WHO country names with the GEO dataset's names
df_who['Country'] = df_who['Country'].replace({
    'Brunei Darussalam': 'Brunei',
    "Côte d'Ivoire": 'Ivory Coast',
    'Cabo Verde': 'Cape Verde',
    'Congo': 'Republic of the Congo',
    "Democratic People's Republic of Korea": 'North Korea',
    'Iran (Islamic Republic of)': 'Iran',
    "Lao People's Democratic Republic": 'Laos',
    'Micronesia (Federated States of)': 'Micronesia',
    'Republic of Korea': 'South Korea',
    'Republic of Moldova': 'Moldova',
    'Russian Federation': 'Russia',
    'Syrian Arab Republic': 'Syria',
    'The former Yugoslav republic of Macedonia': 'Macedonia',
    'Timor-Leste': 'East Timor',
    'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
    'United Republic of Tanzania': 'Tanzania',
    'United States of America': 'United States',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Viet Nam': 'Vietnam',
})
In [15]:
# Now it is possible to merge the GEO and WHO datasets to add the ISO codes to the WHO dataset
df_who_geo = pd.merge(df_who, df_geo_ds_codes, on='Country', how='inner')
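# To double-check that the renames caught everything, an outer merge with
# indicator=True lists any country names that still fail to match (a quick sketch):
check = pd.merge(df_who[['Country']].drop_duplicates(), df_geo_ds_codes[['Country']],
                 on='Country', how='outer', indicator=True)
print(check[check['_merge'] != 'both']['Country'].tolist())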
In [16]:
# The 'Year' values must be strings so that they can be used as column labels after pivoting
df_who_geo[['Year']] = df_who_geo[['Year']].astype(str)
df_who_geo.head(5)
Out[16]:
In [17]:
# From the who-geo dataset we now create a new dataset using the Year values as columns
df_who_geo_years = df_who_geo.pivot_table(index='ISO3', columns='Year', values='TOT').copy()
df_who_geo_years.reset_index(inplace = True)
In [18]:
df_who_geo_years.head(5)
Out[18]:
In [19]:
df_who_geo_years.columns
Out[19]:
In [20]:
df_who_geo_years.set_index('ISO3')
Out[20]:
In [21]:
# ***************************** IMF *******************************************
df_imf = pd.read_csv('./imf_weo_ds.csv',sep=';', encoding = "ISO-8859-1")
In [22]:
df_imf.columns
Out[22]:
In [23]:
df_imf_filt = df_imf[['ISO','Country','Units','Scale','2000','2001', '2002',
'2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
'2012', '2013', '2014', '2015','Estimates Start After','WEO Subject Code','Subject Descriptor','Subject Notes']].copy()
In [24]:
# Let's see how this dataset is composed for a single country
df_imf_filt[df_imf_filt['Country']=='Italy'][['WEO Subject Code','Subject Descriptor', 'Subject Notes']]
Out[24]:
In [25]:
# Population
df_imf_pop = df_imf_filt[df_imf_filt['WEO Subject Code']=='LP']
# Employment
df_imf_empl = df_imf_filt[df_imf_filt['WEO Subject Code']=='LE']
# Unemployment rate
df_imf_unempl_rate = df_imf_filt[df_imf_filt['WEO Subject Code']=='LUR']
# GDP per capita
df_imf_gdp_pc = df_imf_filt[df_imf_filt['WEO Subject Code']=='NGDPDPC']
# GDP
df_imf_gdp = df_imf_filt[df_imf_filt['WEO Subject Code']=='NGDPD']
# GDP per capita based on purchasing power parity (PPP)
df_imf_gdp_xcppp_cp = df_imf_filt[df_imf_filt['WEO Subject Code']=='PPPPC']
In [26]:
# Let's create a new IMF dataset with the population for the years 2000-2015
years = [str(y) for y in range(2000, 2016)]
df_imf_pop_years = df_imf_pop[['ISO'] + years].copy()
df_imf_pop_years.replace('n/a', 0, inplace=True)
In [27]:
# Let's create a new IMF dataset with the GDP per capita for the years 2000-2015
df_imf_gdp_pc_years = df_imf_gdp_pc[['ISO'] + years].copy()
df_imf_gdp_pc_years.replace('n/a', 0, inplace=True)
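# The two cells above follow the same pattern; a small helper could factor it out
# (an illustrative sketch, not used below):
def weo_year_table(df, id_col='ISO'):
    """Keep the ID column plus the 2000-2015 columns and zero out 'n/a' entries."""
    return df[[id_col] + years].copy().replace('n/a', 0)
# e.g. df_imf_pop_years = weo_year_table(df_imf_pop)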
In [28]:
# We now merge the who-geo-years dataset with the imf-pop-years dataset, using the
# '_who' and '_pop' suffixes to distinguish the WHO and population values for the same year
df_who_pop_geo_years = pd.merge(df_who_geo_years, df_imf_pop_years, left_on='ISO3', right_on='ISO', suffixes=['_who', '_pop'], how='inner')
df_who_pop_geo_years.head(5)
Out[28]:
In [29]:
# And now we merge the who-geo-years dataset with the imf-gdp-pc-years dataset, using the
# '_who' and '_gdp' suffixes to distinguish the WHO and GDP values for the same year
df_who_gdp_geo_years = pd.merge(df_who_geo_years, df_imf_gdp_pc_years, left_on='ISO3', right_on='ISO', suffixes=['_who', '_gdp'], how='inner')
df_who_gdp_geo_years.head(5)
Out[29]:
In [30]:
df_who_gdp_geo_years.columns
Out[30]:
In [31]:
# Loop to strip the ',' and '.' characters from the GDP values
for year in years:
    df_who_gdp_geo_years[year + '_gdp'] = (df_who_gdp_geo_years[year + '_gdp']
                                           .astype(str)
                                           .str.replace(',', '', regex=False)
                                           .str.replace('.', '', regex=False))
df_who_gdp_geo_years.head(5)
Out[31]:
In [32]:
df_who_pop_geo_years.columns
Out[32]:
In [33]:
# Calculate, for each country and year, the ratio between the WHO child-mortality
# total and the total population (after stripping the separators the population
# figures are in thousands, hence the '* 1000'; the deaths themselves are added
# back into the denominator)
for year in years:
    df_who_pop_geo_years[year + '_pop'] = (df_who_pop_geo_years[year + '_pop']
                                           .astype(str)
                                           .str.replace(',', '', regex=False)
                                           .str.replace('.', '', regex=False))
    who = df_who_pop_geo_years[year + '_who'].astype(float)
    pop = df_who_pop_geo_years[year + '_pop'].astype(float)
    df_who_pop_geo_years[year + '_who'] = who / (pop * 1000 + who)
# Renaming the columns to get back the plain year names
df_who_pop_geo_years.rename(columns={year + '_who': year for year in years}, inplace=True)
# Let's drop the IMF population columns, which are no longer useful in this dataset
df_who_pop_geo_years.drop([year + '_pop' for year in years] + ['ISO'], inplace=True, axis=1)
# A ratio of exactly 1 means the population for that country/year was missing
# (the denominator then reduces to the WHO value alone), so those entries are reset to 0
df_who_pop_geo_years.replace(1, 0, inplace=True)
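# Quick check: no ratio of exactly 1 should survive the replace above
print((df_who_pop_geo_years[years] == 1).sum().sum(), 'cells still equal to 1')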
df_who_pop_geo_years.head(200)
Out[33]:
In [34]:
# Correlation between GDP per capita and child mortality, year by year. We expect
# a negative correlation (an increase in GDP should imply a decrease in child mortality)
gdp_who_data_corr_years = pd.DataFrame(index=years, columns=['Correlation'])
for year in years:
    df_who_gdp_geo_years[year + '_gdp'] = df_who_gdp_geo_years[year + '_gdp'].astype(float)
    df_who_gdp_geo_years[year + '_who'] = df_who_gdp_geo_years[year + '_who'].astype(float)
    gdp_who_data_corr_years.at[year, 'Correlation'] = \
        df_who_gdp_geo_years[year + '_gdp'].corr(df_who_gdp_geo_years[year + '_who'])
gdp_who_data_corr_years.head(16)
Out[34]:
In [35]:
# ***************************** REL *******************************************
df_rel = pd.read_csv('./religions_corr_ds_new.csv', sep=';', encoding = "ISO-8859-1")
In [36]:
df_rel.head(5)
Out[36]:
In [37]:
# Let's replace NaN with 0
df_rel = df_rel.fillna(0)
In [38]:
df_rel_countries = df_rel[['Country']]
df_rel_countries.to_csv('tmp/rel_countries.txt')
In [39]:
# Create a file containing only the religion dataset's country names (column 2 of the ','-separated file)
!awk 'BEGIN {FS=","}; {print $2}' tmp/rel_countries.txt > tmp/rel_countries_names.txt
# Write the religion-dataset country names that are missing from the GEO list to not_in_geo_rel.txt
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/df_geo_ds_codes_names.csv tmp/rel_countries_names.txt > tmp/not_in_geo_rel.txt
# Write the GEO country names that are missing from the religion list to not_in_rel.txt
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/rel_countries_names.txt tmp/df_geo_ds_codes_names.csv > tmp/not_in_rel.txt
In [40]:
!cat tmp/not_in_geo_rel.txt
In [41]:
!cat tmp/not_in_rel.txt
In [42]:
# Align the religion dataset's country names with the GEO dataset's names
df_rel['Country'] = df_rel['Country'].replace({
    'Bahamas The': 'Bahamas',
    'Cabo Verde': 'Cape Verde',
    'Congo Democratic Republic of the': 'Democratic Republic of the Congo',
    'Congo Republic of the': 'Republic of the Congo',
    "Cote d'Ivoire": 'Ivory Coast',
    'Gambia The': 'Gambia',
    'Holy See': 'Vatican',
    'Korea North': 'North Korea',
    'Korea South': 'South Korea',
    'Macau': 'Macao',
    'Micronesia Federated States of': 'Micronesia',
    'Niue Ekalesia': 'Niue',
    'Pitcairn Islands': 'Pitcairn',
    'Saint Helena Ascension and Tristan da Cunha': 'Saint Helena',
    'Timor-Leste': 'East Timor',
    'Virgin Islands': 'U.S. Virgin Islands',
    'Burma': 'Myanmar',
    'West Bank': 'Palestinian Territory',
})
In [43]:
# Now we can merge the GEO dataset with the religion dataset to add the geo codes
df_rel_geo = pd.merge(df_rel, df_geo_ds_codes, on='Country', how='inner')
In [44]:
df_rel_geo.head(5)
Out[44]:
In [45]:
df_rel_geo.columns
Out[45]:
In [46]:
# Now we can merge the WHO dataset with the religion dataset
df_rel_who = pd.merge(df_rel_geo, df_who_geo_years, on='ISO3',how='inner')
In [47]:
index1=['Muslim', 'Catholic', 'Protestant', 'Buddhism','Orthodox Christian', 'Hinduism',
'Shintoism', 'Jewish','Christians', 'Atheism']
columns1 = ['Correlation']
rel_who_data_corr_years = pd.DataFrame(index=index1, columns=columns1)
for col in index1:
df_rel_who[col] = df_rel_who[col].astype(float)
rel_who_data_corr_years.at[col,'Correlation'] = df_rel_who['2015'].corr(df_rel_who[col])
rel_who_data_corr_years.head(10)
Out[47]:
In [48]:
index2=['latitude','longitude']
columns2 = ['Correlation']
rel_geo_data_corr_years = pd.DataFrame(index=index2, columns=columns2)
for col in index2:
df_rel_who[col] = df_rel_who[col].str.replace(',','.')
df_rel_who[col] = df_rel_who[col].astype(float)
rel_geo_data_corr_years.at[col,'Correlation'] = df_rel_who['2015'].corr(df_rel_who[col])
rel_geo_data_corr_years.head(2)
Out[48]:
In [49]:
# The file countries.geojson will be used to draw the data on a geographical map with folium.
# Let's extract from countries.geojson the list of ISO codes it uses and compare it
# with the ISO codes used in the datasets: the geojson codes are written to the file
# tmp/geojson_codes.txt
!python -m json.tool countries.geojson | grep -i \"adm0_a3\": | sed s/' \"adm0_a3\": \"'//g | sed s/'\"\,'//g > tmp/geojson_codes.txt
# Let's now create files containing the geo and imf datasets' geo codes
!awk 'BEGIN {FS=";"}; {print $2}' tmp/df_geo_ds_codes.csv > tmp/geo_country_codes.txt
!awk 'BEGIN {FS=";"}; {print $2}' imf_weo_ds.csv | uniq > tmp/imf_country_codes.txt
# And now we create files containing the rel and who datasets' geo codes
df_rel_geo[['ISO3']].to_csv('tmp/rel_geo_countries.txt')
df_who_geo[['ISO3']].to_csv('tmp/who_geo_countries.txt')
# Finally we create files containing only the rel and who datasets' geo codes
!awk 'BEGIN {FS=","}; {print $2}' tmp/rel_geo_countries.txt > tmp/rel_geo_country_codes.txt
!awk 'BEGIN {FS=","}; {print $2}' tmp/who_geo_countries.txt | uniq > tmp/who_geo_country_codes.txt
# Codes used in geojson file vs codes used in imf dataset
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/geojson_codes.txt tmp/imf_country_codes.txt > tmp/not_in_geojson_from_imf.txt
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/imf_country_codes.txt tmp/geojson_codes.txt > tmp/not_in_imf_from_geojson.txt
# Codes used in geojson file vs codes used in geo dataset
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/geojson_codes.txt tmp/geo_country_codes.txt > tmp/not_in_geojson_from_geo.txt
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/geo_country_codes.txt tmp/geojson_codes.txt > tmp/not_in_geo_from_geojson.txt
# Codes used in geojson file vs codes used in rel dataset
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/geojson_codes.txt tmp/rel_geo_country_codes.txt > tmp/not_in_geojson_from_rel.txt
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/rel_geo_country_codes.txt tmp/geojson_codes.txt > tmp/not_in_rel_from_geojson.txt
# Codes used in geojson file vs codes used in who dataset
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/geojson_codes.txt tmp/who_geo_country_codes.txt > tmp/not_in_geojson_from_who.txt
!awk 'FNR==NR {a[$0]++; next} !a[$0]' tmp/who_geo_country_codes.txt tmp/geojson_codes.txt > tmp/not_in_who_from_geojson.txt
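# The adm0_a3 extraction above can also be done with Python's json module instead
# of grep/sed (a sketch, assuming countries.geojson is a standard FeatureCollection):
import json
with open('countries.geojson', encoding='utf-8') as f:
    geojson = json.load(f)
geojson_codes = {feat['properties']['adm0_a3'] for feat in geojson['features']}
print(len(geojson_codes), 'codes found in the geojson file')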
In [50]:
!cat tmp/not_in_imf_from_geojson.txt| grep -v ISO | awk -vORS=, '{ print $1 }' | sed 's/,$/\n/'|sed "s/,/','/g"
In [51]:
# We can now remove from the geojson file the codes that are not present in the IMF dataset
!ogr2ogr -f "GeoJSON" filtered_imf.geojson -dialect SQLITE -sql "SELECT * FROM OGRGeoJSON WHERE adm0_a3 NOT IN ('ATA', 'ATF','CUB','CYN','FLK','GRL','KOS','NCL','PRK','PSX','SAH','SDS','SOL','SOM')" countries.geojson
In [52]:
!cat tmp/not_in_who_from_geojson.txt| grep -v ISO3 | awk -vORS=, '{ print $1 }' | sed 's/,$/\n/'|sed "s/,/','/g"
In [53]:
# We can now remove from the geojson file the codes that are not present in the WHO dataset
!ogr2ogr -f "GeoJSON" filtered_who.geojson -dialect SQLITE -sql "SELECT * FROM OGRGeoJSON WHERE adm0_a3 NOT IN ('ATA','ATF','BOL','CIV','CYN','FLK','GRL','KOS','LAO','NCL','PRI','PRK','PSX','SAH','SDS','SOL','TWN')" countries.geojson
In [54]:
!cat tmp/not_in_rel_from_geojson.txt| grep -v ISO3 | awk -vORS=, '{ print $1 }' | sed 's/,$/\n/'|sed "s/,/','/g"
In [55]:
# We can now remove from the geojson file the codes that are not present in the REL dataset
!ogr2ogr -f "GeoJSON" filtered_rel.geojson -dialect SQLITE -sql "SELECT * FROM OGRGeoJSON WHERE adm0_a3 NOT IN ('ATA','ATF','BIH','CIV','CYN','KOS','PSX','SAH','SDS','SOL')" countries.geojson
In [56]:
!cat tmp/not_in_geo_from_geojson.txt| grep -v ISO3 | awk -vORS=, '{ print $1 }' | sed 's/,$/\n/'|sed "s/,/','/g"
In [57]:
# We can now remove from the geojson file the codes that are not present in the GEO dataset
!ogr2ogr -f "GeoJSON" filtered_geo.geojson -dialect SQLITE -sql "SELECT * FROM OGRGeoJSON WHERE adm0_a3 NOT IN ('CYN','KOS','PSX','SAH','SDS','SOL')" countries.geojson
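# The same filtering can be done in Python instead of ogr2ogr, reusing the geojson
# dict loaded in the sketch above (codes_to_drop is whichever exclusion list applies):
codes_to_drop = {'CYN', 'KOS', 'PSX', 'SAH', 'SDS', 'SOL'}
filtered = {'type': 'FeatureCollection',
            'features': [feat for feat in geojson['features']
                         if feat['properties']['adm0_a3'] not in codes_to_drop]}
with open('filtered_geo.geojson', 'w', encoding='utf-8') as f:
    json.dump(filtered, f)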
In [58]:
df_data_plot = df_imf_gdp_pc[['ISO','Country','Units','Scale','2000']].copy()
In [59]:
!cat tmp/not_in_geojson_from_imf.txt| grep -v ISO | awk -vORS=, '{ print $1 }' | sed 's/,$/\n/'|sed "s/,/','/g"
In [60]:
df_data_plot = df_data_plot[~df_data_plot['ISO'].isin(['ATG','BHR','BRB','CPV','COM','DMA','GRD','HKG','KIR','UVK','MAC','MDV','MLT','MHL','MUS','FSM','NRU','PLW','WSM','SMR','STP','SYC','SGP','SSD','KNA','LCA','VCT','TON','TUV'
])]
In [61]:
df_data_plot.replace('n/a', 0, inplace=True)
df_data_plot['2000'] = (df_data_plot['2000'].astype(str)
                        .str.replace(',', '', regex=False)
                        .str.replace('.', '', regex=False)
                        .astype(np.int64))
df_data_plot.head(5)
Out[61]:
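# The same cleaning recurs for the 2015 plots below; a small helper could keep it
# in one place (an illustrative sketch, not used in the cells that follow):
def clean_weo_column(df, col, keep_decimal=False):
    """Strip thousands separators (and optionally the decimal point), then cast."""
    s = df[col].astype(str).str.replace(',', '', regex=False)
    if not keep_decimal:
        s = s.str.replace('.', '', regex=False)
    df[col] = s.astype(float)
# e.g. clean_weo_column(df_data_plot2, '2015')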
In [62]:
df_data_plot2 = df_imf_gdp_pc[['ISO','Country','Units','Scale','2015']].copy()
df_data_plot2 = df_data_plot2[~df_data_plot2['ISO'].isin(['ATG','BHR','BRB','CPV','COM','DMA','GRD','HKG','KIR','UVK','MAC','MDV','MLT','MHL','MUS','FSM','NRU','PLW','WSM','SMR','STP','SYC','SGP','SSD','KNA','LCA','VCT','TON','TUV'])]
df_data_plot2.replace('n/a', 0, inplace=True)
df_data_plot2['2015'] = (df_data_plot2['2015'].astype(str)
                         .str.replace(',', '', regex=False)
                         .str.replace('.', '', regex=False)
                         .astype(np.int64))
df_data_plot2.head(5)
Out[62]:
In [63]:
df_data_plot3 = df_imf_pop[['ISO','Country','Units','Scale','2015']].copy()
df_data_plot3 = df_data_plot3[~df_data_plot3['ISO'].isin(['ATG','BHR','BRB','CPV','COM','DMA','GRD','HKG','KIR','UVK','MAC','MDV','MLT','MHL','MUS','FSM','NRU','PLW','WSM','SMR','STP','SYC','SGP','SSD','KNA','LCA','VCT','TON','TUV'])]
df_data_plot3.replace('n/a', 0, inplace=True)
df_data_plot3['2015'] = (df_data_plot3['2015'].astype(str)
                         .str.replace(',', '', regex=False)
                         .str.replace('.', '', regex=False)
                         .astype(np.int64))
df_data_plot3.head(5)
Out[63]:
In [64]:
df_data_plot4 = df_imf_gdp[['ISO','Country','Units','Scale','2015']].copy()
df_data_plot4 = df_data_plot4[~df_data_plot4['ISO'].isin(['ATG','BHR','BRB','CPV','COM','DMA','GRD','HKG','KIR','UVK','MAC','MDV','MLT','MHL','MUS','FSM','NRU','PLW','WSM','SMR','STP','SYC','SGP','SSD','KNA','LCA','VCT','TON','TUV'])]
df_data_plot4.replace('n/a', 0, inplace=True)
# GDP values keep their decimal point here; only the thousands separators are stripped
df_data_plot4['2015'] = (df_data_plot4['2015'].astype(str)
                         .str.replace(',', '', regex=False)
                         .astype(float))
df_data_plot4.head(5)
Out[64]:
In [65]:
map_imf = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_imf.geojson"
folium.Choropleth(geo_data=json_geo, data=df_data_plot, columns=['ISO', '2000'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='GDP per capita in US dollars - Year 2000').add_to(map_imf)
map_imf
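# The map renders inline; it could also be saved to a standalone HTML file, e.g.:
# map_imf.save('tmp/map_gdp_pc_2000.html')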
Out[65]:
In [66]:
map_gdp_2015 = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_imf.geojson"
folium.Choropleth(geo_data=json_geo, data=df_data_plot2, columns=['ISO', '2015'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='GDP per capita in US dollars - Year 2015').add_to(map_gdp_2015)
map_gdp_2015
Out[66]:
In [67]:
map_pop_2015 = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_imf.geojson"
folium.Choropleth(geo_data=json_geo, data=df_data_plot3, columns=['ISO', '2015'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Population - Year 2015').add_to(map_pop_2015)
map_pop_2015
Out[67]:
In [68]:
map_gdp_tot_2015 = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_imf.geojson"
folium.Choropleth(geo_data=json_geo, data=df_data_plot4, columns=['ISO', '2015'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='GDP in US dollars - Year 2015').add_to(map_gdp_tot_2015)
map_gdp_tot_2015
Out[68]:
In [69]:
map_who_2000 = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_who.geojson"
folium.Choropleth(geo_data=json_geo, data=df_who_pop_geo_years, columns=['ISO3', '2000'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Child mortality / population - Year 2000').add_to(map_who_2000)
map_who_2000
Out[69]:
In [70]:
map_who_2015 = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_who.geojson"
folium.Choropleth(geo_data=json_geo, data=df_who_pop_geo_years, columns=['ISO3', '2015'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Child mortality / population - Year 2015').add_to(map_who_2015)
map_who_2015
Out[70]:
In [71]:
df_rel_geo_mus = df_rel_geo[['Muslim','ISO3', 'Country']].copy()
df_rel_geo_cat = df_rel_geo[['Catholic','ISO3', 'Country']].copy()
df_rel_geo_pro = df_rel_geo[['Protestant','ISO3', 'Country']].copy()
df_rel_geo_bud = df_rel_geo[['Buddhism','ISO3', 'Country']].copy()
In [72]:
df_rel_geo_mus.head(5)
Out[72]:
In [73]:
# Coerce the religion shares to numeric (unparseable entries become NaN)
for df_r, col in [(df_rel_geo_mus, 'Muslim'), (df_rel_geo_cat, 'Catholic'),
                  (df_rel_geo_pro, 'Protestant'), (df_rel_geo_bud, 'Buddhism')]:
    df_r[col] = pd.to_numeric(df_r[col].astype(str), errors='coerce')
df_rel_geo_mus.head(5)
Out[73]:
In [74]:
map_mus = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_rel.geojson"
folium.Choropleth(geo_data=json_geo, data=df_rel_geo_mus, columns=['ISO3', 'Muslim'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Muslim distribution in the world').add_to(map_mus)
map_mus
Out[74]:
In [75]:
map_cat = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_rel.geojson"
folium.Choropleth(geo_data=json_geo, data=df_rel_geo_cat, columns=['ISO3', 'Catholic'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Catholic distribution in the world').add_to(map_cat)
map_cat
Out[75]:
In [76]:
map_pro = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_rel.geojson"
folium.Choropleth(geo_data=json_geo, data=df_rel_geo_pro, columns=['ISO3', 'Protestant'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Protestant distribution in the world').add_to(map_pro)
map_pro
Out[76]:
In [77]:
map_bud = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered_rel.geojson"
folium.Choropleth(geo_data=json_geo, data=df_rel_geo_bud, columns=['ISO3', 'Buddhism'],
                  key_on='feature.properties.adm0_a3', fill_color='YlGnBu',
                  fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Buddhism distribution in the world').add_to(map_bud)
map_bud
Out[77]: