In [ ]:
import os
import sys
working_directory = os.path.abspath('')
sys.path.append(os.path.normpath(os.path.join(working_directory, "..", "..")))
In [ ]:
# These libraries provide the numerical and data-handling tools used throughout the notebook.
import numpy as np
import pandas as pd
from math import e
from haversine import haversine
import ipywidgets as widgets
from ipywidgets import IntSlider
from IPython.display import display
# Visualization
import matplotlib
import matplotlib.pyplot as plt
import gos
from gos.visualization import map_plot
%matplotlib inline
# Reduce some visual clutter by limiting how many rows are printed at a time.
# This can be adjusted to match personal preferences.
pd.set_option("display.max_rows", 500)
In [ ]:
plt.style.use('ggplot')
The datasets used in the model are found in the ./data
subdirectory. Most are formatted as either CSV or XLSX files.
In [ ]:
%ls ./data
In [ ]:
skill = IntSlider(min=0, max=100, value=90)
display(skill)
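The slider sets the skill level of the migrating agents. Its current value is read later through `skill.value` when computing returns to skill and when labelling the result plots, so downstream cells should be re-run after adjusting it.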
The following shortcut function helps locate these data files easily.
In [ ]:
def file_path(name):
    """
    Shortcut function to get the relative path to the directory
    which contains the data.
    """
    return "./data/%s" % name
In [ ]:
column_names = ["Name", "Code"]

def country_codes():
    """
    Build country rows from their names, ISO codes, and numeric
    country codes.
    """
    cc = pd.read_csv(
        file_path("Country_List_ISO_3166_Codes_Latitude_Longitude.csv"),
        usecols=[0, 2, 3],
        index_col=1,
        keep_default_na=False)
    cc.columns = column_names
    return cc

def other_codes():
    """
    Read additional country codes from other.csv.
    """
    other = pd.read_csv(file_path("other.csv"), index_col=1)
    other.columns = column_names[0:1]
    return other
In [ ]:
world = gos.World(index=set(country_codes().index) | set(other_codes().index))
In [ ]:
gos.Neighborhood.update(country_codes().groupby("Alpha-3 code")["Name"].apply(list).to_dict())
gos.Neighborhood.update(other_codes().groupby('ISO')["Name"].apply(list).to_dict())
gos.Neighborhood.update(country_codes().groupby("Alpha-3 code")["Code"].apply(list).to_dict())
The Freedom Index comes from Freedom House.
A high Freedom Index score indicates a politically free country.
In [ ]:
def freedom_index():
    """
    Read data from the Freedom Index.
    """
    # TODO: Add xlrd to requirements.
    xl = pd.ExcelFile(file_path("Freedom_index.xlsx"))
    return xl.parse(1)

fi = freedom_index().set_index("Country")
fi.columns = ["Freedom Index"]
fi.plot.hist(bins=10)
In [ ]:
def ab_values():
    """
    Read generated A/B values for each country.
    """
    return pd.read_excel(file_path("A&B values for RTS.xlsx")).T

ab = ab_values()
ab.plot.hist(subplots=True, sharex=False)
The Passport Index comes from Arton Capital.
A low Passport Index score indicates a welcoming country.
In [ ]:
def passport_index():
    """
    Read data from the Passport Index.
    """
    pi = pd.read_excel(file_path("PassportIndex.xlsx"))
    pi = pi.set_index("Country")
    pi.columns = ["Passport Index"]
    return pi

pi = passport_index()
pi.plot.hist()
In [ ]:
unemployment_data = pd.read_csv(file_path("CIA_Unemployment.csv"), index_col=0, usecols=[1, 2])
unemployment_data["Unemployment"] /= 100
unemployment_data.plot.hist()
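Unemployment rates are read as percentages and converted to fractions here; they are used later to discount wages in each country.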
In [ ]:
# Population
population = pd.read_csv(file_path("newPOP.csv"))
population = population.set_index("Country")
population
In [ ]:
world.update_neighborhoods(ab)
world.update_neighborhoods(pi)
world.update_neighborhoods(unemployment_data)
world.update_neighborhoods(population)
world.update_neighborhoods(fi)
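Each of these country-level indicators (A/B values, Passport Index, unemployment, population, and Freedom Index) is now attached to the corresponding country in the world model.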
Agents are assigned proficiency in languages spoken in their origin country. Moving to a country with entirely new languages presents a higher migration cost.
\begin{equation} L_{O\leftrightarrow D} = \begin{cases} 0 & \text{if origin and destination share a spoken language} \\ 1 & \text{otherwise} \end{cases} \end{equation}
In [ ]:
lang_csv = pd.read_csv(file_path("languages.csv"), index_col=0)
# Build the set of languages spoken in each country, skipping blank entries.
lang_sets = [set(str(y).strip() for y in row if str(y).strip()) for _, row in lang_csv.iterrows()]
overlap = []
for s in lang_sets:
    o = []
    for i in range(len(lang_sets)):
        o.append(len(lang_sets[i].intersection(s)) >= 1)
    overlap.append(o)
# True where two countries share at least one language; invert so that sharing
# a language gives a cost of 0 and no shared language gives 1.
lang_data = pd.DataFrame(overlap, index=lang_csv.index, columns=lang_csv.index)
print(len(lang_data))
world.add_matrix("language", 1 - lang_data)
In [ ]:
un_pd = pd.read_excel(
    file_path("UN_MigrantStockByOriginAndDestination_2015.xlsx"),
    skiprows=15
)
un_pd = un_pd.set_index("Unnamed: 1")
un_pd = un_pd.iloc[0:275, 7:250]
# TODO: Should we be using the UN numbers for this?
un_pd = un_pd.sort_index().fillna(1)
world.add_matrix("un", un_pd)
In [ ]:
distance_frame = pd.read_csv(
    file_path("Country_List_ISO_3166_Codes_Latitude_Longitude.csv"),
    usecols=[2, 4, 5],
    index_col=0,
    keep_default_na=False)
# (latitude, longitude) pairs for each country.
locations = [(x[1][0], x[1][1]) for x in distance_frame.iterrows()]
rows = []
for i in range(len(locations)):
    row = []
    for loc in locations:
        row.append(haversine(loc, locations[i]))
    rows.append(row)
distance = pd.DataFrame(rows, distance_frame.index, distance_frame.index)
# Normalize by the largest pairwise distance so all entries lie in [0, 1].
world.add_matrix("distance", distance / distance.max().max())
In [ ]:
pd.options.mode.chained_assignment = None  # default='warn'
# Manually override the Freedom Index value for Australia.
world.data["Freedom Index"]["AUS"] = 50
#world.data["Passport Index"]["AUS"]=80
#world.data["Unemployment"]["AUS"]=.058
#world.data["Population"]["AUS"]=8000000
#world.matrices["un"]["AUS"]["USA"]=77845
In [ ]:
# Returns to skill (RTS): A * e^(B * skill), evaluated at the skill level chosen with the slider above.
world.update_neighborhoods(pd.Series(world.data["A"] * e ** (world.data["B"] * skill.value)), "rts")
In [ ]:
# Baseline "beta" uses the same curve at a fixed skill level of 30; it is used later to scale migration costs.
world.update_neighborhoods(pd.Series(world.data["A"] * e ** (world.data["B"] * 30)), "beta")
In [ ]:
rows = []
for i in range(len(world.data["Freedom Index"])):
    row = []
    for fi_value in world.data["Freedom Index"]:
        # Pairwise difference in Freedom Index scores, scaled by 100.
        diff = (fi_value - world.data["Freedom Index"].iloc[i]) / 100.0
        row.append(diff)
    rows.append(row)
fi_diff = pd.DataFrame(rows, world.data["Freedom Index"].index, world.data["Freedom Index"].index)
print(fi_diff)
In [ ]:
delta1=.5
delta2=.25
political_barriers=delta1*world.data["Passport Index"]/100.0 + delta2*(1 - fi_diff)
print(political_barriers)
In [ ]:
world.add_matrix("Political Barriers", political_barriers)
Out Migration:
\begin{equation} OM_{O\rightarrow D} = \frac {\text{migrants from origin in destination}} {\text{population of origin}} \end{equation}
Ethnic Enclave:
\begin{equation} EE_{O\leftrightarrow D} = \frac {\text{migrants from origin in destination}} {\text{population of destination}} \end{equation}
Migration History:
\begin{equation} MH_{O\rightarrow D} = \gamma_1 (1 - OM_{O\rightarrow D}) + \gamma_2 (1 - EE_{O\rightarrow D}) \end{equation}
In [ ]:
gamma1=.5
gamma2=.5
OM=world.matrices['un'].sort_index(axis=1).sort_index(axis=0)/world.data['Population']
#transpose UN matrix for this calculation so that we are dividing by population of destination
EE=world.matrices['un'].T.sort_index(axis=1).sort_index(axis=0)/world.data['Population']
EE=EE.T
MH=gamma1*(OM)+gamma2*(EE)
#print(MH)
max_MH = MH.max().nlargest(10).mean()
#print(max_MH)
MH = 1 - (MH/max_MH)
MH[MH<0] = 0
print(MH)
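Note that the code combines OM and EE first and then rescales by the mean of the ten largest values before inverting, rather than inverting each term separately as written above; values below zero are clipped so that MH stays within [0, 1].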
In [ ]:
world.add_matrix("Migration History", MH)
The cost of migration between an origin and destination is the weighted average of the distance, migration history, shared language and political barriers between the two countries.
\begin{equation} C = \alpha_1 \frac{D_{O\leftrightarrow D}}{D_{Max}} + \alpha_2 MH_{O\rightarrow D} + \alpha_3 L_{O\leftrightarrow D} + \alpha_4 PB \end{equation}
In [ ]:
# Cost
alpha1=.35
alpha2=.35
alpha3=.15
alpha4=.15
c = (alpha1*world.matrices["distance"] +
alpha2*world.matrices["Migration History"] +
alpha3*world.matrices["language"] +
alpha4*world.matrices["Political Barriers"])
world.add_matrix("cost", c * world.data['beta'])
In [ ]:
map_plot(world.matrices["cost"]["SYR"], title="Costs (SYR)")
In [ ]:
map_plot(world.matrices["cost"]["FRA"], title="Costs (FRA)")
In [ ]:
world.update_neighborhoods((1 - world.data["Unemployment"]) * world.data["rts"], "wages")
In [ ]:
map_plot(world.data["wages"], title="Wages")
In [ ]:
# Pairwise wage differences between every pair of countries, minus the cost of
# migrating between them; negative values (no incentive to move) are clipped to zero.
wage_diff = pd.DataFrame(
    np.array([[x] * len(world.data) for x in world.data["wages"].values])
    - np.array([list(world.data["wages"].values)] * len(world.data)),
    world.data.index,
    world.data.index
)
world.add_matrix("migration", (wage_diff - world.matrices["cost"]).clip(lower=0))
# Normalize, then scale by population to estimate migrant counts.
world.matrices["migration"] = world.matrices["migration"] / (world.matrices["migration"].sum() + 1)
world.matrices["migration"] = world.matrices["migration"] / world.matrices["migration"].sum(axis=1).max()
# TODO: Why does this require being transposed?
world.matrices["migration"] = (0.15 * world.matrices["migration"].transpose() * world.data["Population"]).transpose()
world.matrices["migration"]
In [ ]:
map_plot(
world.matrices["migration"].sum(axis=1)+1,
title="Immigration Estimations (x={})".format(skill.value),
normc=matplotlib.colors.LogNorm
)
In [ ]:
map_plot(
world.matrices["migration"].sum(),
title="Estimated Number of Emigrants (x={})".format(skill.value),
normc=matplotlib.colors.Normalize
)
In [ ]:
map_plot(
world.matrices["migration"].sum(axis=1) - world.matrices["migration"].sum(),
title="Net Migration (x={})".format(skill.value),
normc=gos.visualization.MidPointNorm
)
Finally, define the immigration, emigration, and net-migration vectors and export them to Excel files.
In [ ]:
immigration = world.matrices["migration"].sum(axis=1)+1
emigration = world.matrices["migration"].sum()
net_migration = world.matrices["migration"].sum(axis=1) - world.matrices["migration"].sum()
In [ ]:
# Write the resulting vectors out as Excel files (this path is machine-specific).
%cd ~/Public/GOS/examples/multiscale-migration/data
immigration.to_excel("immigration.xlsx")
emigration.to_excel("emigration.xlsx")
net_migration.to_excel("net_migration.xlsx")
In [ ]: