In [1]:
import metatab
import pandas as pd
import matplotlib.pyplot
%matplotlib notebook
matplotlib.style.use('ggplot')
doc = metatab.open_package('..') # Assumes you started Jupyter in the same dir as the metadata file
# To run this on the web, without downloading the data package
#package_url='https://raw.githubusercontent.com/CivicKnowledge/metatab-packages/master/civicknowledge.com/immigration-vs-gdp/metadata.csv'
#doc = metatab.open_package(package_url)
list(r.name for r in doc.resources())
pop_df = doc.first_resource('total_pop').dataframe()
gdp_df = doc.first_resource('country_gdp').dataframe()
md_df = doc.first_resource('migration').dataframe()
In [2]:
# Add a year-over-year GDP change to the GDP file.
gdpm=gdp_df[ (gdp_df.subject_code == 'T_GDPPOP') & ( gdp_df.measure_code=='CPC')][[ 'location','time','gdp']]
gdpm['time'] = pd.to_datetime(gdpm.time, format='%Y') # Turn year integers into datetime64 values
gdpm.set_index(['location','time'], inplace=True)
gdpm['change'] = gdpm.pct_change(periods=1)
gdpm.reset_index( inplace=True)
gdpm.to_csv('../data/gdp-pop-migration.csv') # Save file for debugging.
In [3]:
# CPC == USD, current prices, current PPPs
# T_GDP == Gross Domestic Product (GDP); millions
# B11 == Inflows of foreign population by nationality
# TOT == total in migration into a country
# GDP For 2014
gdp2014=gdpm[gdpm.time.dt.year == 2014]
# 2014 total inmigration per country
md2014 = md_df[ (md_df.year==2014) & (md_df['var'] == 'B11') & (md_df.country_code == 'TOT')]
# Middle variant estimated population per country for 2014.
pop2014 = pop_df[(pop_df.variant == 'Medium') & ( pop_df.time == 2014)]
In [4]:
#Create an index of all of the country names and codes, to use as a "backbone" for merging.
l1 = md2014[['country','cou']].drop_duplicates()
l2 = pop2014[['location','locid']].drop_duplicates()
index = l2.merge(l1, how='outer', left_on='location', right_on='country')
index.loc[index.location=='United States of America',['country','cou']] = ['United States', 'USA']
index.to_csv('/tmp/countries.csv')
In [12]:
gpop = index\
.merge(gdp2014, left_on='cou', right_on='location', suffixes=('','_gdp'))\
.merge(pop2014, on='locid', suffixes=('','_pop'))\
.merge(md2014, on='cou', suffixes=('','_md')) \
.set_index('cou')\
['location gdp change poptotal growthrate popdensity in_migration_count'.split()].copy()
In [13]:
# Adjust for population
gpop['migrant_per_pop'] = gpop.in_migration_count / gpop.poptotal
# Average GSDP change rate
gdp_a = gdpm.groupby('location').mean()[['change']]
# Average Annual migration
md_a = md_df[(md_df['var'] == 'B11') & (md_df.country_code == 'TOT')].groupby('cou').mean()[['in_migration_count']]
# Change some column names
md_a.columns = ['in_migration_avg']
gdp_a.columns = ['gdp_change_avg']
# Merge the averages back in
gpop_a = gpop.merge(gdp_a, left_index=True, right_index=True).merge(md_a, left_index=True, right_index=True)
In [14]:
gpop.plot.scatter(x='migrant_per_pop',y='gdp')
Out[14]:
In [8]:
gpop.corr()
Out[8]:
In [9]:
#gdp2014=gdpm[gdpm.time.dt.year == 2014]
#md2014 = md_df[ (md_df.year==2014) & (md_df['var'] == 'B11') & (md_df.country_code == 'TOT')] # 'Inflows of foreign population by nationality'
#pop2014 = pop_df[(pop_df.variant == 'Medium') & ( pop_df.time == 2014)]
In [ ]:
In [10]:
gpop_a.corr()
Out[10]:
In [11]:
gpop_a.plot.scatter(x='migrant_per_pop',y='gdp_change_avg')
Out[11]:
In [ ]:
In [ ]: