In [1]:
from bs4 import BeautifulSoup
import requests
import js2py
import pandas as pd
from itertools import permutations
In [2]:
source = requests.get('https://www.passportindex.org/comparebyPassport.php').content.decode('utf-8')
In [3]:
# Parse the downloaded page. The parser is named explicitly so the result does
# not depend on which optional parsers (lxml, html5lib) happen to be installed.
soup = BeautifulSoup(source, 'html.parser')

# JavaScript appended after the page's own script, inside a wrapper function.
# It returns four lists of [passport, value] pairs, all keyed by the passports
# in com_c_vf, in this fixed order:
#   0 -> com_c_vf, 1 -> com_c_voa, 2 -> com_c_eta, 3 -> so_vf
js_export = (
    '; return ['
    'Object.keys(com_c_vf).map(function(z){return [z, com_c_vf[z]]}),'
    'Object.keys(com_c_vf).map(function(z){return [z, com_c_voa[z]]}),'
    'Object.keys(com_c_vf).map(function(z){return [z, com_c_eta[z]]}),'
    'Object.keys(com_c_vf).map(function(z){return [z, so_vf[z]]}),'
    ']};a()'
)

# Sentinel so that a page without the expected script fails loudly here
# instead of with a NameError in a later cell.
data = None
for s in soup.find_all('script'):
    if 'com_c_vf' in s.text:
        # Wrap the page script in a function so its variables can be returned.
        script = 'function a() {' + s.text + js_export
        data = js2py.eval_js(script)

if data is None:
    raise RuntimeError(
        "No <script> containing 'com_c_vf' was found in the downloaded page; "
        "the page layout may have changed."
    )
In [4]:
# Lookup from the codes found in the page's `so_vf` lists to the value written
# into the 'Code' column for visa-free destinations. Numeric values are
# presumably the permitted stay in days; '0' maps to the plain 'VF' label and
# '9' to '-' -- TODO confirm against the source site.
# Codes '10' and '11' are absent here; an unknown code raises KeyError on lookup.
so = {
    '0': 'VF',
    '1': 7,
    '2': 14,
    '3': 90,
    '4': 28,
    '5': 30,
    '6': 180,
    '7': 360,
    '8': 31,
    '9': '-',
    '12': 60,
    '13': 15,
    '14': 120,
    '15': 240,
    '16': 45,  # key was previously misspelled as '16:' and could never match
    '17': 21,
    '18': 42,
}
In [5]:
# Country lookup table, read as strings and indexed by its 'ISO2' column.
url = 'https://gist.githubusercontent.com/ilyankou/b2580c632bdea4af2309dcaa69860013/raw/420fb417bcd17d833156efdf64ce8a1c3ceb2691/country-codes'
codes = pd.read_csv(url, dtype=str)
# Missing cells become the literal string 'NA' -- presumably to restore a
# country code 'NA' that pandas parses as a missing value; TODO confirm.
codes = codes.fillna('NA')
codes = codes.set_index('ISO2')
def fix_iso2(x):
    """Return the corrected two-letter code for ``x``.

    'UK' is replaced by 'GB' and 'RK' by 'XK'; any other value is returned
    unchanged.
    """
    overrides = {'UK': 'GB', 'RK': 'XK'}
    return overrides.get(x, x)
In [6]:
# Tidy (long) table with one row per ordered (Passport, Destination) pair of
# distinct codes taken from the `codes` index.
multiindex = pd.MultiIndex.from_tuples(
    list(permutations(codes.index, 2)), names=['Passport', 'Destination']
)

# By default every destination requires a visa, so start with 'VR' everywhere.
tidy_iso2 = pd.DataFrame(index=multiindex)
tidy_iso2['Code'] = 'VR'

# `data` holds the lists in the order [com_c_vf, com_c_voa, com_c_eta, so_vf]:
#   i=0 visa free        -> value looked up in `so` from the matching so_vf code
#   i=1 visa on arrival  -> 'VOA'
#   i=2 eTA              -> 'ETA'
# The two labels were previously swapped relative to this order.
access_labels = {1: 'VOA', 2: 'ETA'}

for i in range(3):
    for j in range(len(data[i])):
        passport = fix_iso2(data[i][j][0])  # correct UK and Kosovo codes
        countries = data[i][j][1].split(',')

        # For visa free, data[3] carries one code per destination, aligned by
        # position with `countries`; it is translated through `so`.
        if i == 0:
            vf2days = data[3][j][1].split(',')

        for k in range(len(countries)):
            country = countries[k]
            if country == '':
                # Empty entry in the comma-separated list; nothing to record.
                continue

            country = fix_iso2(country)  # correct UK and Kosovo codes

            if i == 0:
                code = so[vf2days[k]]
            else:
                code = access_labels[i]
            tidy_iso2.loc[(passport, country), 'Code'] = code
In [7]:
# Long-format export, indexed by the (Passport, Destination) ISO2 pair.
tidy_iso2.to_csv('passport-index-tidy-iso2.csv')

# Wide-format export: passports as rows, destinations as columns. Cells with
# no entry in the tidy table are written as -1.
matrix_iso2 = tidy_iso2.reset_index().pivot(
    index='Passport', columns='Destination', values='Code'
)
matrix_iso2 = matrix_iso2.fillna(-1)
matrix_iso2.to_csv('passport-index-matrix-iso2.csv')
In [8]:
# Same table as tidy_iso2, with both code columns translated to the 'ISO3'
# column of `codes`.
tidy_iso3 = tidy_iso2.copy(deep=True).reset_index()
for col in ('Passport', 'Destination'):
    tidy_iso3[col] = tidy_iso3[col].apply(lambda iso2: codes.loc[iso2]['ISO3'])

# Long-format export without the positional index.
tidy_iso3.to_csv('passport-index-tidy-iso3.csv', index=False)

# Wide-format export; cells with no entry are written as -1.
matrix_iso3 = tidy_iso3.reset_index().pivot(
    index='Passport', columns='Destination', values='Code'
)
matrix_iso3 = matrix_iso3.fillna(-1)
matrix_iso3.to_csv('passport-index-matrix-iso3.csv')
In [9]:
# Same table as tidy_iso2, with both code columns translated to the 'Country'
# column of `codes`.
tidy_names = tidy_iso2.copy(deep=True).reset_index()
for col in ('Passport', 'Destination'):
    tidy_names[col] = tidy_names[col].apply(lambda iso2: codes.loc[iso2]['Country'])

# Long-format export without the positional index.
tidy_names.to_csv('passport-index-tidy.csv', index=False)

# Wide-format export; cells with no entry are written as -1.
matrix_names = tidy_names.reset_index().pivot(
    index='Passport', columns='Destination', values='Code'
)
matrix_names = matrix_names.fillna(-1)
matrix_names.to_csv('passport-index-matrix.csv')
In [ ]:
In [ ]: