In [1]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt, requests, urllib2
from bs4 import BeautifulSoup
%matplotlib inline

Import CNC


In [2]:
# Folder that holds the shared country lookup workbook (relative to this notebook).
cnc_path='../../universal/countries/'

# Read the workbook, then use its 'Name' column as the row index.
cnc=pd.read_excel(cnc_path+'cnc.xlsx')
cnc=cnc.set_index('Name')

In [3]:
# Show the lookup table: one column per country, with the rows ISO3, ISO2 and
# Name1..Name8 (see the rendered output of this cell).
cnc


Out[3]:
Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia Aruba ... Greenland Zanzibar Svalbard and Jan Mayen Puerto Rico Isle of Man Jersey Guernsey US Virgin Islands Kosovo South Sudan
Name
ISO3 AFG ALB DZA AND AGO AIA ATG ARG ARM ABW ... GRL EAZ SJM PRI IMN JEY GGY VIR KOS SSD
ISO2 AF AL DZ AD AO AI AG AR AM AW ... GL NaN SJ PR IM JE GG VI NaN NaN
Name1 NaN Albanie Algerie Andorre NaN AnguillaUK AntiguaetBarbuda Argentine Armenie ArubaNetherlands ... Groenland NaN IlesSvalbardetJanMayen NaN IsleofMann BailiwickofJersey BailiwickofGuernsey VirginIslandsUS NaN NaN
Name2 NaN AlbaniaAlbanie AlgeriaAlgerie AndorraAndorre NaN NaN AntiguaandBarbudaAntiguaetBarbuda ArgentinaArgentine ArmeniaArmenie NaN ... GreenlandGroenland NaN NaN PuertoRicoUS NaN NaN NaN VirginIslandsUSA NaN NaN
Name3 AfghanistanIslamicRepof NaN NaN NaN NaN NaN AntiguaBarbuda NaN NaN NaN ... Gronland ZAN Svalbard NaN IMY GBJ GBG UnitedStatesVirginIslands NaN NaN
Name4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... GreenlandDenmark NaN SvalbardandJanMayenIslands NaN NaN NaN NaN NaN NaN NaN
Name5 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Name6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Name7 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Name8 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

10 rows × 221 columns

Capture airport lists


In [33]:
# Root of every country page that gets requested.
baseurl='https://www.airportia.com/'

# Per-country scrape results, keyed by the country's column name in `cnc`:
#   L - text sliced out of each airport link's href attribute
#   M - text sliced out of each airport link's title attribute
#   N - three characters sliced from the end of each link's markup
L,M,N={},{},{}

# Countries whose page contained no element with class 'textlist'.
F=[]

In [34]:
for k in range(len(cnc.columns)):
    c=cnc.columns[k]
    if c not in L or F:
        #capture token
        url=baseurl+c.lower().replace(' ','-')
        s = requests.Session()
        cookiesopen = s.get(url)
        cookies=str(s.cookies)
        fcookies=[[k[:k.find('=')],k[k.find('=')+1:k.find(' for ')]] for k in cookies[cookies.find('Cookie '):].split('Cookie ')[1:]]
        #push token
        opener = urllib2.build_opener()
        for k in fcookies:
            opener.addheaders.append(('Cookie', k[0]+'='+k[1]))
        #read html
        content=s.get(url).content
        soup = BeautifulSoup(content, "lxml")
        if len(soup.findAll(attrs={'class':'textlist'}))>0:
            links=soup.findAll(attrs={'class':'textlist'})[0].findAll('a')
            L[c]=[str(i)[str(i).find('href')+6:str(i).find('title')-2] for i in links]
            M[c]=[str(i)[str(i).find('title')+7:str(i).find('>')-1] for i in links]  
            N[c]=[str(i)[str(i).find('<')-8:str(i).find('<')-5] for i in links]
            print 'Success',url
        else:
            F.append(c)
            print 'Fail',url


Success https://www.airportia.com/afghanistan
Success https://www.airportia.com/albania
Success https://www.airportia.com/algeria
Success https://www.airportia.com/andorra
Success https://www.airportia.com/angola
Success https://www.airportia.com/anguilla
Success https://www.airportia.com/antigua-and-barbuda
Success https://www.airportia.com/argentina
Success https://www.airportia.com/armenia
Success https://www.airportia.com/aruba
Success https://www.airportia.com/australia
Success https://www.airportia.com/austria
Success https://www.airportia.com/azerbaijan
Success https://www.airportia.com/bahamas
Success https://www.airportia.com/bahrain
Success https://www.airportia.com/bangladesh
Success https://www.airportia.com/barbados
Success https://www.airportia.com/belarus
Success https://www.airportia.com/belgium
Success https://www.airportia.com/belize
Success https://www.airportia.com/benin
Success https://www.airportia.com/bermuda
Success https://www.airportia.com/bhutan
Success https://www.airportia.com/bolivia
Success https://www.airportia.com/bosnia-and-herzegovina
Success https://www.airportia.com/botswana
Success https://www.airportia.com/brazil
Success https://www.airportia.com/british-virgin-islands
Fail https://www.airportia.com/brunei-darussalam
Success https://www.airportia.com/bulgaria
Success https://www.airportia.com/burkina-faso
Success https://www.airportia.com/burundi
Success https://www.airportia.com/cambodia
Success https://www.airportia.com/cameroon
Success https://www.airportia.com/canada
Success https://www.airportia.com/cape-verde
Success https://www.airportia.com/cayman-islands
Fail https://www.airportia.com/cent-african-rep
Success https://www.airportia.com/chad
Success https://www.airportia.com/chile
Success https://www.airportia.com/china
Success https://www.airportia.com/colombia
Success https://www.airportia.com/comoros
Success https://www.airportia.com/congo
Success https://www.airportia.com/cook-islands
Success https://www.airportia.com/costa-rica
Fail https://www.airportia.com/cote-d'ivoire
Success https://www.airportia.com/croatia
Success https://www.airportia.com/cuba
Success https://www.airportia.com/cyprus
Success https://www.airportia.com/czech-republic
Fail https://www.airportia.com/people's-republic-of-korea
Fail https://www.airportia.com/dem.-rep.-of-congo
Success https://www.airportia.com/denmark
Success https://www.airportia.com/djibouti
Success https://www.airportia.com/dominica
Success https://www.airportia.com/dominican-republic
Success https://www.airportia.com/ecuador
Success https://www.airportia.com/egypt
Success https://www.airportia.com/el-salvador
Success https://www.airportia.com/equatorial-guinea
Success https://www.airportia.com/eritrea
Success https://www.airportia.com/estonia
Success https://www.airportia.com/ethiopia
Success https://www.airportia.com/fiji
Success https://www.airportia.com/finland
Success https://www.airportia.com/france
Success https://www.airportia.com/gabon
Success https://www.airportia.com/gambia
Success https://www.airportia.com/georgia
Success https://www.airportia.com/germany
Success https://www.airportia.com/ghana
Success https://www.airportia.com/gibraltar
Success https://www.airportia.com/greece
Success https://www.airportia.com/grenada
Success https://www.airportia.com/guatemala
Success https://www.airportia.com/guinea
Fail https://www.airportia.com/guinea-bissau
Success https://www.airportia.com/guyana
Success https://www.airportia.com/haiti
Fail https://www.airportia.com/holy-see
Success https://www.airportia.com/honduras
Success https://www.airportia.com/hong-kong
Success https://www.airportia.com/hungary
Success https://www.airportia.com/iceland
Success https://www.airportia.com/india
Success https://www.airportia.com/indonesia
Success https://www.airportia.com/iran
Success https://www.airportia.com/iraq
Success https://www.airportia.com/ireland
Success https://www.airportia.com/israel
Success https://www.airportia.com/italy
Success https://www.airportia.com/jamaica
Success https://www.airportia.com/japan
Success https://www.airportia.com/jordan
Success https://www.airportia.com/kazakhstan
Success https://www.airportia.com/kenya
Success https://www.airportia.com/kiribati
Success https://www.airportia.com/kuwait
Success https://www.airportia.com/kyrgyzstan
Fail https://www.airportia.com/lao-people's-dem.-rep.
Success https://www.airportia.com/latvia
Success https://www.airportia.com/lebanon
Success https://www.airportia.com/lesotho
Success https://www.airportia.com/liberia
Fail https://www.airportia.com/libyan-arab-jamahiriya
Fail https://www.airportia.com/liechtenstein
Success https://www.airportia.com/lithuania
Success https://www.airportia.com/luxembourg
Fail https://www.airportia.com/macao,-china
Success https://www.airportia.com/madagascar
Success https://www.airportia.com/malawi
Success https://www.airportia.com/malaysia
Success https://www.airportia.com/maldives
Success https://www.airportia.com/mali
Success https://www.airportia.com/malta
Success https://www.airportia.com/marshall-islands
Success https://www.airportia.com/mauritania
Success https://www.airportia.com/mauritius
Success https://www.airportia.com/mexico
Fail https://www.airportia.com/micronesia-(federated-states-of)
Success https://www.airportia.com/monaco
Success https://www.airportia.com/mongolia
Success https://www.airportia.com/montenegro
Success https://www.airportia.com/montserrat
Success https://www.airportia.com/morocco
Success https://www.airportia.com/mozambique
Fail https://www.airportia.com/myanmar-(burma)
Success https://www.airportia.com/namibia
Success https://www.airportia.com/nauru
Success https://www.airportia.com/nepal
Success https://www.airportia.com/netherlands
Fail https://www.airportia.com/netherlands-antilles
Success https://www.airportia.com/new-zealand
Success https://www.airportia.com/nicaragua
Success https://www.airportia.com/niger
Success https://www.airportia.com/nigeria
Success https://www.airportia.com/niue
Success https://www.airportia.com/norway
Success https://www.airportia.com/oman
Success https://www.airportia.com/pakistan
Success https://www.airportia.com/palau
Fail https://www.airportia.com/palestinian-territories
Success https://www.airportia.com/panama
Success https://www.airportia.com/papua-new-guinea
Success https://www.airportia.com/paraguay
Fail https://www.airportia.com/peru
Success https://www.airportia.com/philippines
Success https://www.airportia.com/poland
Success https://www.airportia.com/portugal
Success https://www.airportia.com/qatar
Fail https://www.airportia.com/rep.-of-korea
Fail https://www.airportia.com/rep.-of-moldova
Success https://www.airportia.com/romania
Fail https://www.airportia.com/russian-federation
Success https://www.airportia.com/rwanda
Success https://www.airportia.com/saint-kitts-and-nevis
Success https://www.airportia.com/saint-lucia
Success https://www.airportia.com/saint-vincent-and-the-grenadines
Success https://www.airportia.com/samoa
Fail https://www.airportia.com/san-marino
Success https://www.airportia.com/são-tomé-and-principe
Success https://www.airportia.com/saudi-arabia
Success https://www.airportia.com/senegal
Success https://www.airportia.com/serbia
Success https://www.airportia.com/seychelles
Success https://www.airportia.com/sierra-leone
Success https://www.airportia.com/singapore
Success https://www.airportia.com/slovakia
Success https://www.airportia.com/slovenia
Success https://www.airportia.com/solomon-islands
Success https://www.airportia.com/somalia
Success https://www.airportia.com/south-africa
Success https://www.airportia.com/spain
Success https://www.airportia.com/sri-lanka
Success https://www.airportia.com/sudan
Success https://www.airportia.com/suriname
Success https://www.airportia.com/swaziland
Success https://www.airportia.com/sweden
Success https://www.airportia.com/switzerland
Fail https://www.airportia.com/syrian-arab-republic
Success https://www.airportia.com/tajikistan
Success https://www.airportia.com/thailand
Fail https://www.airportia.com/fyr-of-macedonia
Fail https://www.airportia.com/timor-leste
Success https://www.airportia.com/togo
Fail https://www.airportia.com/tokelau
Success https://www.airportia.com/tonga
Success https://www.airportia.com/trinidad-and-tobago
Success https://www.airportia.com/tunisia
Success https://www.airportia.com/turkey
Success https://www.airportia.com/turkmenistan
Success https://www.airportia.com/turks-and-caicos-islands
Success https://www.airportia.com/tuvalu
Success https://www.airportia.com/uganda
Success https://www.airportia.com/ukraine
Success https://www.airportia.com/united-arab-emirates
Success https://www.airportia.com/united-kingdom
Success https://www.airportia.com/tanzania
Success https://www.airportia.com/united-states
Success https://www.airportia.com/uruguay
Success https://www.airportia.com/uzbekistan
Success https://www.airportia.com/vanuatu
Success https://www.airportia.com/venezuela
Success https://www.airportia.com/vietnam
Success https://www.airportia.com/yemen
Success https://www.airportia.com/zambia
Success https://www.airportia.com/zimbabwe
Success https://www.airportia.com/french-guiana
Success https://www.airportia.com/western-sahara
Success https://www.airportia.com/taiwan
Success https://www.airportia.com/greenland
Fail https://www.airportia.com/zanzibar
Fail https://www.airportia.com/svalbard-and-jan-mayen
Success https://www.airportia.com/puerto-rico
Success https://www.airportia.com/isle-of-man
Success https://www.airportia.com/jersey
Success https://www.airportia.com/guernsey
Fail https://www.airportia.com/us-virgin-islands
Success https://www.airportia.com/kosovo
Success https://www.airportia.com/south-sudan

Fix failures: retry the countries that failed above, using manually mapped Airportia URL slugs


In [35]:
# Hand-maintained overrides: country name as used in `cnc` -> URL slug to try
# on Airportia for the countries whose default slug failed.
# Entries whose value simply repeats the key (e.g. "Holy See") are
# placeholders; in the recorded run every one of those requests failed.
lnc = {
    "Brunei Darussalam": "brunei",
    "Cent African Rep": "central-african-republic",
    "Cote d'Ivoire": "ivory-coast",
    "People's Republic of Korea": "north-korea",
    # NOTE(review): same slug as the one requested for the "Congo" column --
    # confirm this is the intended page.
    "Dem. Rep. of Congo": "congo",
    "Guinea-Bissau": "guinea_bissau",
    "Holy See": "Holy See",
    "Lao People's Dem. Rep.": "laos",
    "Libyan Arab Jamahiriya": "libya",
    "Liechtenstein": "Liechtenstein",
    "Macao, China": "Macao, China",
    "Micronesia (Federated States of)": "micronesia",
    "Myanmar (Burma)": "burma",
    "Netherlands Antilles": "Netherlands Antilles",
    "Palestinian Territories": "palestinian-territory",
    "Peru": u"perú",
    "Rep. of Korea": "south-korea",
    "Rep. of Moldova": "moldova",
    "Russian Federation": "russia",
    "San Marino": "San Marino",
    "Syrian Arab Republic": "syria",
    "FYR of Macedonia": "macedonia",
    "Timor-Leste": "timor_leste",
    "Tokelau": "Tokelau",
    "Zanzibar": "Zanzibar",
    "Svalbard and Jan Mayen": "Svalbard and Jan Mayen",
    "US Virgin Islands": "US Virgin Islands",
}

In [39]:
# Countries that still have no airport list after the retry with the `lnc` slug.
F2=list()

In [44]:
for k in range(len(cnc.columns)):
    c=cnc.columns[k]
    if c not in L and F2:
        #capture token
        url=baseurl+lnc[c]
        s = requests.Session()
        cookiesopen = s.get(url)
        cookies=str(s.cookies)
        fcookies=[[k[:k.find('=')],k[k.find('=')+1:k.find(' for ')]] for k in cookies[cookies.find('Cookie '):].split('Cookie ')[1:]]
        #push token
        opener = urllib2.build_opener()
        for k in fcookies:
            opener.addheaders.append(('Cookie', k[0]+'='+k[1]))
        #read html
        content=s.get(url).content
        soup = BeautifulSoup(content, "lxml")
        if len(soup.findAll(attrs={'class':'textlist'}))>0:
            links=soup.findAll(attrs={'class':'textlist'})[0].findAll('a')
            L[c]=[str(i)[str(i).find('href')+6:str(i).find('title')-2] for i in links]
            M[c]=[str(i)[str(i).find('title')+7:str(i).find('>')-1] for i in links]       
            N[c]=[str(i)[str(i).find('<')-8:str(i).find('<')-5] for i in links]
            print 'Success',url
        else:
            F2.append(c)
            print 'Fail',url


Fail https://www.airportia.com/Holy See
Success https://www.airportia.com/laos
Success https://www.airportia.com/libya
Fail https://www.airportia.com/Liechtenstein
Fail https://www.airportia.com/Macao, China
Success https://www.airportia.com/micronesia
Success https://www.airportia.com/burma
Fail https://www.airportia.com/Netherlands Antilles
Success https://www.airportia.com/palestinian-territory
Success https://www.airportia.com/perú
Success https://www.airportia.com/south-korea
Success https://www.airportia.com/moldova
Success https://www.airportia.com/russia
Fail https://www.airportia.com/San Marino
Success https://www.airportia.com/syria
Success https://www.airportia.com/macedonia
Success https://www.airportia.com/timor_leste
Fail https://www.airportia.com/Tokelau
Fail https://www.airportia.com/Zanzibar
Fail https://www.airportia.com/Svalbard and Jan Mayen
Fail https://www.airportia.com/US Virgin Islands

Update CNC: add an `Airportia` row holding each country's URL slug


In [113]:
# Work out the Airportia slug of every country, in `cnc` column order:
#   - NaN if the country is listed in F2 (the retry failed as well),
#   - the hand-mapped `lnc` slug if the country is listed in F,
#   - otherwise the lower-cased, hyphenated column name.
g=[]
for c in cnc.columns:
    if c in F2:
        slug=np.nan
    elif c in F:
        slug=lnc[c]
    else:
        slug=c.lower().replace(' ','-')
    g.append(slug)

In [114]:
# Countries are the columns of `cnc`, so transpose to get one row per country,
# attach the slugs as an 'Airportia' column, then transpose back so the slugs
# end up as an 'Airportia' row of `cnc`.
dnc=cnc.T.assign(Airportia=g)
cnc=dnc.T

In [116]:
# Show the table again; it now carries the extra 'Airportia' row (see output).
cnc


Out[116]:
Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia Aruba ... Greenland Zanzibar Svalbard and Jan Mayen Puerto Rico Isle of Man Jersey Guernsey US Virgin Islands Kosovo South Sudan
Name
ISO3 AFG ALB DZA AND AGO AIA ATG ARG ARM ABW ... GRL EAZ SJM PRI IMN JEY GGY VIR KOS SSD
ISO2 AF AL DZ AD AO AI AG AR AM AW ... GL NaN SJ PR IM JE GG VI NaN NaN
Name1 NaN Albanie Algerie Andorre NaN AnguillaUK AntiguaetBarbuda Argentine Armenie ArubaNetherlands ... Groenland NaN IlesSvalbardetJanMayen NaN IsleofMann BailiwickofJersey BailiwickofGuernsey VirginIslandsUS NaN NaN
Name2 NaN AlbaniaAlbanie AlgeriaAlgerie AndorraAndorre NaN NaN AntiguaandBarbudaAntiguaetBarbuda ArgentinaArgentine ArmeniaArmenie NaN ... GreenlandGroenland NaN NaN PuertoRicoUS NaN NaN NaN VirginIslandsUSA NaN NaN
Name3 AfghanistanIslamicRepof NaN NaN NaN NaN NaN AntiguaBarbuda NaN NaN NaN ... Gronland ZAN Svalbard NaN IMY GBJ GBG UnitedStatesVirginIslands NaN NaN
Name4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... GreenlandDenmark NaN SvalbardandJanMayenIslands NaN NaN NaN NaN NaN NaN NaN
Name5 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Name6 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Name7 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Name8 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Airportia afghanistan albania algeria andorra angola anguilla antigua-and-barbuda argentina armenia aruba ... greenland NaN NaN puerto-rico isle-of-man jersey guernsey NaN kosovo south-sudan

11 rows × 221 columns


In [118]:
# Write the extended table into the same folder under a new file name
# (cnc1.xlsx), so the workbook that was read (cnc.xlsx) is not overwritten.
cnc1_file=cnc_path+'cnc1.xlsx'
cnc.to_excel(cnc1_file)

Save links


In [45]:
import json

# Dump the three scrape dictionaries to ../json/L.json, M.json and N.json.
# `with open(...)` closes every file as soon as it has been written instead
# of leaving that to garbage collection, and replaces the `file()` builtin,
# which exists only in Python 2.
for name,data in (('L',L),('M',M),('N',N)):
    with open('../json/'+name+'.json','w') as fh:
        fh.write(json.dumps(data))

Create folder structure


In [125]:
import os.path

In [131]:
# Create ../countries/<iso2>/ with the sub-folders code, d3, json and map for
# every country that has an ISO2 code. Folders that already exist are left
# untouched.
for iso2 in cnc.loc['ISO2']:
    # Missing codes are NaN, whose string form is 'nan'; skip those.
    if str(iso2).lower()=='nan':
        continue
    directory='../countries/'+iso2.lower()
    # Country folder first, then its sub-folders.
    wanted=[directory]+[directory+'/'+sub for sub in ['code','d3','json','map']]
    for path in wanted:
        if not os.path.exists(path):
            os.makedirs(path)