In [2]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Load airports of each country


In [7]:
# Read the three lookup tables from disk. They are indexed by the same country
# keys (the cell that builds AP iterates over M and indexes L[c] and N[c]).
# `with` closes each file as soon as it has been parsed instead of leaving the
# handle open until it is garbage-collected.
with open('../json/L.json','r') as f:
    L=json.load(f)
with open('../json/M.json','r') as f:
    M=json.load(f)
with open('../json/N.json','r') as f:
    N=json.load(f)

In [15]:
import requests

In [15]:
# AP[country][N entry] = L entry, pairing N[country] and L[country] by position.
# The scraping loops later append the L entry to the site root to form a URL.
AP = {}
for country in M:
    links_by_key = AP.setdefault(country, {})
    for pos, link in enumerate(L[country]):
        # index N explicitly so a length mismatch between N and L still raises
        links_by_key[N[country][pos]] = link

In [18]:
# Placeholder only: the scraping and saving loops rebind `sch` for every country.
sch={}

Record schedules for 2 weeks (note: the scraping loops below actually cover 4 weeks, 4–31 March 2017), then augment the count with weekly flight numbers. Seasonal and seasonal-charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its history is in the past.


In [24]:
# Site root; the per-airport entries stored in AP are appended to it to build page URLs.
baseurl='https://www.airportia.com/'
import requests, urllib2
# Arrivals results, filled by the arrivals loop as SC[country] = sch.
SC={}

parse Arrivals


In [ ]:
for c in AP:
    print c
    airportialinks=AP[c]
    sch={}
    for i in airportialinks:
        print i,
        if i not in sch:sch[i]={}
        #march 4-31 = 4 weeks
        for d in range (4,32):
            if d not in sch[i]:
                try:
                    
                    #capture token
                    url=baseurl+airportialinks[i]+'arrivals/201703'+str(d)
                    s = requests.Session()
                    cookiesopen = s.get(url)
                    cookies=str(s.cookies)
                    fcookies=[[k[:k.find('=')],k[k.find('=')+1:k.find(' for ')]] for k in cookies[cookies.find('Cookie '):].split('Cookie ')[1:]]
                    #push token
                    opener = urllib2.build_opener()
                    for k in fcookies:
                        opener.addheaders.append(('Cookie', k[0]+'='+k[1]))
                    #read html
                    m=s.get(url).content
                    sch[i][url]=pd.read_html(m)[0]             
                except: pass #print 'no tables',i,d
    print 
    SC[c]=sch


Canada
YUY YUX XBW ZQS XBR YUL XBE YUB YUD XBB YRT ZTS XPK YUT HZP XPP LAK ZFN YBX YBY YBQ YBR YBS YBT CJH YBV YBW YBH YBI YBJ YBK YBL YBM YBN YBO YBA YBB YBC YBD YLL YBF YBG IUM ZGR ZGS XKO XKS YYN ZGI AKV XFZ YCE SYF YIG YIF YIB YIO YIK YIV XTL ZMT YFE YFG YFA YFB YFC YFL YFO YFH YFJ YFR YFS YFX YPR YPS YPP YPQ ZRR YPT YPZ YPX YPY YPB YPC YPA YPF YPG YPD YPE YPJ YPH YPI YPN YPO YPL YPM CFQ ZNA ZNG ZJN ZNU XQU YHB YMQ YMP YMR YMT YMW YMX YMA YMC QBC YME YMD YMG YMF YMI YMH YMJ YMM YML YMO YMN ZAC SUR YSK YSJ YSI YSH YSO YSN YSM YSL YSC XLF YSA YSG XLB YSE YSD YSZ YSY YSX YSS YSR YSQ YSP YSV YSU YPW YZP YZR YZS YZT YZU YZV YZW YZX YZZ YZA YZC YZD YZE YZF YZG YZH YQR YZM YOD YTZ YTX YTT YTU YTR YTS YTP YTQ YTN YTO YTL YTM YTJ YTK YTH YTI YTF YTG YTD YTE YTB YTC YTA SSQ XCL XCM YHK YHH YHI YHN YHO YHM ZJG YHC YHA YHF YHG YHD YHE YHZ YHY YHR YHS YHP YHT YHU DGF ZRJ WPL WPC YAY YAX YAZ YAU YAT YAW YAV YAQ YAR YAM YAL YAH YAJ YAE DUQ YAG YAF YAA YAC YAB TUX ZEL ZEM KEW KES ZST ZSW ZSP YWR YWQ YWP YWY KIF YWG YWF YWB YWA YWO YWN YWM YWL YWK YWJ ZSJ YWH YWS YBE XGL XGR YLW YLT YLR YLS YLP YLQ ZFW YLX YLY YLF YLD YLE YLB YLC ZFM YLA ZFB ZFA YLM YLJ YLH ZFD KNY KNV YEK YEM YEL YEN YED YEG ZAA YEY YEP YER YEU YET YEV YAD YKE WNN YKC YKA YKG YKF ZOF YKD YKK YKJ YKI YKL YKQ YLO YKU YKT YKZ YKY YKX XRR ZFL ILF DVK ZHP HNX ZBF ZBD ZBM ZTM YRI YRJ YRL YRM YRN YRO YRA YRB YRD YRE YRF YRG YRQ YRR YRS XMP YRV YYU YYT YYW ZMH YYQ YYR YYY YYZ YYE YYD YYG YYF YYA YYC YYB YYM YYL PIW YYI YYH YYJ JHL MSA YOO YOJ YOH YOG ZKE YOE ZKG YOC YOA ZWL YOY YOW YOS YOP YMB ZPO TIL GSL YDN YDO YDL YVO YDJ YDK YDH YDI YDF YDG YDE YDB YDC YDA YDX YDV YDW YDT YDR YDS YDP YDQ YVT YVV YVP YVQ YVR YVZ YVE YVG YVB YVC YVM ZPB CXH YJF YJN YJO YJP ZLT YJT XSI NWP YCZ YCY YCX YCS YCR YCQ YCP YCW YCV YCT YCK YCJ YCI YCH YCO YCN YCM YCL YCC YCB YCA YCG YCF ZGF YCD LRQ YQM YQL YQN YQI YQH YQK YQE YQD YQG YQF YQA YQC YQB ZUC YQY YQX YQZ YQU YQT YQW YQV YQQ YQS ZUM YXZ YXX YXY YXR YXS YXP YXQ YXT YXU YXJ YXK YXH YXI YXN 
YXL YSB YXC YXF YXD YXE YSF YNL YNM YNN YNO YNI YNK YND YNE YNF YNA YNC YNP YNR YNS DAS YST TNS ZTB YGG YGE YGC YGB YGA YGO YGN YGM YGL YGK YGH YGW YGV YGT YGS YGR YGQ YGP YGZ YGY YGX
Libyan Arab Jamahiriya
GHT SEB DNF BEN QMQ LMQ WAX TIP MJI BCQ QUB AKF HUQ LTD TOB SRX LAQ MRA NFR
Turkmenistan
CRZ ASB KRW TAZ MYP
Lithuania
VNO KUN SQQ PLQ PNV
FYR of Macedonia
OHD SKP
Cambodia
PNH BBM MWV KKZ REP HPP TNX KZC KMT KTI KZD PAI KZK OMY SVR RBE KOS
Dem. Rep. of Congo
KND BZV BZU LUS KOO LIE KMN KMK LKC KBN EPN IDF MSM FDU LIQ BAN BOA BOE DIS BMB LZI BUX PWO SOE KBO IRP FTX NKY BSU MSX OUE ION INO BDV MNO MNB KWZ BKY LBO IKL KRZ FKI KNM EWO GMM MDK LJA MJM GMA KIL KLI MKJ BDT KAP YAN FMI BNC BNB FIH DJM KLY MAT KNJ PUN TSH GOM NIO SIB KEE PNR KEC NKL ANJ MUY KGA MEW PFR OKG BTB LZA LCO GDJ KKW FBM KGN
Ethiopia
BCO NEJ NEK AXU EGL GLC GHD XBL MKD MKS BCY ETE TIE WRA GNN DIR FNH AMH MYS LFO SKR SZE LLI SXU HIL AWH MQX MUJ ASO AWA GDE ABK NDM DGC GMB JIM JIJ GOB ADD SHC WAC DBM DSE MTF GOR GDQ BEI ALK HUE MZX TUJ BJR OTA PWI DBT DEM
Aruba
AUA
Sri Lanka
ACJ CMB DIW MNH KDY KDZ NUF KDW HIM GIU KTY ADP AFK GOY JAF TRR WRZ KCT BJT KEZ BTC HRI DBU
Swaziland
MTS
Guinea-Bissau
BQE OXB
Argentina
RYO GHU CCT CPC CPG OYO OYA EPA PRA MQD UAQ AOL PRQ NQN APZ GPO EQS LHS MJR LPG EHL OVR SZQ SFN PUD JNI RHD CRR AEP VDR RGA CRD RGL EMX VCF LLS VLG ROY ROS TTG IGR AFA FTE IGB CSZ VGS LCP LMD PSS ING PSV LUQ COR RES NEC COC PEH REL CTC PMQ GGS JSM PMY OLN MDZ MDX MDQ CUT ELO RDS CLX FMA RLO TDL OES VME VMA SST TUC BRC ORA GNR RCU RCQ USH CVH CVI SDE VDM SGV UZU CNT RZA CNQ IRJ HOS EZE ULA SLA BHI ARR LCM RSA JUJ NCJ LGS MCS
Bolivia
REY CIJ SRZ SNM CAM VLM GYA MQK BYC ASC VVI CEP RIB SRB TJA VAH SRD SRE SRJ SJS POI SJV RBQ RBO APB SJB MHW LPB CBB SNG MGD BJO ORU TDD PSZ SBL BVK PUR BVL
Cameroon
NGE BPC KOB DLA NSI KBI TKC NKS OUR KLE MVR VCC YAO GOU FOM BFX DSC BLC EBW BTA MMF GXX
Burkina Faso
BOY XNU TMQ FNG XBO XSE DOR XBG DIP XLU XDE OUA OUG XDJ TEG XZA DGU XPA BNR XAR XKA XKY XGG XGA TUQ PUP ARL
Ghana
ACC TML TKD NYI KMS
Saudi Arabia
KMC DWD DMM YNB AKH HBT TIF ZUL KMX RAE EAM ELQ SHW RAH XZF AQI HOF EJH MED WAE MJH GIZ DHA EWD ABT JED UZH SLF HAS URY AJF AHB BHH RUH TUU TUI
Cape Verde
BVR SNE VXE SFL RAI BVC MMO MTI NTO SID
Slovenia
MBX LJU POW
Guatemala
CMM DON RER GUA CIQ LOX PCG AAZ UAX HUG TKM PON AQB PKJ PBR ENJ FRS CTF LCF CBV RUV MCR
Bosnia and Herzegovina
SJJ BNX OMO TZL
Kuwait
KWI XIJ
Russian Federation
DKS CKL URS RYB CKH NAL EGO NYM NYA NNM OHO OHH IWA MQF UIK RVH KHV VVO KYZ RAT THX JOK BWO SVO OVS MRV IAA URJ LPK BQS SVX ZIA MJZ IAR KMW TYD KVK OVB PYJ REN AMV KVX GRV AER DME SKX KSZ KUF RGK CEE LDG NER CEK OSW PKC RTW SCW VLU HMA ROV OSF NFG IKS IGT CSH KPW VGD VKT OEL YKS CSY LNX HTG KXK NVR HTA UUA MOW KCK NOZ INA BTK NOJ PEX PEZ NEF EIE PES ACS BCX VUS PEE BZK NJC PVS BKA NUX IKT PVX ASF NBC OKT KJA KRR VOZ VOG KRO ADH NOI LED KEJ GDX UFA GDZ UUS OMS MMK GDG UUD KZN TOX UKX TJM USK BAX AAQ TBW TOF SWT TYA NSK CYX DYR UCT SGC EYK KLD ABA KLF ULY SLY GOJ RZN ULV VLK STW VKO CNN PKV ESL KGP OGZ PWE KGD MCX ARH IJK
Jordan
AMM MPQ ADJ OMF AQJ
Dominica
DCF DOM
Liberia
RVC VOI TPT CPA ROB NIA FOY SNI WES SAZ BYL XSA WOI GRC UCN THC MLW
Maldives
IFU

parse Departures


In [ ]:
# Departures results, filled by the departures loop as SD[country] = sch.
SD={}

In [ ]:
for c in AP:
    print c
    airportialinks=AP[c]
    sch={}
    for i in airportialinks:
        print i,
        if i not in sch:sch[i]={}
        #march 4-31 = 4 weeks
        for d in range (4,32):
            if d not in sch[i]:
                try:
                    
                    #capture token
                    url=baseurl+airportialinks[i]+'departures/201703'+str(d)
                    s = requests.Session()
                    cookiesopen = s.get(url)
                    cookies=str(s.cookies)
                    fcookies=[[k[:k.find('=')],k[k.find('=')+1:k.find(' for ')]] for k in cookies[cookies.find('Cookie '):].split('Cookie ')[1:]]
                    #push token
                    opener = urllib2.build_opener()
                    for k in fcookies:
                        opener.addheaders.append(('Cookie', k[0]+'='+k[1]))
                    #read html
                    m=s.get(url).content
                    sch[i][url]=pd.read_html(m)[0]             
                except: pass #print 'no tables',i,d
    print 
    SD[c]=sch

Save


In [43]:
# Flatten the scraped arrivals of each country into one table and save it as JSON.
# NOTE(review): `cnc` is not defined in any cell shown here; it has to be loaded
# before this cell runs (it is read as a country -> ISO2 lookup below).
for c in SC:
    sch=SC[c]
    frames=[]
    for i in sch:
        for d in sch[i]:
            raw=sch[i][d]
            # keep only the 2nd and 3rd columns of the scraped table
            df=raw.drop(raw.columns[3:],axis=1).drop(raw.columns[0],axis=1)
            df['To']=i    # airport key the flights arrive at
            df['Date']=d  # page url the table came from (ends with the date)
            frames.append(df)
    if not frames:
        # Nothing was scraped for this country. Previously this raised a
        # KeyError on mdf['From'] and aborted the loop for all remaining countries.
        continue
    # one concat at the end instead of re-copying the growing frame every iteration
    mdf=pd.concat(frames)
    mdf=mdf.replace('Hahn','Frankfurt')
    mdf=mdf.replace('Hahn HHN','Frankfurt HHN')
    # 'From' is split at its last space: text before -> City, text after -> Airport
    mdf['City']=[i[:i.rfind(' ')] for i in mdf['From']]
    mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['From']]
    # to_json() already returns a JSON string; json.dumps wraps it once more,
    # which is kept so the file format stays what existing readers expect
    with open('countries/'+cnc.T.loc[c]['ISO2']+"/json/mdf_arrv.json",'w') as f:
        f.write(json.dumps(mdf.reset_index().to_json()))

In [ ]:
# Flatten the scraped departures of each country into one table and save it as JSON.
# NOTE(review): `cnc` is not defined in any cell shown here; it has to be loaded
# before this cell runs (it is read as a country -> ISO2 lookup below).
for c in SD:
    sch=SD[c]
    frames=[]
    for i in sch:
        for d in sch[i]:
            raw=sch[i][d]
            # keep only the 2nd and 3rd columns of the scraped table
            df=raw.drop(raw.columns[3:],axis=1).drop(raw.columns[0],axis=1)
            df['From']=i  # airport key the flights depart from
            df['Date']=d  # page url the table came from (ends with the date)
            frames.append(df)
    if not frames:
        # Nothing was scraped for this country. Previously this raised a
        # KeyError on mdf['To'] and aborted the loop for all remaining countries.
        continue
    # one concat at the end instead of re-copying the growing frame every iteration
    mdf=pd.concat(frames)
    mdf=mdf.replace('Hahn','Frankfurt')
    mdf=mdf.replace('Hahn HHN','Frankfurt HHN')
    # 'To' is split at its last space: text before -> City, text after -> Airport
    mdf['City']=[i[:i.rfind(' ')] for i in mdf['To']]
    mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['To']]
    # to_json() already returns a JSON string; json.dumps wraps it once more,
    # which is kept so the file format stays what existing readers expect
    with open('countries/'+cnc.T.loc[c]['ISO2']+"/json/mdf_dest.json",'w') as f:
        f.write(json.dumps(mdf.reset_index().to_json()))