In [2]:
    
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
    
Load airports of each country
In [7]:
    
# Load the three per-country JSON lookups used to build AP in a later cell:
# M supplies the country keys that are iterated, while L[country] and
# N[country] are read position by position in parallel.
def _load_json(path):
    """Return the parsed JSON document stored at `path`.

    The file is opened in a `with` block so the handle is closed even if
    parsing raises.
    """
    with open(path, 'r') as fh:
        return json.load(fh)

L = _load_json('../json/L.json')
M = _load_json('../json/M.json')
N = _load_json('../json/N.json')
    
In [15]:
    
import requests
    
In [15]:
    
# Build AP: for every country key in M, map N[country][k] -> L[country][k]
# for each position k of L[country]. The scraping cells later append these
# values to the base URL.
AP = {}
for c in M:
    links_by_name = AP.setdefault(c, {})
    for idx, link in enumerate(L[c]):
        links_by_name[N[c][idx]] = link
    
In [18]:
    
# Empty schedule container; the scraping cells rebind `sch` once per country.
sch = dict()
    
Record schedules for 4 weeks (the scraping loops below cover March 4-31, 2017; this note previously said 2 weeks), then augment the count with weekly flight numbers. Seasonal and seasonal-charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is treated separately, since its history is in the past.
In [24]:
    
# Site root; the per-airport path taken from AP and the arrivals/departures
# suffix are appended to it when the page URLs are built.
baseurl = 'https://www.airportia.com/'
import requests, urllib2
# Arrival schedules, filled per country by the arrivals scraping cell.
SC = dict()
    
parse Arrivals
In [ ]:
    
for c in AP:
    print c
    airportialinks=AP[c]
    sch={}
    for i in airportialinks:
        print i,
        if i not in sch:sch[i]={}
        #march 4-31 = 4 weeks
        for d in range (4,32):
            if d not in sch[i]:
                try:
                    
                    #capture token
                    url=baseurl+airportialinks[i]+'arrivals/201703'+str(d)
                    s = requests.Session()
                    cookiesopen = s.get(url)
                    cookies=str(s.cookies)
                    fcookies=[[k[:k.find('=')],k[k.find('=')+1:k.find(' for ')]] for k in cookies[cookies.find('Cookie '):].split('Cookie ')[1:]]
                    #push token
                    opener = urllib2.build_opener()
                    for k in fcookies:
                        opener.addheaders.append(('Cookie', k[0]+'='+k[1]))
                    #read html
                    m=s.get(url).content
                    sch[i][url]=pd.read_html(m)[0]             
                except: pass #print 'no tables',i,d
    print 
    SC[c]=sch
    
    
parse Departures
In [ ]:
    
# Departure schedules, filled per country by the departures scraping cell.
SD = dict()
    
In [ ]:
    
for c in AP:
    print c
    airportialinks=AP[c]
    sch={}
    for i in airportialinks:
        print i,
        if i not in sch:sch[i]={}
        #march 4-31 = 4 weeks
        for d in range (4,32):
            if d not in sch[i]:
                try:
                    
                    #capture token
                    url=baseurl+airportialinks[i]+'departures/201703'+str(d)
                    s = requests.Session()
                    cookiesopen = s.get(url)
                    cookies=str(s.cookies)
                    fcookies=[[k[:k.find('=')],k[k.find('=')+1:k.find(' for ')]] for k in cookies[cookies.find('Cookie '):].split('Cookie ')[1:]]
                    #push token
                    opener = urllib2.build_opener()
                    for k in fcookies:
                        opener.addheaders.append(('Cookie', k[0]+'='+k[1]))
                    #read html
                    m=s.get(url).content
                    sch[i][url]=pd.read_html(m)[0]             
                except: pass #print 'no tables',i,d
    print 
    SD[c]=sch
    
Save
In [43]:
    
for c in SC:
    sch=SC[c]
    mdf=pd.DataFrame()
    for i in sch:
        for d in sch[i]:
            df=sch[i][d].drop(sch[i][d].columns[3:],axis=1).drop(sch[i][d].columns[0],axis=1)
            df['To']=i
            df['Date']=d
            mdf=pd.concat([mdf,df])
    mdf=mdf.replace('Hahn','Frankfurt')
    mdf=mdf.replace('Hahn HHN','Frankfurt HHN')
    mdf['City']=[i[:i.rfind(' ')] for i in mdf['From']]
    mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['From']]
    file('countries/'+cnc.T.loc[c]['ISO2']+"/json/mdf_arrv.json",'w').write(json.dumps(mdf.reset_index().to_json()))
    
In [ ]:
    
for c in SD:
    sch=SD[c]
    mdf=pd.DataFrame()
    for i in sch:
        for d in sch[i]:
            df=sch[i][d].drop(sch[i][d].columns[3:],axis=1).drop(sch[i][d].columns[0],axis=1)
            df['From']=i
            df['Date']=d
            mdf=pd.concat([mdf,df])
    mdf=mdf.replace('Hahn','Frankfurt')
    mdf=mdf.replace('Hahn HHN','Frankfurt HHN')
    mdf['City']=[i[:i.rfind(' ')] for i in mdf['To']]
    mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['To']]
    file('countries/'+cnc.T.loc[c]['ISO2']+"/json/mdf_dest.json",'w').write(json.dumps(mdf.reset_index().to_json()))