In [44]:
    
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
    
In [45]:
    
# Scrape the Wikipedia list of Hungarian airports; read_html returns one
# DataFrame per HTML table found on the page.
url = 'http://en.wikipedia.org/wiki/List_of_airports_in_Hungary'
df = pd.read_html(url)
# From the first table: keep row labels 0-6, promote row 0 to the column
# headers (transpose, set_index(0), transpose back), keep rows from label 2
# onwards and index the result by the 'IATA' column.
df = (
    df[0]
    .loc[:6]
    .T.set_index(0).T
    .loc[2:]
    .set_index('IATA')
)
    
In [3]:
    
# Show the scraped airport table for a visual check.
df
    
    Out[3]:
In [4]:
    
import os

from pygeocoder import Geocoder

# The Google geocoding API key is read from the environment instead of being
# stored in the notebook, so the credential is never committed or shared
# together with the analysis.
apik = os.environ.get('GOOGLE_API_KEY')
if not apik:
    raise RuntimeError('Set the GOOGLE_API_KEY environment variable '
                       'to a Google geocoding API key before running this notebook')
    
In [5]:
    
# Geocode every airport, keyed by the index labels of `df`.
locations = {}
for i in df.index:
    query = i + ' airport Hungary'
    matches = Geocoder(apik).geocode(query)
    # Only the coordinates of the first match are kept.
    locations[i] = matches[0].coordinates
    print(i)
    
    
In [6]:
    
# Cache the geocoded airport coordinates on disk.  The context manager
# guarantees the file is flushed and closed, which the bare file(...).write()
# call left to the garbage collector.
with open("locations_hu.json", 'w') as f:
    f.write(json.dumps(locations))
    
In [7]:
    
# Reload the cached airport coordinates; the file is closed as soon as it
# has been read.
with open('locations_hu.json', 'r') as f:
    locations = json.loads(f.read())
    
In [8]:
    
import requests
    
In [9]:
    
# Find each airport's airportia.com page through a Google custom search.
search_url = 'https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='
# Airports that have to be searched under a different code than their key.
search_code = {'QPJ': 'PEV'}

airportialinks = {}
for i in locations:
    url = search_url + search_code.get(i, str(i)) + '+airport+hungary'
    # A timeout keeps one unresponsive request from hanging the whole cell.
    m = requests.get(url, timeout=30).content
    # Cell (0, 0) of the sixth parsed table is used as the first search hit
    # (presumably tied to the current page layout - verify if this breaks).
    z = pd.read_html(m)[5][0][0]
    # Keep only the part of the cell text starting at the link.
    z = z[z.find('http'):]
    airportialinks[i] = z
    print('%s %s' % (i, z))
    
    
In [10]:
    
# Normalise every scraped link into the base URL of the airport page.
for z in airportialinks:
    link = airportialinks[z]
    # Cut everything from an 'arrivals' / 'departures' suffix onwards.
    link = link.split('arrivals')[0]
    link = link.split('departures')[0]
    # Remove spaces and expand the '...' found in the scraped text.
    link = link.replace(' ', '').replace('...', '-international-')
    if link[-1] != '/':
        link += '/'
    # Manual fix: QGY gets a hand-written URL.
    if z == 'QGY':
        link = u'https://www.airportia.com/hungary/győr_pér-international-airport/'
    airportialinks[z] = link
    print(link)
    
    
In [65]:
    
# Downloaded schedule tables, filled in per airport by the download loop.
sch = dict()
    
Record schedules for 2 weeks, then augment the count with weekly flight numbers. Seasonal and seasonal-charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its history is in the past.
In [66]:
    
# Download the daily arrivals table of every airport into sch[airport][url].
for i in locations:
    print(i)
    if i not in sch:
        sch[i] = {}
    # March 11-24 = 2 weeks
    for d in range(11, 25):
        try:
            url = airportialinks[i]
            full = url + 'arrivals/201703' + str(d)
            # sch[i] is keyed by the full URL, so the cache check has to use
            # the URL as well; testing the bare day number never matched and
            # every re-run downloaded everything again.
            if full in sch[i]:
                continue
            m = requests.get(full).content
            sch[i][full] = pd.read_html(m)[0]
        except Exception:
            # Best effort: days/airports without a parsable table are skipped.
            # Catching Exception (not a bare except) lets a kernel interrupt
            # actually stop the loop.
            pass
    
    
In [67]:
    
# Sanity check for Budapest (BUD): print the number of rows downloaded for
# each day, then show the March 18 rows whose origin is Frankfurt FRA.
bud_arrivals = u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/201703'
for i in range(11, 25):
    testurl = bud_arrivals + str(i)
    print('nr. of flights on March %s : %s' % (i, len(sch['BUD'][testurl])))
# `testurl` is left pointing at March 18 for the later spot check on mdf.
testurl = bud_arrivals + '18'
k = sch['BUD'][testurl]
k[k['From'] == 'Frankfurt FRA']
    
    
    Out[67]:
sch checks out with source
In [68]:
    
# Start from an empty frame; the flattened schedule rows are added to it later.
mdf = pd.DataFrame()
    
In [69]:
    
# Flatten the per-airport, per-day tables in `sch` into the single frame `mdf`.
# The pieces are collected in a list and concatenated once: calling pd.concat
# inside the loop copies the growing frame on every iteration.  The per-table
# frame also gets its own name so it no longer overwrites the airports table `df`.
frames = []
for i in sch:
    for d in sch[i]:
        table = sch[i][d]
        # Keep only the second and third columns of the scraped table
        # (drop everything from the fourth column on, then the first one).
        flights = table.drop(table.columns[3:], axis=1).drop(table.columns[0], axis=1)
        flights['To'] = i
        # `d` is the key the table is stored under in sch[i].
        flights['Date'] = d
        frames.append(flights)
# Existing rows of mdf are kept in front, as with the incremental concat.
mdf = pd.concat([mdf] + frames)
    
In [70]:
    
# Relabel cells whose value is 'Hahn' or 'Hahn HHN' as Frankfurt.
mdf = mdf.replace(
    to_replace=['Hahn', 'Hahn HHN'],
    value=['Frankfurt', 'Frankfurt HHN'],
)
    
In [71]:
    
def _split_origin(label):
    """Split a 'From' label at its last space into (city, airport).

    'Frankfurt FRA' -> ('Frankfurt', 'FRA').  A label without any space is
    returned unchanged for both parts; slicing with rfind() == -1 used to
    chop the last character off the city in that case.
    """
    city, sep, code = label.rpartition(' ')
    if not sep:
        return label, label
    return city, code


# Derive the city name and the airport code from the 'From' column.
origin_parts = [_split_origin(label) for label in mdf['From']]
mdf['City'] = [city for city, _ in origin_parts]
mdf['Airport'] = [code for _, code in origin_parts]
    
In [72]:
    
# Same spot check as on `sch`, now on the flattened frame: rows stored under
# `testurl` whose origin is Frankfurt FRA.
on_test_day = mdf['Date'] == testurl
k = mdf[on_test_day]
from_fra = k['From'] == 'Frankfurt FRA'
k[from_fra]
    
    Out[72]:
mdf checks out with source
In [73]:
    
# Persist the flattened schedule; the context manager closes the file handle
# that the bare file(...).write() call left open.
# NOTE(review): to_json() already returns a JSON string and json.dumps()
# encodes it a second time, so the file holds a JSON-encoded string.  Left
# unchanged in case existing readers decode it twice - confirm.
with open("mdf_hu_arrv.json", 'w') as f:
    f.write(json.dumps(mdf.reset_index().to_json()))
    
In [74]:
    
# Number of rows in the flattened schedule.
mdf.shape[0]
    
    Out[74]:
In [55]:
    
# Distinct values of the 'Airline' column.
airlines = {name for name in mdf['Airline']}
    
In [56]:
    
# Distinct values of the 'City' column.
cities = {name for name in mdf['City']}
    
In [57]:
    
# Save the city and airline sets as JSON lists (sets are not JSON
# serialisable).  Each file is closed as soon as it has been written.
with open("cities_hu_arrv.json", 'w') as f:
    f.write(json.dumps(list(cities)))
with open("airlines_hu_arrv.json", 'w') as f:
    f.write(json.dumps(list(airlines)))
    
In [60]:
    
# Geocoding results per city; cities already present are not looked up again.
citycoords = dict()
    
In [61]:
    
# City names that are geocoded with an explicit country qualifier
# (presumably because the bare name resolves to a different place).
qualified = {
    u'Birmingham': 'Birmingham, UK',
    u'Valencia': 'Valencia, Spain',
    u'Naples': 'Naples, Italy',
    u'St. Petersburg': 'St. Petersburg, Russia',
    u'Bristol': 'Bristol, UK',
}
for i in cities:
    if i in citycoords:
        continue
    # Fall back to the bare city name when no qualifier is listed.
    z = qualified.get(i, i)
    citycoords[i] = Geocoder(apik).geocode(z)
    print(i)
    
    
In [62]:
    
# Reduce each geocoding result to plain values taken from its first match.
citysave = {}
for name, matches in citycoords.items():
    best = matches[0]
    citysave[name] = {
        "coords": best.coordinates,
        "country": best.country,
    }
    
In [63]:
    
# Write the per-city coordinates and country to disk; the context manager
# closes the file handle that the bare file(...).write() call left open.
with open("citysave_hu_arrv.json", 'w') as f:
    f.write(json.dumps(citysave))