In [44]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [45]:
# Scrape the Wikipedia list of Hungarian airports; read_html returns every table on the page.
url='http://en.wikipedia.org/wiki/List_of_airports_in_Hungary'
tables=pd.read_html(url)
# From the first table: keep rows up to label 6, promote row 0 to the column
# header (transpose, index by row 0, transpose back), keep rows from label 2
# on, and index the result by IATA code.
df=(tables[0]
    .loc[:6]
    .T.set_index(0).T
    .loc[2:]
    .set_index('IATA'))

In [3]:
# Show the airports table built above (indexed by IATA code).
df


Out[3]:
Location served County ICAO Airport name Elev. Runways
IATA
BUD Budapest (Capital) LHBP Budapest Ferenc Liszt International Airport 151 m (495 ft) 3010 m x 59 m 3707 x 59 m
DEB Debrecen Hajdú-Bihar LHDC Debrecen International Airport 109 m (359 ft) 2498 m x 40 m
SOB Sármellék Zala LHSM Hévíz-Balaton Airport 124 m (408 ft) 2500 x 60 m
QGY Győr-Pér Győr-Moson-Sopron LHPR Győr-Pér International Airport 129 m (424 ft) 2030 x 30 m 1134 x 43 m
QPJ Pécs-Pogány Baranya LHPP Pécs-Pogány International Airport 305 m (1000 ft) 1500 x 30 m

In [4]:
from pygeocoder import Geocoder
import os

# Read the Google geocoding API key from the environment instead of committing
# it to the notebook. The key that used to be hard-coded on this line has been
# published with the notebook and should be revoked.
# apik is None when GOOGLE_API_KEY is not set.
apik=os.environ.get('GOOGLE_API_KEY')

In [5]:
locations={}
for i in df.index:
    results = Geocoder(apik).geocode(i+' airport Hungary')
    locations[i]=results[0].coordinates
    print i


BUD
DEB
SOB
QGY
QPJ

In [6]:
# Cache the geocoded airport coordinates on disk. The with-block closes the
# file, so the JSON is fully written before the next cell reads it back.
with open("locations_hu.json",'w') as f:
    f.write(json.dumps(locations))

In [7]:
# Reload the cached airport coordinates; the with-block closes the file handle
# that the former file(...).read() call left open.
with open('locations_hu.json','r') as f:
    locations=json.loads(f.read())

In [8]:
import requests

In [9]:
airportialinks={}
for i in locations:
    print i,
    if i=='QPJ': url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+'PEV'+'+airport+hungary'
    else: url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+str(i)+'+airport+hungary'
    m=requests.get(url).content
    z=pd.read_html(m)[5][0][0]
    z=z[z.find('http'):]
    airportialinks[i]=z
    print z


QPJ https://www.airportia.com/hungary/pécs_pogány-airport/map/
DEB https://www.airportia.com/hungary/debrecen-international-airport
SOB https://www.airportia.com/hungary/sármellék...airport/arrivals
BUD https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport
QGY https://www.airportia.com/hungary/győr_pér...airport/photos

In [10]:
#reformat
for z in airportialinks:
    airportialinks[z]=airportialinks[z].split('arrivals')[0].split('departures')[0].replace(' ','').replace('...','-international-')
    if airportialinks[z][-1]!='/':airportialinks[z]+='/' 
    #manual fixes
    if z=='QGY':airportialinks[z]=u'https://www.airportia.com/hungary/győr_pér-international-airport/'
    print airportialinks[z]


https://www.airportia.com/hungary/pécs_pogány-airport/map/
https://www.airportia.com/hungary/debrecen-international-airport/
https://www.airportia.com/hungary/sármellék-international-airport/
https://www.airportia.com/hungary/győr_pér-international-airport/
https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/

In [65]:
# Scraped schedules: airport code -> {arrivals page URL -> table parsed from that page}
sch={}

Record schedules for 2 weeks, then augment the counts with weekly flight numbers. Seasonal and seasonal-charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its history is in the past.


In [66]:
for i in locations:
    print i
    if i not in sch:sch[i]={}
    #march 11-24 = 2 weeks
    for d in range (11,25):
        if d not in sch[i]:
            try:
                url=airportialinks[i]
                full=url+'arrivals/201703'+str(d)
                m=requests.get(full).content
                sch[i][full]=pd.read_html(m)[0]
                #print full
            except: pass #print 'no tables',i,d


QPJ
DEB
SOB
BUD
QGY

In [67]:
for i in range(11,25):
    testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/201703'+str(i)
    print 'nr. of flights on March',i,':',len(sch['BUD'][testurl])
testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/20170318'
k=sch['BUD'][testurl]
k[k['From']=='Frankfurt FRA']


nr. of flights on March 11 : 87
nr. of flights on March 12 : 117
nr. of flights on March 13 : 122
nr. of flights on March 14 : 102
nr. of flights on March 15 : 108
nr. of flights on March 16 : 109
nr. of flights on March 17 : 126
nr. of flights on March 18 : 86
nr. of flights on March 19 : 120
nr. of flights on March 20 : 126
nr. of flights on March 21 : 105
nr. of flights on March 22 : 110
nr. of flights on March 23 : 110
nr. of flights on March 24 : 124
Out[67]:
Flight From Airline Scheduled Arrival Status Unnamed: 6
14 LH1334 Frankfurt FRA Lufthansa 10:05 09:57 Landed Track >
41 LH1338 Frankfurt FRA Lufthansa 13:45 13:33 Landed Track >
61 LH1340 Frankfurt FRA Lufthansa 18:00 17:59 Landed Track >
79 LH1342 Frankfurt FRA Lufthansa 23:05 22:58 Landed Track >

The scraped schedules in `sch` check out against the source website.


In [68]:
# Master table of all scraped arrivals; starts out empty.
mdf=pd.DataFrame()

In [69]:
# Flatten sch into one long table: one row per scraped arrival, tagged with the
# destination airport ('To') and the URL of the page it came from ('Date').
frames=[]
for i in sch:
    for d in sch[i]:
        # keep only the 2nd and 3rd columns of the scraped table
        # (drop the first column and everything from the 4th column on)
        df=sch[i][d].drop(sch[i][d].columns[3:],axis=1).drop(sch[i][d].columns[0],axis=1)
        df['To']=i
        df['Date']=d
        frames.append(df)
# Concatenate once instead of growing mdf inside the loop, which re-copied all
# previous rows on every iteration. Building from scratch also keeps a re-run
# of this cell from appending the same rows a second time.
mdf=pd.concat(frames) if frames else pd.DataFrame()

In [70]:
# Relabel Hahn entries as Frankfurt (whole-value replacements, both spellings in one pass).
mdf=mdf.replace({'Hahn':'Frankfurt',
                 'Hahn HHN':'Frankfurt HHN'})

In [71]:
# Split the 'From' strings at their last space: City is the text before it,
# Airport the text after it.
# rfind returns -1 when a value has no space, and slicing with [:-1] would chop
# the last character off the city name, so such values are kept whole.
mdf['City']=[i[:i.rfind(' ')] if ' ' in i else i for i in mdf['From']]
# For a value without a space this yields the whole string (rfind()+1 == 0).
mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['From']]

In [72]:
# Repeat the Frankfurt spot check on the flattened table, restricted to the
# page stored in testurl.
k=mdf.loc[mdf['Date']==testurl]
k.loc[k['From']=='Frankfurt FRA']


Out[72]:
From Airline To Date City Airport
14 Frankfurt FRA Lufthansa BUD https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA
41 Frankfurt FRA Lufthansa BUD https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA
61 Frankfurt FRA Lufthansa BUD https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA
79 Frankfurt FRA Lufthansa BUD https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA

The flattened table `mdf` checks out against the source website.


In [73]:
# Persist the flattened arrivals table; the with-block closes the file handle.
# NOTE: to_json() already returns a JSON string and json.dumps() encodes it a
# second time, so the file holds a JSON string literal. Kept as is so that
# existing readers of this file are not broken.
with open("mdf_hu_arrv.json",'w') as f:
    f.write(json.dumps(mdf.reset_index().to_json()))

In [74]:
# Number of rows in the flattened arrivals table.
len(mdf)


Out[74]:
1584

In [55]:
# Distinct airline names found in the arrivals table.
airlines=set(mdf['Airline'].tolist())

In [56]:
# Distinct origin city names found in the arrivals table.
cities=set(mdf['City'].tolist())

In [57]:
# Save the distinct cities and airlines as JSON lists; each with-block closes
# its file handle once the data is written.
with open("cities_hu_arrv.json",'w') as f:
    f.write(json.dumps(list(cities)))
with open("airlines_hu_arrv.json",'w') as f:
    f.write(json.dumps(list(airlines)))

In [60]:
# Geocoder results per city name; filled by the loop in the next cell.
citycoords={}

In [61]:
for i in cities:
    if i not in citycoords:
        if i==u'Birmingham': z='Birmingham, UK'
        elif i==u'Valencia': z='Valencia, Spain'
        elif i==u'Naples': z='Naples, Italy'
        elif i==u'St. Petersburg': z='St. Petersburg, Russia'
        elif i==u'Bristol': z='Bristol, UK'
        else: z=i
        citycoords[i]=Geocoder(apik).geocode(z)
        print i


Manchester
Lyon
Sofia
Oslo
Kiev
Istanbul
Paris
Bologna
Riga
Cairo
Algiers
St. Petersburg
Gothenburg
Nurnberg
Barcelona
Cologne
Lanzarote
Rotterdam
Vienna
Glasgow
Nice
Cluj-Napoca
Edinburgh
Liverpool
Alicante
Larnaca
Dortmund
Moscow
Madrid
Thessaloniki
Munich
Malmo
Kutaisi
Berlin
Geneva
Leeds
Fuerteventura
Catania
Treviso
Brussels
Hong Kong
Eilat
Porto
Dubai
Eindhoven
Malaga
Helsinki
Naples
Basel
East Midlands
Hamburg
Dublin
Dusseldorf
Tenerife
Athens
Stuttgart
Zurich
Minsk
Pisa
Stockholm
Bristol
Tel Aviv
Venice
Frankfurt
Las Palmas
Bucharest
Reykjavik
Belgrade
Doha
Billund
Karlsruhe/Baden-Baden
Prague
Baku
Birmingham
Luqa
Milan
Rome
London
Lisbon
Bari
Amsterdam
Copenhagen
Hurghada
Warsaw

In [62]:
# Reduce each geocoder result to the two fields that get saved:
# coordinates and country of the first hit.
citysave=dict(
    (city,{"coords":hits[0].coordinates,"country":hits[0].country})
    for city,hits in citycoords.items()
)

In [63]:
# Save the per-city coordinates and country; the with-block closes the file handle.
with open("citysave_hu_arrv.json",'w') as f:
    f.write(json.dumps(citysave))