In [1]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Wikipedia page whose first HTML table lists the Romanian airports.
url='http://en.wikipedia.org/wiki/List_of_airports_in_Romania'
tables=pd.read_html(url)
# Keep the rows of the first table up to label 17.
raw=tables[0].loc[:17]
# Transposing, indexing by column 0 and transposing back turns the values of
# row 0 into the column labels.
named=raw.T.set_index(0).T
# Keep the rows from label 2 onwards and index them by the IATA column.
df=named.loc[2:].set_index('IATA')

In [3]:
# Show the airport table built above (indexed by IATA code).
df


Out[3]:
City served ICAO Airport name Website Frequency Status nan
IATA
ARW Arad LRAR Arad International Airport http://www.aeroportularad.ro TWR 130.2 MHz NaN NaN
BCM Bacău LRBC Bacău "George Enescu" International Airport [14] http://www.bacauairport.ro TWR 118.6 MHz NaN NaN
BAY Baia Mare / Tăuții-Măgherăuș LRBM Baia Mare Airport (Tăuții-Măgherăuș Airport) http://www.baiamareairport.ro TWR 123.6 MHz Closed for renovation NaN
BBU Bucharest / Băneasa LRBS Bucharest "Aurel Vlaicu" International Airport... http://www.baneasa.aero APP 127.6 MHz TWR 120.8 MHz Only private flights NaN
OTP Bucharest / Otopeni LROP Bucharest "Henri Coandǎ" International Airport... http://www.otp-airport.ro APP 126.2 TWR1 120.9 TWR2 121.85 NaN NaN
CLJ Cluj-Napoca LRCL Cluj "Avram Iancu" International Airport http://www.airportcluj.ro APP 125.1 MHz TWR 134.4 MHz NaN NaN
CND Constanța LRCK Constanța "Mihail Kogălniceanu" International ... http://www.mk-airport.ro TWR 120.24 MHz NaN NaN
CRA Craiova LRCV Craiova Airport http://www.aeroportcraiova.ro TWR 124.3 MHz NaN NaN
IAS Iași LRIA Iași International Airport http://www.aeroport.ro TWR 119.2 MHz NaN NaN
OMR Oradea LROD Oradea International Airport http://www.aeroportoradea.ro TWR 120.2 MHz NaN NaN
SUJ Satu Mare LRSM Satu Mare International Airport http://www.aeroportulsm.ro TWR 118.8 MHz NaN NaN
SBZ Sibiu LRSB Sibiu International Airport NaN TWR 122.7 MHz NaN NaN
SCV Suceava LRSV Suceava "Ștefan cel Mare" International Airport http://www.aeroportsuceava.ro APP 120.9 MHz TWR 118.3 MHz NaN NaN
TGM Târgu Mureș LRTM "Transilvania" Târgu Mureș Airport http://www.targumuresairport.ro APP 121.9 MHz TWR 125.9 MHz NaN NaN
TSR Timișoara LRTR Timișoara "Traian Vuia" International Airport ... http://www.aerotim.ro TWR 101.1 MHz NaN NaN
TCE Tulcea LRTC "Delta Dunarii" Tulcea Airport (Cataloi Airport) http://www.aeroportul-tulcea.ro/ APP/TWR 120.3 MHz Closed NaN

In [1]:
import os
from pygeocoder import Geocoder
# Google Geocoding API key, passed to Geocoder(...) by the geocoding cells.
# It is read from the environment so the secret is never stored in the
# notebook: export GOOGLE_API_KEY before starting the kernel.
# NOTE(review): the key that used to be hardcoded here has been published with
# the notebook and should be revoked.
apik=os.environ.get('GOOGLE_API_KEY')
if not apik:
    raise RuntimeError('Set the GOOGLE_API_KEY environment variable to a Google Geocoding API key')

In [ ]:
# NOTE(review): unexecuted scratch cell. Within the cells shown, `i` is first
# bound by the loop in the following cell, so on a fresh kernel this line
# raises NameError -- confirm that the cell can be removed.
results = Geocoder(apik).geocode(i+' airport romania')

In [5]:
# Geocode every airport by querying "<IATA> airport romania" and keep the
# coordinates of the first hit, keyed by IATA code.
locations={}
# One client is reused for all lookups instead of building one per airport.
geocoder=Geocoder(apik)
for i in df.index:
    results=geocoder.geocode(i+' airport romania')
    locations[i]=results[0].coordinates
    # Progress output; the parenthesised form runs on Python 2 and Python 3.
    print(i)


ARW
BCM
BAY
BBU
OTP
CLJ
CND
CRA
IAS
OMR
SUJ
SBZ
SCV
TGM
TSR
TCE

In [6]:
# Cache the geocoded airport coordinates on disk. The with-block closes the
# file deterministically, so the JSON is flushed before the next cell reads it.
with open("locations_ro.json",'w') as f:
    f.write(json.dumps(locations))

In [7]:
# Reload the cached airport coordinates; the with-block closes the file handle
# instead of leaving it to the garbage collector.
with open('locations_ro.json','r') as f:
    locations=json.loads(f.read())

In [8]:
import requests

In [9]:
# Find the airportia.com page of every airport through a Google custom search
# for "<IATA> airport romania".
airportialinks={}
for i in locations:
    url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+str(i)+'+airport+romania'
    m=requests.get(url).content
    # NOTE(review): the sixth parsed table, column 0, row 0 is assumed to hold
    # the first search hit -- confirm against the current page layout.
    z=pd.read_html(m)[5][0][0]
    start=z.find('http')
    if start==-1:
        # find() returns -1 when the text holds no link; slicing with it would
        # silently keep only the last character, so the airport is skipped.
        print('%s no link found' % i)
        continue
    z=z[start:]
    airportialinks[i]=z
    # One line per airport: "<IATA> <link>" (form valid on Python 2 and 3).
    print('%s %s' % (i, z))


BCM https://www.airportia.com/romania/bacău-airport/
SCV https://www.airportia.com/romania/suceava-stefan-cel-mare-airport
CLJ https://www.airportia.com/romania/cluj_napoca-international-airport
ARW https://www.airportia.com/romania/arad-international-airport/arrivals
SBZ https://www.airportia.com/romania/sibiu-international-airport/
SUJ https://www.airportia.com/romania/satu-mare-airport/arrivals
BAY https://www.airportia.com/romania/tautii-magheraus-airport
OMR https://www.airportia.com/romania/oradea-international-airport/
CND https://www.airportia.com/romania/mihail-kogălniceanu-international-airport
CRA https://www.airportia.com/romania/craiova-airport/arrivals
OTP https://www.airportia.com/romania/henri-coandă-international-airport
BBU https://www.airportia.com/romania/băneasa...airport/departures
TCE https://www.airportia.com/romania/tulcea-airport
TSR https://www.airportia.com/romania/timişoara-traian-vuia-airport/
IAS https://www.airportia.com/romania/iaşi-airport/arrivals
TGM https://www.airportia.com/romania/transilvania-târgu-mureş-international- airport

In [10]:
# Normalise every scraped link to the airport's base URL with a trailing slash.
for code in airportialinks:
    link=airportialinks[code]
    # keep only the part in front of an "arrivals" / "departures" page name
    link=link.split('arrivals')[0]
    link=link.split('departures')[0]
    # drop spaces and expand the "..." of a shortened search result
    link=link.replace(' ','')
    link=link.replace('...','-international-')
    if link[-1]!='/':
        link=link+'/'
    #manual fixes
    if code=='TSR':
        link='https://www.airportia.com/romania/timişoara-traian-vuia-airport/'
    airportialinks[code]=link
    print(link)


https://www.airportia.com/romania/bacău-airport/
https://www.airportia.com/romania/tulcea-airport/
https://www.airportia.com/romania/cluj_napoca-international-airport/
https://www.airportia.com/romania/arad-international-airport/
https://www.airportia.com/romania/sibiu-international-airport/
https://www.airportia.com/romania/satu-mare-airport/
https://www.airportia.com/romania/tautii-magheraus-airport/
https://www.airportia.com/romania/oradea-international-airport/
https://www.airportia.com/romania/mihail-kogălniceanu-international-airport/
https://www.airportia.com/romania/craiova-airport/
https://www.airportia.com/romania/henri-coandă-international-airport/
https://www.airportia.com/romania/băneasa-international-airport/
https://www.airportia.com/romania/suceava-stefan-cel-mare-airport/
https://www.airportia.com/romania/timişoara-traian-vuia-airport/
https://www.airportia.com/romania/iaşi-airport/
https://www.airportia.com/romania/transilvania-târgu-mureş-international-airport/

In [11]:
# Scraped schedules: airport code -> {schedule URL -> parsed table}.
# Filled by the download loop below.
sch={}

Record schedules for 2 weeks, then augment the count with weekly flight numbers. Seasonal and seasonal charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its history is in the past.


In [12]:
# Download the daily departure table of every airport for a two-week window and
# store it in sch[i] under the URL it was fetched from.
#  * The "already downloaded" check tests that URL, because it is the key of
#    sch[i]; testing the day number never matches, so a re-run would fetch
#    everything again.
#  * All airports share one loop; only the window (month prefix, days) differs.
#  * Only Exception is swallowed, so an interrupt still stops the loop.
for i in locations:
    print(i)
    if i not in sch:sch[i]={}
    if i!='TGM':
        #march 11-24 = 2 weeks
        month,days='201703',range(11,25)
    else:
        #november 17-30 = 2 weeks
        month,days='201611',range(17,31)
    for d in days:
        try:
            url=airportialinks[i]
            full=url+'departures/'+month+str(d)
            if full in sch[i]:continue  # fetched on an earlier run
            m=requests.get(full).content
            sch[i][full]=pd.read_html(m)[0]
            #print full
        except Exception:
            # best effort: an airport without a link or a page without tables
            # is skipped
            pass #print 'no tables',i,d


BCM
SCV
CLJ
ARW
SBZ
SUJ
BAY
OMR
CND
CRA
OTP
BBU
TCE
TSR
IAS
TGM

In [13]:
# Empty frame that will hold the merged departures of all airports.
mdf=pd.DataFrame()

In [14]:
# Flatten sch into one long table: for every scraped schedule keep the 2nd and
# 3rd column and tag each row with the airport it departs from.
# The per-schedule frame is named `dep` so the airport table `df` is not
# overwritten.
frames=[]
for i in sch:
    for d in sch[i]:
        dep=sch[i][d].drop(sch[i][d].columns[3:],axis=1).drop(sch[i][d].columns[0],axis=1)
        dep['From']=i
        # d is the key of sch[i], i.e. the schedule URL (which ends in the date)
        dep['Date']=d
        frames.append(dep)
# A single concat instead of growing mdf inside the loop, which copies every
# previously collected row on each iteration.
mdf=pd.concat([mdf]+frames)

In [15]:
# Report Hahn under the name Frankfurt; only cells that equal a key exactly
# are replaced.
mdf=mdf.replace({
    'Hahn':'Frankfurt',
    'Hahn HHN':'Frankfurt HHN',
})

In [16]:
# Split each "To" value (e.g. 'Frankfurt HHN') at its last space into the city
# and the airport code.
# rpartition() yields an empty separator when the value holds no space; the
# whole value is then kept as the city and the airport code is left empty.
# Slicing with rfind()'s -1 would instead cut the last character off the city.
parts=[i.rpartition(' ') for i in mdf['To']]
mdf['City']=[head if sep else tail for head,sep,tail in parts]
mdf['Airport']=[tail if sep else '' for head,sep,tail in parts]

In [17]:
# Save the merged departures table; the with-block closes the file handle.
# NOTE(review): to_json() already returns a JSON string, so json.dumps()
# encodes it a second time and readers have to decode twice. Kept unchanged
# because existing consumers of this file may rely on that format -- confirm.
with open("mdf_ro_dest.json",'w') as f:
    f.write(json.dumps(mdf.reset_index().to_json()))

In [25]:
# Number of rows in the merged departures table.
len(mdf)


Out[25]:
3034

In [18]:
# Distinct values of the Airline column.
airlines={airline for airline in mdf['Airline']}

In [19]:
# Distinct values of the City column.
cities={city for city in mdf['City']}

In [20]:
# Save the distinct destination cities and airlines as JSON lists; each
# with-block closes its file handle.
with open("cities_ro_dest.json",'w') as f:
    f.write(json.dumps(list(cities)))
with open("airlines_ro_dest.json",'w') as f:
    f.write(json.dumps(list(airlines)))

In [26]:
# Geocoding result per destination city. The loop in the next cell skips
# cities that already have an entry here.
citycoords={}

In [27]:
# Geocode every destination city that has no entry in citycoords yet.
# For these city names the query sent to the geocoder is the value below
# instead of the bare name.
queries={
    'Birmingham':'Birmingham, UK',
    'Valencia':'Valencia, Spain',
    'Naples':'Naples, Italy',
    'St. Petersburg':'St. Petersburg, Russia',
    'Bristol':'Bristol, UK',
}
# One client is reused for all lookups instead of building one per city.
geocoder=Geocoder(apik)
for i in cities:
    if i not in citycoords:
        z=queries.get(i,i)
        citycoords[i]=geocoder.geocode(z)
        # Progress output; the parenthesised form runs on Python 2 and Python 3.
        print(i)


Kiev
Paris
Oslo
Basel
Beirut
Zaragoza
Liverpool
Verona
Malmo
Castellon de la Plana
Bologna
Catania
Treviso
Brussels
Bucharest
Dubai
Dublin
Rome
Varna
Luqa
Pescara
Cologne
Milan
London
Karlsruhe/Baden-Baden
Strasbourg
Dortmund
Cluj-Napoca
Nurnberg
Amman
Chisinau
Vienna
Moscow
Bratislava
Berlin
Katowice
Weeze
Eindhoven
Stuttgart
Alicante
Tenerife
Frankfurt
Thessaloniki
Zurich
Perugia
Madrid
Bari
Doncaster
Lyon
Istanbul
Pisa
Turin
Nice
Larnaca
Memmingen
Hannover
Malaga
Hamburg
Stockholm
Tel Aviv
Timisoara
Doha
Birmingham
Florence
Athens
Satu Mare
Oradea
Valencia
Naples
Geneva
Sibiu
Munich
Glasgow
Alghero
Budapest
Dusseldorf
Barcelona
Billund
Bristol
Iasi
Belgrade
Prague
Sofia
Suceava
Lisbon
Amsterdam
Copenhagen
Warsaw

In [28]:
# Reduce each geocoding result to what gets saved: the coordinates and the
# country of its first hit.
citysave={
    city:{"coords":hits[0].coordinates,
          "country":hits[0].country}
    for city,hits in citycoords.items()
}

In [29]:
# Save the per-city coordinates and country; the with-block closes the file
# handle.
with open("citysave_ro_dest.json",'w') as f:
    f.write(json.dumps(citysave))