In [1]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
url='http://en.wikipedia.org/wiki/List_of_airports_in_the_United_Kingdom'
df=pd.read_html(url)
df=df[0].loc[:29].T.set_index(0).T.loc[1:].set_index('IATA')
df


Out[2]:
Location County ICAO Airport name Usage Rwy (ft) Surface
IATA
NaN East Midlands NaN NaN NaN NaN NaN NaN
NaN Brackley Northamptonshire NaN Hinton-in-the-Hedges Airfield Private NaN Tarmac
NaN Brackley Northamptonshire EGBT Turweston Aerodrome Public 3002 Asphalt
NaN Bruntingthorpe Leicestershire NaN Bruntingthorpe Aerodrome Private 9842 Asphalt
EMA Castle Donington Leicestershire EGNX East Midlands Airport Public 9491 Asphalt
QCY Coningsby Lincolnshire EGXC RAF Coningsby Military 9003 Concrete
NaN Cranwell Lincolnshire EGYD RAF Cranwell Military 6831 Asphalt
NaN Derby Derbyshire EGBD Derby Airfield Public 1975 Grass
NaN Grantham Lincolnshire EGYE RAF Barkston Heath Military 6007 Asphalt
NaN Kettering Northamptonshire NaN Deenethorpe Aerodrome Private NaN NaN
NaN Leicester Leicestershire EGBG Leicester Airport Public 3084 Asphalt
NaN Lincoln Lincolnshire EGCS Sturgate Airfield Public 2690 Paved
NaN Lincoln Lincolnshire EGNW Wickenby Aerodrome Public 1739 Concrete
NaN Newark-on-Trent Nottinghamshire EGXY RAF Syerston Military 5994 Asphalt
NaN Nottingham Nottinghamshire EGNA Hucknall Airfield Public 2838 Grass
NaN Retford Nottinghamshire EGNE Gamston Airport Public 5522 Asphalt
SQZ Scampton Lincolnshire EGXP RAF Scampton Military 8990 Asphalt
NaN Silverstone Northamptonshire EGBV Silverstone Heliport Public 1476 Grass
NaN Spalding Lincolnshire EGCL Fenland Airfield Public 1949 Grass
NaN Stamford Lincolnshire EGXT RAF Wittering Military 9052 Asphalt
NaN Strubby Lincolnshire EGCG Strubby Airfield[2] Public 2461 Paved
ORM Sywell Northamptonshire EGBK Sywell Aerodrome Private 2982 Grass
NQT Tollerton Nottinghamshire EGBN Nottingham Airport Private 3445 Asphalt/Concrete
WTN Waddington Lincolnshire EGXW RAF Waddington Military 9000 Asphalt
NaN Worksop Nottinghamshire EGNF Netherthorpe Airfield Public 1814 Grass
NaN East of England NaN NaN NaN NaN NaN NaN
NaN Aldenham Hertfordshire EGTR Elstree Airfield Public 2136 Asphalt
NaN Saffron Walden Essex EGO2 Audley End Airfield Private 2467 Grass
NaN Beccles Suffolk EGSM Beccles Airport Public 2283 Concrete/Grass

In [7]:
from pygeocoder import Geocoder
apik='AIzaSyDybC2OroTE_XDJTuxjKruxFpby5VDhEGk'

In [9]:
#UK too complicated, overwrite manually
uka=["EMA",
"LTN",
"NWI",
"SEN",
"STN",
"LHR",
"LCY",
"MME",
"NCL",
"BLK",
"LPL",
"MAN",
"LGW",
"SOU",
"OXF",
"LYX",
"GLO",
"EXT",
"BOH",
"NQY",
"LEQ",
"ISC",
"BRS",
"BHX",
"LBA",
"DSA",
"HUY",
"BFS",
"BHD",
"LDY",
"ABZ",
"OBN",
"BEB",
"BRR",
"CAL",
"COL",
"CSA",
"NRL",
"INV",
"DND",
"LSI",
"EOI",
"EDI",
"FIE",
"ILY",
"NDY",
"PIK",
"PPW",
"GLA",
"KOY",
"SYY",
"SOY",
"LWK",
"TRE",
"WRY",
"WIC",
"VLY",
"CWL",
"ACI",
"GCI",
"IOM",
"JER"]

In [10]:
locations={}
for i in uka:
    if str(i).lower()!='nan':
        try:
            results = Geocoder(apik).geocode(i+' airport UK')
            locations[i]=results[0].coordinates
            print i
        except:pass


EMA
LTN
NWI
SEN
STN
LHR
LCY
MME
NCL
BLK
LPL
MAN
LGW
SOU
OXF
LYX
GLO
EXT
BOH
NQY
LEQ
ISC
BRS
BHX
LBA
DSA
HUY
BFS
BHD
LDY
ABZ
OBN
BEB
BRR
CAL
COL
CSA
NRL
INV
DND
LSI
EOI
EDI
ILY
NDY
PIK
PPW
GLA
KOY
SYY
SOY
LWK
TRE
WRY
WIC
VLY
CWL
GCI
IOM
JER

In [11]:
file("locations_uk.json",'w').write(json.dumps(locations))

In [12]:
locations=json.loads(file('locations_uk.json','r').read())

In [13]:
import requests

In [14]:
airportialinks={}
for i in locations:
    print i,
    url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+str(i)+'+airport+united+kingdom'
    m=requests.get(url).content
    z=pd.read_html(m)[5][0][0]
    z=z[z.find('http'):]
    airportialinks[i]=z
    print z


OBN https://www.airportia.com/united-kingdom/oban-airport
EMA https://www.airportia.com/united-kingdom/east-midlands-airport/arrivals
GLO https://www.airportia.com/united-kingdom/gloucestershire-airport
BEB https://www.airportia.com/united-kingdom/benbecula-airport/arrivals
GLA https://www.airportia.com/united-kingdom/glasgow-international-airport
EOI https://www.airportia.com/united-kingdom/eday-airport
BHD https://www.airportia.com/united-kingdom/george...airport/arrivals
BOH https://www.airportia.com/united-kingdom/bournemouth-airport
LWK /
EXT https://www.airportia.com/united-kingdom/exeter-international-airport
CAL /
ISC https://www.airportia.com/united-kingdom/
LCY https://www.airportia.com/united-kingdom/london-city-airport
COL https://www.airportia.com/united-kingdom/
ILY https://www.airportia.com/united-kingdom/islay-airport
LBA https://www.airportia.com/united-kingdom/leeds-bradford-airport
PIK https://www.airportia.com/united-kingdom/glasgow-prestwick-airport
LEQ https://www.airportia.com/united-kingdom/
NQY /
LYX https://www.airportia.com/united-kingdom/lydd-airport
LSI s
VLY /
IOM https://www.airportia.com/isle-of-man/isle-of-man-airport/arrivals
PPW https://www.airportia.com/united-kingdom/westray-airport/
SOY /
SOU https://www.airportia.com/united-kingdom/southampton-airport
LDY https://www.airportia.com/united-kingdom/city-of-derry-airport
DSA https://www.airportia.com/united-kingdom/robin-hood-doncaster-sheffield- airport
HUY https://www.airportia.com/united-kingdom/humberside-airport
JER https://www.airportia.com/jersey/jersey-airport
DND https://www.airportia.com/united-kingdom/
NDY /
GCI https://www.airportia.com/guernsey/guernsey-airport/arrivals
CSA https://www.airportia.com/united-kingdom/
STN https://www.airportia.com/united-kingdom/...stansted-airport/arrivals
TRE https://www.airportia.com/united-kingdom/tiree-airport
BFS https://www.airportia.com/united-kingdom/belfast-international-airport
LPL https://www.airportia.com/united-kingdom/liverpool-john-lennon-airport
CWL https://www.airportia.com/united-kingdom/cardiff-international-airport
BLK https://www.airportia.com/united-kingdom/blackpool-international-airport
SEN https://www.airportia.com/united-kingdom/southend-airport/arrivals/
ABZ https://www.airportia.com/united-kingdom/aberdeen-dyce-airport/departures/
LTN https://www.airportia.com/united-kingdom/...luton-airport/departures
WRY https://www.airportia.com/united-kingdom/westray-airport/
KOY https://www.airportia.com/united-kingdom/kirkwall-airport
WIC /
NRL https://www.airportia.com/flights/lm314/north.../papa_westray/
OXF https://www.airportia.com/united-kingdom/oxford/arrivals/
INV /
EDI https://www.airportia.com/united-kingdom/edinburgh-airport/departures
BRS https://www.airportia.com/united-kingdom/bristol...airport/departures/
BRR https://www.airportia.com/united-kingdom/barra-airport/
NWI https://www.airportia.com/united-kingdom/norwich-international-airport/
BHX https://www.airportia.com/united-kingdom/birmingham-international-airport
NCL https://www.airportia.com/united-kingdom/newcastle-airport
MME /
SYY /
MAN https://www.airportia.com/united-kingdom/manchester-airport/departures
LGW https://www.airportia.com/united-kingdom/london-gatwick-airport
LHR https://www.airportia.com/united-kingdom/london-heathrow-airport

In [24]:
#reformat
for z in airportialinks:
    airportialinks[z]=airportialinks[z].split('arrivals')[0].split('departures')[0].replace(' ','').replace('...','-international-')
    if airportialinks[z][-1]!='/':airportialinks[z]+='/' 
    #manual fixes
    print airportialinks[z]


https://www.airportia.com/united-arab-emirates/ras-al-khaimah-international-airport/
https://www.airportia.com/united-arab-emirates/bateen-airport/
https://www.airportia.com/united-arab-emirates/abu-dhabi-international-airport/
https://www.airportia.com/united-arab-emirates/al-ain-international-airport/
https://www.airportia.com/united-arab-emirates/al-maktoum-international-airport/
https://www.airportia.com/united-arab-emirates/fujairah-international-airport/
https://www.airportia.com/united-arab-emirates/sharjah-international-airport/
https://www.airportia.com/united-arab-emirates/dubai-international-airport/

In [25]:
sch={}

record schedules for 2 weeks, then augment count with weekly flight numbers. seasonal and seasonal charter will count as once per week for 3 months, so 12/52 per week. TGM separate, since its history is in the past.


In [26]:
for i in locations:
    print i
    if i not in sch:sch[i]={}
    #march 11-24 = 2 weeks
    for d in range (11,25):
        if d not in sch[i]:
            try:
                url=airportialinks[i]
                full=url+'departures/201703'+str(d)
                m=requests.get(full).content
                sch[i][full]=pd.read_html(m)[0]
                #print full
            except: pass #print 'no tables',i,d


RKT
AZI
AUH
AAN
DWC
FJR
SHJ
DXB

In [40]:
mdf=pd.DataFrame()

In [41]:
for i in sch:
    for d in sch[i]:
        df=sch[i][d].drop(sch[i][d].columns[3:],axis=1).drop(sch[i][d].columns[0],axis=1)
        df['From']=i
        df['Date']=d
        mdf=pd.concat([mdf,df])

In [42]:
mdf=mdf.replace('Hahn','Frankfurt')
mdf=mdf.replace('Hahn HHN','Frankfurt HHN')

In [43]:
mdf['City']=[i[:i.rfind(' ')] for i in mdf['To']]
mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['To']]

In [44]:
file("mdf_ae_dest.json",'w').write(json.dumps(mdf.reset_index().to_json()))

In [45]:
len(mdf)


Out[45]:
13003

In [46]:
airlines=set(mdf['Airline'])

In [47]:
cities=set(mdf['City'])

In [48]:
file("cities_ae_dest.json",'w').write(json.dumps(list(cities)))
file("airlines_ae_dest.json",'w').write(json.dumps(list(airlines)))

In [49]:
citycoords={}

In [50]:
for i in cities:
    if i not in citycoords:
        if i==u'Birmingham': z='Birmingham, UK'
        elif i==u'Valencia': z='Valencia, Spain'
        elif i==u'Naples': z='Naples, Italy'
        elif i==u'St. Petersburg': z='St. Petersburg, Russia'
        elif i==u'Bristol': z='Bristol, UK'
        elif i==u'Victoria': z='Victoria, Seychelles'
        elif i==u'Washington': z='Washington, DC'
        elif i==u'Odessa': z='Odessa, Ukraine'
        else: z=i
        citycoords[i]=Geocoder(apik).geocode(z)
        print i


Liege
Basra
Kiev
Yanbu
Arbil
Paris
Oslo
Rio de Janeiro
Mineralnye Vody
Johannesburg
Perth
Hargeisa
San Francisco
Beirut
Jizan
Bahrain
Rahim Yar Khan
Eldoret
Chennai
Abu Dhabi
Delhi
Tyumen
Victoria
Baghdad
Jaipur
Mangalore
Newcastle
Brussels
Ahmedabad
Jeddah
Varanasi
Fukuoka
Pune
Addis Ababa
Kigali
Burgas
Chittagong
Yinchuan
Riyan
Nagpur
Ankara
Lilongwe
Rome
Accra
Denpasar
Kunming
Mogadishu
Taipei
Ostrava
Alexandria
Casablanca
Qeshm Island
Brisbane
Cairo
Osaka
Washington
Bandar Abbas
Milan
Manchester
Juba
Sialkot
Seattle
Abadan
Rostov-on-Don
Karachi
Shiraz
Yangon
Auckland
Stuttgart
Kish Island
Ouagadougou
Damascus
Amritsar
Dubai
Zanzibar
Sydney
Amman
Kolkata
Yekaterinburg
Krasnodar
Multan
Edinburgh
Sao Paulo
Mashhad
Seoul
Stockholm
Singapore
Moscow
Guangzhou
Assiut
Kuala Lumpur
Berlin
Katowice
Los Angeles
Tiruchirapalli
Tehran
London
Bangalore
Astana
Sohag
Cochin
Conakry
Kuwait City
Hamburg
Lucknow
Zaragoza
Asmara
Rabat
Cape Town
Sylhet
Kabul
Dammam
Bologna
Bratislava
Hofuf
Kathmandu
Al Ain
Male
Frankfurt
Lusaka
Hyderabad
Ashgabat
Zurich
Madrid
Baku
Kerman
Port Louis
Novosibirsk
Bushehr
Adelaide
Lyon
Abha
Istanbul
Dallas
Hanoi
Pisa
St. Petersburg
Cebu
Manila
Lahore
Muscat
Nice
Cluj-Napoca
Fort Lauderdale
Luanda
Skopje
Durban
Larnaca
Chicago
Khartoum
Medina
Salalah
Ra'sal-Khaymah
Brunei
Helsinki
Ahvaz
Faisalabad
Mombasa
Bishkek
Hong Kong
Toronto
Kandahar
Algiers
Hannover
Kazan
Peshawar
Islamabad
Ostend
Orlando
Tabuk
Bosaso
Wuhan
Tunis
Ha'il
Sharjah
Sayun
Aden
Isfahan
Nairobi
Yerevan
Bucharest
Beijing
Madurai
Najaf
Doha
Cagliari
Kozhikode
Djibouti
New York
Birmingham
Boston
Taif
Venice
Mumbai
Shanghai
Goa
Athens
Entebbe
Dublin
Ho Chi Minh City
Al-Qassim
Tashkent
Turbat
Lamerd
Glasgow
Chabahar
Odessa
Benghazi
Phuket
Geneva
Tokyo
Dushanbe
Munich
Bangkok
Sulaimaniyah
Coimbatore
Vienna
Lar
Chongqing
Dhaka
Budapest
Dar-es-Salaam
Chandigarh
Dakar
Jakarta
Dusseldorf
Barcelona
Urumqi
Melbourne
Minsk
Sarajevo
Almaty
Phnom Penh
Tbilisi
Houston
Chengdu
Belgrade
Riyadh
Prague
Sofia
Thiruvananthapuram
Samara
Lagos
Colombo
Port Sudan
Lisbon
Amsterdam
Copenhagen
Quetta
Sakaka
Warsaw

In [53]:
citysave={}
for i in citycoords:
    citysave[i]={"coords":citycoords[i][0].coordinates,
                 "country":citycoords[i][0].country}

In [54]:
file("citysave_ae_dest.json",'w').write(json.dumps(citysave))