In [1]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
# Scrape the Wikipedia list of UAE airports; the data lives in the first table.
url = 'http://en.wikipedia.org/wiki/List_of_airports_in_the_United_Arab_Emirates'
tables = pd.read_html(url)
# Keep rows up to label 29, then promote the first row to column headers
# (transpose -> index by column 0 -> transpose back).
airports = tables[0].loc[:29]
airports = airports.T.set_index(0).T
# Keep rows from label 1 onward and index them by the IATA code column.
df = airports.loc[1:].set_index('IATA[2]')
df


Out[9]:
Location Emirate ICAO[1] Airport name Coordinates
IATA[2]
AUH Abu Dhabi Abu Dhabi OMAA Abu Dhabi International Airport[1] 24°25′59″N 054°39′04″E / 24.43306°N 54.65111...
AZI Abu Dhabi Abu Dhabi OMAD Al Bateen Executive Airport[1] 24°25′42″N 054°27′29″E / 24.42833°N 54.45806...
AAN Al Ain Abu Dhabi OMAL Al Ain International Airport[1] 24°15′42″N 055°36′33″E / 24.26167°N 55.60917...
DHF Mussafah Abu Dhabi OMAM Al Dhafra Air Base[3] 24°14′54″N 054°32′42″E / 24.24833°N 54.54500...
NaN Al Futaisi Abu Dhabi OMAF Futaysi Airport[1] 24°22′44″N 054°18′58″E / 24.37889°N 54.31611...
NaN Al Jazirah Al Hamra Ras al-Khaimah OMRJ Al Jazeirah Airport[1] 25°39′55″N 055°46′27″E / 25.66528°N 55.77417...
NaN Arzanah Abu Dhabi OMAR Arzanah Airport[1] 24°46′51″N 052°33′35″E / 24.78083°N 52.55972...
NaN Buhasa Abu Dhabi OMAB Buhasa Airport[1] 23°35′59″N 053°22′46″E / 23.59972°N 53.37944...
ZDY Dalma Island Abu Dhabi OMDL Dalma Airport[4][5] 24°30′11″N 052°20′09″E / 24.50306°N 52.33583...
NaN Das Island Abu Dhabi OMAS Das Island Airport[1] 25°08′30″N 052°52′20″E / 25.14167°N 52.87222...
DXB Dubai Dubai OMDB Dubai International Airport[1] 25°15′10″N 055°21′52″E / 25.25278°N 55.36444...
NHD Dubai Dubai OMDM Al Minhad Air Base[6] 25°01′37″N 055°21′58″E / 25.02694°N 55.36611...
DWC Dubai Dubai OMDW Dubai World Central - Al Maktoum International... 24°55′06″N 055°10′32″E / 24.91833°N 55.17556...
FJR Fujairah Fujairah OMFJ Fujairah International Airport[1] 25°06′44″N 056°19′27″E / 25.11222°N 56.32417...
NaN Jebel Dhana Abu Dhabi OMAJ Jebel Dhana Airport[1] 24°10′55″N 052°37′25″E / 24.18194°N 52.62361...
NaN Qarnayn Abu Dhabi OMAQ Qarnayn Airport[1] 24°56′00″N 052°51′00″E / 24.93333°N 52.85000...
RKT Ras al-Khaimah Ras al-Khaimah OMRK Ras Al Khaimah International Airport[1] 25°36′48″N 055°56′20″E / 25.61333°N 55.93889...
NaN Ras al-Khaimah Ras al-Khaimah OMRS Al Saqr Field Airport[1] 25°36′48″N 055°57′34″E / 25.61333°N 55.95944...
SHJ Sharjah Sharjah OMSJ Sharjah International Airport[1] 25°45′10″N 055°30′58″E / 25.75278°N 55.51611...
NaN Sir Bani Yas Abu Dhabi OMBY Sir Bani Yas Airport[5] 24°16′56″N 052°34′56″E / 24.28222°N 52.58222...
NaN Zirku Abu Dhabi OMAZ Zirku Airport[1] 24°51′48″N 053°04′33″E / 24.86333°N 53.07583...

In [10]:
from pygeocoder import Geocoder
# SECURITY: a Google API key used to be hardcoded on this line. Treat that key
# as leaked and revoke it. The key is now read from the GOOGLE_API_KEY
# environment variable so it never ends up in the saved notebook.
# When the variable is unset, apik is None.
# NOTE(review): presumably Geocoder(None) falls back to unauthenticated
# requests -- confirm against the pygeocoder documentation.
import os
apik = os.environ.get('GOOGLE_API_KEY')

In [12]:
# Geocode every airport that has an IATA code and store its coordinates
# in `locations`, keyed by the IATA code.
geocoder = Geocoder(apik)  # built once; it does not depend on the loop variable
locations = {}
for code in df.index:
    # Airports without an IATA code carry a NaN index label; skip them.
    if str(code).lower() == 'nan':
        continue
    try:
        results = geocoder.geocode(code + ' airport UAE')
        locations[code] = results[0].coordinates
    except Exception as err:
        # Best-effort lookup: report the failure and carry on. The previous
        # bare `except: pass` hid every error and also swallowed
        # KeyboardInterrupt, so the loop could not be interrupted cleanly.
        print('%s: geocoding failed (%r)' % (code, err))
        continue
    print(code)


AUH
AZI
AAN
DHF
ZDY
DXB
DWC
FJR
RKT
SHJ

In [39]:
# Discard the stored coordinates of these four airports.
# NOTE(review): presumably the geocoder returned unusable results for them -- confirm.
# pop(code, None) keeps the cell idempotent: running it again after the keys
# are already gone no longer raises KeyError.
for code in ('AZI', 'DHF', 'ZDY', 'FJR'):
    locations.pop(code, None)

In [19]:
# Persist the airport coordinates as JSON. The context manager closes the
# handle, so the data is flushed to disk before the file is read back.
with open("locations_ae.json", 'w') as fh:
    fh.write(json.dumps(locations))

In [20]:
# Reload the airport coordinates saved in locations_ae.json; the context
# manager closes the handle instead of leaving that to the garbage collector.
with open('locations_ae.json', 'r') as fh:
    locations = json.loads(fh.read())

In [21]:
import requests

In [23]:
# Look up each airport's airportia.com page through a Google custom search
# and remember the first link found, keyed by IATA code.
search_prefix = 'https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='
airportialinks = {}
for code in locations:
    query_url = search_prefix + str(code) + '+airport+united+arab+emirates'
    # A timeout stops one stalled request from hanging the whole notebook.
    page = requests.get(query_url, timeout=30).content
    # The result text is taken from table 5, column 0, row 0 of the page.
    # NOTE(review): this position depends on the page layout -- confirm it still holds.
    cell = pd.read_html(page)[5][0][0]
    start = cell.find('http')
    if start == -1:
        # find() returns -1 when there is no link; slicing with -1 would
        # silently store the last character of the cell as the "link".
        print('%s: no link found in search result, skipping' % code)
        continue
    link = cell[start:]
    airportialinks[code] = link
    print('%s %s' % (code, link))


RKT https://www.airportia.com/united-arab-emirates/ras-al-khaimah-international- airport
AZI https://www.airportia.com/united-arab-emirates/bateen-airport
AUH https://www.airportia.com/united-arab-emirates/abu-dhabi-international- airport
AAN https://www.airportia.com/united-arab-emirates/al-ain-international-airport
DWC https://www.airportia.com/united-arab-emirates/al-maktoum-international- airport
FJR https://www.airportia.com/united-arab-emirates/fujairah-international-airport
SHJ https://www.airportia.com/united-arab-emirates/sharjah-international-airport
DXB https://www.airportia.com/united-arab-emirates/dubai-international-airport

In [24]:
#reformat
def _airportia_base_url(link):
    """Return the airport's base URL, always terminated by '/'.

    Everything from the first 'arrivals' or 'departures' onward is cut off,
    spaces are removed and a '...' ellipsis is replaced by '-international-'.
    """
    base = link.split('arrivals')[0].split('departures')[0]
    base = base.replace(' ', '').replace('...', '-international-')
    # endswith() also copes with an empty string, where [-1] raised IndexError.
    if not base.endswith('/'):
        base += '/'
    return base

for code in airportialinks:
    airportialinks[code] = _airportia_base_url(airportialinks[code])
    #manual fixes (placeholder kept from the original; none are applied here)
    print(airportialinks[code])


https://www.airportia.com/united-arab-emirates/ras-al-khaimah-international-airport/
https://www.airportia.com/united-arab-emirates/bateen-airport/
https://www.airportia.com/united-arab-emirates/abu-dhabi-international-airport/
https://www.airportia.com/united-arab-emirates/al-ain-international-airport/
https://www.airportia.com/united-arab-emirates/al-maktoum-international-airport/
https://www.airportia.com/united-arab-emirates/fujairah-international-airport/
https://www.airportia.com/united-arab-emirates/sharjah-international-airport/
https://www.airportia.com/united-arab-emirates/dubai-international-airport/

In [25]:
# Schedule store: airport code -> {departures URL -> scraped table}.
sch = dict()

Record schedules for 2 weeks, then augment the count with weekly flight numbers. Seasonal and seasonal-charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its history is in the past.


In [26]:
# Download the departures table of every airport for each day in the sample
# window and store it in sch[code], keyed by the page URL.
for code in locations:
    print(code)
    if code not in sch:
        sch[code] = {}
    if code not in airportialinks:
        print('no airportia link for %s, skipping' % code)
        continue
    base = airportialinks[code]
    #march 11-24 = 2 weeks
    for day in range(11, 25):
        full = base + 'departures/201703' + str(day)
        # Tables are stored under their URL, so the "already downloaded" test
        # has to use the URL too. Testing the bare day number never matched,
        # which made every re-run download everything again.
        if full in sch[code]:
            continue
        try:
            # A timeout stops one stalled request from hanging the whole loop.
            page = requests.get(full, timeout=30).content
            sch[code][full] = pd.read_html(page)[0]
        except Exception as err:
            # Best-effort download: report the failure instead of hiding it.
            print('no table for %s on day %s (%r)' % (code, day, err))


RKT
AZI
AUH
AAN
DWC
FJR
SHJ
DXB

In [40]:
# Start from an empty frame that will hold all departures.
mdf = pd.DataFrame()

In [41]:
# Combine every downloaded departures table into one frame.
# The pieces are collected in a list and concatenated once: concatenating
# inside the loop copies the growing frame on every pass (quadratic), and
# appending to a pre-existing `mdf` duplicated all rows whenever the cell was
# re-run. The per-table frame is no longer called `df`, so the airports table
# loaded at the top of the notebook is not overwritten.
frames = []
for code in sch:
    for url_key in sch[code]:
        table = sch[code][url_key]
        # Keep only the columns at positions 1 and 2 of the scraped table.
        flights = table.iloc[:, 1:3].copy()
        flights['From'] = code
        # The dictionary key (the departures page URL) is stored as 'Date'.
        flights['Date'] = url_key
        frames.append(flights)
mdf = pd.concat(frames) if frames else pd.DataFrame()

In [42]:
# Rename cells that are exactly 'Hahn' / 'Hahn HHN' to their Frankfurt equivalents.
hahn_renames = {
    'Hahn': 'Frankfurt',
    'Hahn HHN': 'Frankfurt HHN',
}
mdf = mdf.replace(hahn_renames)

In [43]:
def _split_destination(dest):
    """Split a 'To' value of the form '<city> <code>' into (city, code).

    The text after the last space is the code; everything before it is the
    city. A value without any space cannot be split, so it is returned
    unchanged in both positions. Previously such a value lost its last
    character in the city part, because rfind() returned -1.
    """
    city, sep, code = dest.rpartition(' ')
    if not sep:
        return dest, dest
    return city, code

dest_parts = [_split_destination(dest) for dest in mdf['To']]
mdf['City'] = [part[0] for part in dest_parts]
mdf['Airport'] = [part[1] for part in dest_parts]

In [44]:
# Save all departures. The context manager closes the handle, so the file is
# completely written when the cell finishes.
# NOTE(review): to_json() already returns a JSON string, so json.dumps() wraps
# it a second time and readers must decode twice. Kept as is so the file
# format does not change for existing consumers -- confirm before simplifying.
with open("mdf_ae_dest.json", 'w') as fh:
    fh.write(json.dumps(mdf.reset_index().to_json()))

In [45]:
# Number of rows in the combined departures frame.
mdf.shape[0]


Out[45]:
13003

In [46]:
# Distinct values of the 'Airline' column.
airlines = {airline for airline in mdf['Airline']}

In [47]:
# Distinct values of the 'City' column.
cities = {city for city in mdf['City']}

In [48]:
# Save the distinct cities and airlines as JSON lists. Each handle is closed
# by its context manager instead of being left open.
with open("cities_ae_dest.json", 'w') as fh:
    fh.write(json.dumps(list(cities)))
with open("airlines_ae_dest.json", 'w') as fh:
    fh.write(json.dumps(list(airlines)))

In [49]:
# Geocoding results per city name.
citycoords = dict()

In [50]:
# City names that need a more specific query; every other city is sent to the
# geocoder under its own name.
geocode_queries = {
    u'Birmingham': 'Birmingham, UK',
    u'Valencia': 'Valencia, Spain',
    u'Naples': 'Naples, Italy',
    u'St. Petersburg': 'St. Petersburg, Russia',
    u'Bristol': 'Bristol, UK',
    u'Victoria': 'Victoria, Seychelles',
    u'Washington': 'Washington, DC',
    u'Odessa': 'Odessa, Ukraine',
}
for city in cities:
    # Cities that already have a result are not queried again.
    if city in citycoords:
        continue
    query = geocode_queries.get(city, city)
    citycoords[city] = Geocoder(apik).geocode(query)
    print(city)


Liege
Basra
Kiev
Yanbu
Arbil
Paris
Oslo
Rio de Janeiro
Mineralnye Vody
Johannesburg
Perth
Hargeisa
San Francisco
Beirut
Jizan
Bahrain
Rahim Yar Khan
Eldoret
Chennai
Abu Dhabi
Delhi
Tyumen
Victoria
Baghdad
Jaipur
Mangalore
Newcastle
Brussels
Ahmedabad
Jeddah
Varanasi
Fukuoka
Pune
Addis Ababa
Kigali
Burgas
Chittagong
Yinchuan
Riyan
Nagpur
Ankara
Lilongwe
Rome
Accra
Denpasar
Kunming
Mogadishu
Taipei
Ostrava
Alexandria
Casablanca
Qeshm Island
Brisbane
Cairo
Osaka
Washington
Bandar Abbas
Milan
Manchester
Juba
Sialkot
Seattle
Abadan
Rostov-on-Don
Karachi
Shiraz
Yangon
Auckland
Stuttgart
Kish Island
Ouagadougou
Damascus
Amritsar
Dubai
Zanzibar
Sydney
Amman
Kolkata
Yekaterinburg
Krasnodar
Multan
Edinburgh
Sao Paulo
Mashhad
Seoul
Stockholm
Singapore
Moscow
Guangzhou
Assiut
Kuala Lumpur
Berlin
Katowice
Los Angeles
Tiruchirapalli
Tehran
London
Bangalore
Astana
Sohag
Cochin
Conakry
Kuwait City
Hamburg
Lucknow
Zaragoza
Asmara
Rabat
Cape Town
Sylhet
Kabul
Dammam
Bologna
Bratislava
Hofuf
Kathmandu
Al Ain
Male
Frankfurt
Lusaka
Hyderabad
Ashgabat
Zurich
Madrid
Baku
Kerman
Port Louis
Novosibirsk
Bushehr
Adelaide
Lyon
Abha
Istanbul
Dallas
Hanoi
Pisa
St. Petersburg
Cebu
Manila
Lahore
Muscat
Nice
Cluj-Napoca
Fort Lauderdale
Luanda
Skopje
Durban
Larnaca
Chicago
Khartoum
Medina
Salalah
Ra'sal-Khaymah
Brunei
Helsinki
Ahvaz
Faisalabad
Mombasa
Bishkek
Hong Kong
Toronto
Kandahar
Algiers
Hannover
Kazan
Peshawar
Islamabad
Ostend
Orlando
Tabuk
Bosaso
Wuhan
Tunis
Ha'il
Sharjah
Sayun
Aden
Isfahan
Nairobi
Yerevan
Bucharest
Beijing
Madurai
Najaf
Doha
Cagliari
Kozhikode
Djibouti
New York
Birmingham
Boston
Taif
Venice
Mumbai
Shanghai
Goa
Athens
Entebbe
Dublin
Ho Chi Minh City
Al-Qassim
Tashkent
Turbat
Lamerd
Glasgow
Chabahar
Odessa
Benghazi
Phuket
Geneva
Tokyo
Dushanbe
Munich
Bangkok
Sulaimaniyah
Coimbatore
Vienna
Lar
Chongqing
Dhaka
Budapest
Dar-es-Salaam
Chandigarh
Dakar
Jakarta
Dusseldorf
Barcelona
Urumqi
Melbourne
Minsk
Sarajevo
Almaty
Phnom Penh
Tbilisi
Houston
Chengdu
Belgrade
Riyadh
Prague
Sofia
Thiruvananthapuram
Samara
Lagos
Colombo
Port Sudan
Lisbon
Amsterdam
Copenhagen
Quetta
Sakaka
Warsaw

In [53]:
# Reduce each geocoding result to the two fields that get saved: the
# coordinates and the country of the first match.
citysave = {}
for city, matches in citycoords.items():
    first_match = matches[0]
    citysave[city] = {
        "coords": first_match.coordinates,
        "country": first_match.country,
    }

In [54]:
# Save the per-city coordinates and country as JSON; the context manager
# closes the handle so the file is completely written.
with open("citysave_ae_dest.json", 'w') as fh:
    fh.write(json.dumps(citysave))