In [6]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [7]:
url='http://en.wikipedia.org/wiki/List_of_airports_in_the_United_Arab_Emirates'
df=pd.read_html(url)
df=df[0].loc[:29].T.set_index(0).T.loc[1:].set_index('IATA[2]')
df


Out[7]:
Location Emirate ICAO[1] Airport name Coordinates
IATA[2]
AUH Abu Dhabi Abu Dhabi OMAA Abu Dhabi International Airport[1] 24°25′59″N 054°39′04″E / 24.43306°N 54.65111...
AZI Abu Dhabi Abu Dhabi OMAD Al Bateen Executive Airport[1] 24°25′42″N 054°27′29″E / 24.42833°N 54.45806...
AAN Al Ain Abu Dhabi OMAL Al Ain International Airport[1] 24°15′42″N 055°36′33″E / 24.26167°N 55.60917...
DHF Mussafah Abu Dhabi OMAM Al Dhafra Air Base[3] 24°14′54″N 054°32′42″E / 24.24833°N 54.54500...
NaN Al Futaisi Abu Dhabi OMAF Futaysi Airport[1] 24°22′44″N 054°18′58″E / 24.37889°N 54.31611...
NaN Al Jazirah Al Hamra Ras al-Khaimah OMRJ Al Jazeirah Airport[1] 25°39′55″N 055°46′27″E / 25.66528°N 55.77417...
NaN Arzanah Abu Dhabi OMAR Arzanah Airport[1] 24°46′51″N 052°33′35″E / 24.78083°N 52.55972...
NaN Buhasa Abu Dhabi OMAB Buhasa Airport[1] 23°35′59″N 053°22′46″E / 23.59972°N 53.37944...
ZDY Dalma Island Abu Dhabi OMDL Dalma Airport[4][5] 24°30′11″N 052°20′09″E / 24.50306°N 52.33583...
NaN Das Island Abu Dhabi OMAS Das Island Airport[1] 25°08′30″N 052°52′20″E / 25.14167°N 52.87222...
DXB Dubai Dubai OMDB Dubai International Airport[1] 25°15′10″N 055°21′52″E / 25.25278°N 55.36444...
NHD Dubai Dubai OMDM Al Minhad Air Base[6] 25°01′37″N 055°21′58″E / 25.02694°N 55.36611...
DWC Dubai Dubai OMDW Dubai World Central - Al Maktoum International... 24°55′06″N 055°10′32″E / 24.91833°N 55.17556...
FJR Fujairah Fujairah OMFJ Fujairah International Airport[1] 25°06′44″N 056°19′27″E / 25.11222°N 56.32417...
NaN Jebel Dhana Abu Dhabi OMAJ Jebel Dhana Airport[1] 24°10′55″N 052°37′25″E / 24.18194°N 52.62361...
NaN Qarnayn Abu Dhabi OMAQ Qarnayn Airport[1] 24°56′00″N 052°51′00″E / 24.93333°N 52.85000...
RKT Ras al-Khaimah Ras al-Khaimah OMRK Ras Al Khaimah International Airport[1] 25°36′48″N 055°56′20″E / 25.61333°N 55.93889...
NaN Ras al-Khaimah Ras al-Khaimah OMRS Al Saqr Field Airport[1] 25°36′48″N 055°57′34″E / 25.61333°N 55.95944...
SHJ Sharjah Sharjah OMSJ Sharjah International Airport[1] 25°45′10″N 055°30′58″E / 25.75278°N 55.51611...
NaN Sir Bani Yas Abu Dhabi OMBY Sir Bani Yas Airport[5] 24°16′56″N 052°34′56″E / 24.28222°N 52.58222...
NaN Zirku Abu Dhabi OMAZ Zirku Airport[1] 24°51′48″N 053°04′33″E / 24.86333°N 53.07583...

In [8]:
from pygeocoder import Geocoder
apik='AIzaSyDybC2OroTE_XDJTuxjKruxFpby5VDhEGk'

In [11]:
locations={}
for i in df.index:
    if str(i).lower()!='nan':
        try:
            results = Geocoder(apik).geocode(i+' airport UAE')
            locations[i]=results[0].coordinates
            print i
        except:pass


AUH
AZI
AAN
DHF
ZDY
DXB
DWC
FJR
RKT
SHJ

In [12]:
locations.pop('AZI');
locations.pop('DHF');
locations.pop('ZDY');
locations.pop('FJR');


Out[12]:
(24.5030415, 52.33364659999999)

In [13]:
file("locations_ae.json",'w').write(json.dumps(locations))

In [14]:
locations=json.loads(file('locations_ae.json','r').read())

In [15]:
import requests

In [16]:
airportialinks={}
for i in locations:
    print i,
    url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+str(i)+'+airport+united+arab+emirates'
    m=requests.get(url).content
    z=pd.read_html(m)[5][0][0]
    z=z[z.find('http'):]
    airportialinks[i]=z
    print z


RKT https://www.airportia.com/united-arab-emirates/ras-al-khaimah-international- airport
AZI https://www.airportia.com/united-arab-emirates/bateen-airport
AUH https://www.airportia.com/united-arab-emirates/abu-dhabi-international- airport
AAN https://www.airportia.com/united-arab-emirates/al-ain-international-airport
DWC https://www.airportia.com/united-arab-emirates/al-maktoum-international- airport
FJR https://www.airportia.com/united-arab-emirates/fujairah-international-airport
SHJ https://www.airportia.com/united-arab-emirates/sharjah-international-airport
DXB https://www.airportia.com/united-arab-emirates/dubai-international-airport

In [17]:
#reformat
for z in airportialinks:
    airportialinks[z]=airportialinks[z].split('arrivals')[0].split('departures')[0].replace(' ','').replace('...','-international-')
    if airportialinks[z][-1]!='/':airportialinks[z]+='/' 
    #manual fixes
    print airportialinks[z]


https://www.airportia.com/united-arab-emirates/ras-al-khaimah-international-airport/
https://www.airportia.com/united-arab-emirates/bateen-airport/
https://www.airportia.com/united-arab-emirates/abu-dhabi-international-airport/
https://www.airportia.com/united-arab-emirates/al-ain-international-airport/
https://www.airportia.com/united-arab-emirates/al-maktoum-international-airport/
https://www.airportia.com/united-arab-emirates/fujairah-international-airport/
https://www.airportia.com/united-arab-emirates/sharjah-international-airport/
https://www.airportia.com/united-arab-emirates/dubai-international-airport/

In [18]:
sch={}

record schedules for 2 weeks, then augment count with weekly flight numbers. seasonal and seasonal charter will count as once per week for 3 months, so 12/52 per week. TGM separate, since its history is in the past.


In [19]:
for i in locations:
    print i
    if i not in sch:sch[i]={}
    #march 11-24 = 2 weeks
    for d in range (11,25):
        if d not in sch[i]:
            try:
                url=airportialinks[i]
                full=url+'arrivals/201703'+str(d)
                m=requests.get(full).content
                sch[i][full]=pd.read_html(m)[0]
                #print full
            except: pass #print 'no tables',i,d


RKT
AZI
AUH
AAN
DWC
FJR
SHJ
DXB

In [42]:
mdf=pd.DataFrame()

In [43]:
for i in sch:
    for d in sch[i]:
        df=sch[i][d].drop(sch[i][d].columns[3:],axis=1).drop(sch[i][d].columns[0],axis=1)
        df['To']=i
        df['Date']=d
        mdf=pd.concat([mdf,df])

In [44]:
mdf=mdf.replace('Hahn','Frankfurt')
mdf=mdf.replace('Hahn HHN','Frankfurt HHN')

In [45]:
mdf['City']=[i[:i.rfind(' ')] for i in mdf['From']]
mdf['Airport']=[i[i.rfind(' ')+1:] for i in mdf['From']]

In [46]:
file("mdf_ae_arrv.json",'w').write(json.dumps(mdf.reset_index().to_json()))

In [47]:
len(mdf)


Out[47]:
12955

In [48]:
airlines=set(mdf['Airline'])

In [49]:
cities=set(mdf['City'])

In [50]:
file("cities_ae_arrv.json",'w').write(json.dumps(list(cities)))
file("airlines_ae_arrv.json",'w').write(json.dumps(list(airlines)))

In [51]:
citycoords={}

In [53]:
for i in cities:
    if i not in citycoords:
        if i==u'Birmingham': z='Birmingham, UK'
        elif i==u'Valencia': z='Valencia, Spain'
        elif i==u'Naples': z='Naples, Italy'
        elif i==u'St. Petersburg': z='St. Petersburg, Russia'
        elif i==u'Bristol': z='Bristol, UK'
        elif i==u'Victoria': z='Victoria, Seychelles'
        elif i==u'Washington': z='Washington, DC'
        elif i==u'Odessa': z='Odessa, Ukraine'
        else: z=i
        citycoords[i]=Geocoder(apik).geocode(z)
        print i


Basel
Bratislava
Mineralnye Vody
Johannesburg
Liege
Perth
Hargeisa
San Francisco
Beirut
Jizan
Bahrain
Zaragoza
Bushehr
Chennai
Abu Dhabi
Delhi
Tyumen
Victoria
Baghdad
Mangalore
Newcastle
Catania
Chandigarh
Brussels
Ahmedabad
Jeddah
Varanasi
Dubai
Addis Ababa
Kigali
Chittagong
Sulaimaniyah
Dublin
Nagpur
Phnom Penh
Rome
Accra
Denpasar
Kunming
Mogadishu
Taipei
Ostrava
Alexandria
Casablanca
Qeshm Island
Brisbane
Cairo
Osaka
Washington
Bandar Abbas
Milan
London
Sialkot
Seattle
Abadan
Rostov-on-Don
Cochin
Shiraz
Yangon
Auckland
Angeles City
Dakar
Kish Island
Cluj-Napoca
Damascus
Amritsar
Sydney
Amman
Kolkata
Yekaterinburg
Basra
Multan
Edinburgh
Sao Paulo
Zanzibar
Seoul
Stockholm
Singapore
Moscow
Rahim Yar Khan
Assiut
Kuala Lumpur
Riyan
Berlin
Katowice
Los Angeles
Tiruchirapalli
Tehran
Manchester
Bangalore
Astana
Sana'a
Sohag
Karachi
Kuwait City
Lucknow
Fort Lauderdale
Asmara
Rabat
Cape Town
Stuttgart
Dammam
Bologna
Hambantota
Kathmandu
Al Ain
Male
Frankfurt
Hyderabad
Sylhet
Ashgabat
Zurich
Madrid
Baku
Bangor
Novosibirsk
Adelaide
Lyon
Abha
Istanbul
Dallas
Hofuf
Hanoi
Mashhad
Luxembourg
Manila
Lahore
Vienna
Muscat
Nice
Juba
Brunei
St. Petersburg
Luanda
Skopje
Durban
Jaipur
Khartoum
Medina
Salalah
Ra'sal-Khaymah
Larnaca
Quetta
Ahvaz
Faisalabad
Mombasa
Bishkek
Hong Kong
Toronto
Kandahar
Algiers
Hannover
Kazan
Tokyo
Islamabad
Rio de Janeiro
Hamburg
Orlando
Athens
Tabuk
Bosaso
Wuhan
Tunis
Ha'il
Sharjah
Sayun
Aden
Isfahan
Nairobi
Yerevan
Bucharest
Beijing
Madurai
Najaf
Doha
Memphis
Kozhikode
Djibouti
New York
Birmingham
Boston
Taif
Venice
Mumbai
Shanghai
Goa
Pune
Entebbe
Kabul
Ho Chi Minh City
Al-Qassim
Tashkent
Turbat
Lamerd
Glasgow
Chabahar
Odessa
Benghazi
Phuket
Geneva
Helsinki
Dushanbe
Munich
Bangkok
Krasnodar
Yinchuan
Colombo
Peshawar
Zahedan
Lar
Chongqing
Dhaka
Budapest
Dar-es-Salaam
Coimbatore
Jakarta
Dusseldorf
Barcelona
Urumqi
Melbourne
Minsk
Sarajevo
Almaty
Tbilisi
Port Louis
Houston
Chengdu
Belgrade
Riyadh
Prague
Sofia
Thiruvananthapuram
Samara
Lagos
Chicago
Lisbon
Amsterdam
Copenhagen
Lusaka
Sakaka
Warsaw

In [54]:
citysave={}
for i in citycoords:
    citysave[i]={"coords":citycoords[i][0].coordinates,
                 "country":citycoords[i][0].country}

In [55]:
file("citysave_ae_arrv.json",'w').write(json.dumps(citysave))