In [1]:
import os
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [9]:
# Scrape the Wikipedia list of UAE airports; read_html returns one frame per
# HTML table found on the page.
url = 'http://en.wikipedia.org/wiki/List_of_airports_in_the_United_Arab_Emirates'
wiki_tables = pd.read_html(url)
# Work on rows 0..29 of the first table only.
airport_rows = wiki_tables[0].loc[:29]
# Promote row 0 to column headers: transpose, index by (what is now) column 0,
# transpose back.
airport_rows = airport_rows.T.set_index(0).T
# Keep rows labelled 1 onward and index them by the 'IATA[2]' column.
df = airport_rows.loc[1:].set_index('IATA[2]')
df
Out[9]:
In [10]:
from pygeocoder import Geocoder
# Geocoding API key, read from the GOOGLE_API_KEY environment variable so the
# credential is not stored in the notebook. It is None when the variable is
# unset (presumably pygeocoder then sends keyless requests — TODO confirm).
apik = os.environ.get('GOOGLE_API_KEY')
In [12]:
# Geocode every airport of the table by its index label (the IATA code column)
# and collect code -> coordinates in `locations`.
locations = {}
geocoder = Geocoder(apik)  # one client for the whole loop
for i in df.index:
    # Skip index labels whose string form is 'nan' (presumably rows that have
    # no IATA code).
    if str(i).lower() == 'nan':
        continue
    try:
        results = geocoder.geocode(i + ' airport UAE')
        locations[i] = results[0].coordinates
        print(i)
    except Exception as err:
        # Still best effort (a failed lookup just leaves the airport out), but
        # the failure is reported, and KeyboardInterrupt is no longer swallowed
        # as it was by the bare `except`.
        print('geocoding failed for %s: %s' % (i, err))
In [39]:
# Remove airports that are excluded from the rest of the analysis (the reason
# is not recorded here — presumably bad geocoding hits or no schedule data;
# TODO confirm).
# pop(code, None) keeps the cell re-runnable: a code that is already gone, or
# that was never geocoded, no longer raises KeyError.
for code in ('AZI', 'DHF', 'ZDY', 'FJR'):
    locations.pop(code, None)
In [19]:
# Cache the airport coordinates on disk. open() replaces the Python-2-only
# file() builtin, and the context manager closes the handle explicitly.
with open("locations_ae.json", 'w') as out:
    out.write(json.dumps(locations))
In [20]:
# Reload the cached airport coordinates. open() replaces the Python-2-only
# file() builtin, and the context manager closes the handle explicitly.
with open('locations_ae.json', 'r') as src:
    locations = json.loads(src.read())
In [21]:
import requests
In [23]:
# For every airport code, query a Google custom search engine and keep the URL
# found in the first result (presumably the airport's airportia.com page —
# inferred from the variable name; TODO confirm). Result: code -> URL text.
airportialinks = {}
search_base = ('https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798'
               '&cof=FORID%3A10&ie=UTF-8&q=')
for i in locations:
    url = search_base + str(i) + '+airport+united+arab+emirates'
    # A timeout keeps one stalled request from hanging the whole cell.
    m = requests.get(url, timeout=30).content
    # Sixth table on the results page, column 0, row 0 holds the first hit.
    z = pd.read_html(m)[5][0][0]
    start = z.find('http')
    if start == -1:
        # Without this guard the slice below would be z[-1:], silently storing
        # the last character of the cell as the "link".
        print('%s: no link found in search result' % i)
        continue
    z = z[start:]
    airportialinks[i] = z
    print('%s %s' % (i, z))
In [24]:
# Normalise every scraped link into a base URL that ends with '/'.
for z in airportialinks:
    link = airportialinks[z]
    # Keep only the text before the first 'arrivals' / 'departures' marker.
    link = link.split('arrivals')[0]
    link = link.split('departures')[0]
    # Strip spaces and expand '...' (presumably the ellipsis of a truncated
    # search snippet) to '-international-'.
    link = link.replace(' ', '').replace('...', '-international-')
    airportialinks[z] = link
    if link[-1] != '/':
        link = link + '/'
        airportialinks[z] = link
    #manual fixes
    print(link)
In [25]:
# Schedule store: airport code -> {departures URL -> scraped table}.
sch = dict()
Record schedules for 2 weeks, then augment the count with weekly flight numbers. Seasonal and seasonal-charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its history is in the past.
In [26]:
# Download the daily departures table of every airport for 11-24 March 2017
# and store it in sch[airport][url].
for i in locations:
    print(i)
    if i not in sch:
        sch[i] = {}
    #march 11-24 = 2 weeks
    for d in range(11, 25):
        try:
            url = airportialinks[i]
            full = url + 'departures/201703' + str(d)
            # sch[i] is keyed by URL, so the cache check must use `full`; the
            # former `d not in sch[i]` compared the integer day against URL
            # keys, was always true, and re-downloaded every page on re-run.
            if full in sch[i]:
                continue
            # A timeout keeps one stalled request from hanging the whole loop.
            m = requests.get(full, timeout=30).content
            sch[i][full] = pd.read_html(m)[0]
        except Exception as err:
            # Best effort: a day with no usable table is skipped. Catching
            # Exception instead of a bare `except` lets Ctrl-C interrupt the
            # loop, and the failure is now reported instead of hidden.
            print('no tables %s %s (%s)' % (i, d, err))
In [40]:
# Empty frame that the schedule tables get appended to.
mdf = pd.DataFrame(data=None)
In [41]:
# Flatten the per-airport, per-day schedule tables into the single frame `mdf`.
# The pieces are collected in a list and concatenated once (concat inside the
# loop re-copies the growing frame on every iteration), and a dedicated local
# name is used so the airport table held in `df` is no longer overwritten.
departure_frames = [mdf]
for i in sch:
    for d in sch[i]:
        table = sch[i][d]
        # Keep only columns 1 and 2: drop column 0 and everything from column 3 on.
        flights = table.drop(table.columns[3:], axis=1).drop(table.columns[0], axis=1)
        flights['From'] = i
        # `d` is the key of sch[i], i.e. the departures URL, not a bare date.
        flights['Date'] = d
        departure_frames.append(flights)
mdf = pd.concat(departure_frames)
In [42]:
# Relabel Hahn as Frankfurt, both as a bare name and with its HHN suffix.
for hahn_label, frankfurt_label in (('Hahn', 'Frankfurt'),
                                    ('Hahn HHN', 'Frankfurt HHN')):
    mdf = mdf.replace(hahn_label, frankfurt_label)
In [43]:
def _split_destination(dest):
    """Split a 'To' label such as 'Frankfurt HHN' at its last space.

    Returns a (city, airport) tuple: the text before and after the last space.
    A label without any space is returned unchanged in both positions; the
    previous rfind-based slicing used index -1 there and cut the last
    character off the city.
    """
    city, sep, airport = dest.rpartition(' ')
    if not sep:
        return dest, dest
    return city, airport

_dest_parts = [_split_destination(dest) for dest in mdf['To']]
mdf['City'] = [parts[0] for parts in _dest_parts]
mdf['Airport'] = [parts[1] for parts in _dest_parts]
In [44]:
# Persist the flattened schedule. open() replaces the Python-2-only file()
# builtin, and the context manager closes the handle explicitly.
# NOTE(review): to_json() already returns a JSON string, so json.dumps() wraps
# it in a second layer of encoding. Kept as is because existing readers of
# this file presumably decode twice — confirm before simplifying.
with open("mdf_ae_dest.json", 'w') as out:
    out.write(json.dumps(mdf.reset_index().to_json()))
In [45]:
# Number of rows collected in mdf.
len(mdf)
Out[45]:
In [46]:
# Distinct values of the 'Airline' column.
airlines = {airline for airline in mdf['Airline']}
In [47]:
# Distinct values of the 'City' column.
cities = {city for city in mdf['City']}
In [48]:
# Save the distinct cities and airlines as JSON lists. open() replaces the
# Python-2-only file() builtin, and the context managers close each handle.
with open("cities_ae_dest.json", 'w') as out:
    out.write(json.dumps(list(cities)))
with open("airlines_ae_dest.json", 'w') as out:
    out.write(json.dumps(list(airlines)))
In [49]:
# Geocoding results per city name; filled incrementally by the lookup loop.
citycoords = dict()
In [50]:
# City names that are geocoded with an explicit country/region qualifier
# instead of the bare name.
qualified_names = {
    u'Birmingham': 'Birmingham, UK',
    u'Valencia': 'Valencia, Spain',
    u'Naples': 'Naples, Italy',
    u'St. Petersburg': 'St. Petersburg, Russia',
    u'Bristol': 'Bristol, UK',
    u'Victoria': 'Victoria, Seychelles',
    u'Washington': 'Washington, DC',
    u'Odessa': 'Odessa, Ukraine',
}
for i in cities:
    # Cities already present in citycoords are not looked up again.
    if i in citycoords:
        continue
    z = qualified_names.get(i, i)
    citycoords[i] = Geocoder(apik).geocode(z)
    print(i)
In [53]:
# Reduce each geocoding result to the coordinates and country of its first
# entry.
citysave = {}
for city, hits in citycoords.items():
    first_hit = hits[0]
    citysave[city] = {"coords": first_hit.coordinates,
                      "country": first_hit.country}
In [54]:
# Save the per-city coordinates and country. open() replaces the Python-2-only
# file() builtin, and the context manager closes the handle explicitly.
with open("citysave_ae_dest.json", 'w') as out:
    out.write(json.dumps(citysave))