In [34]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [35]:
from pygeocoder import Geocoder
# API key (presumably for the Google geocoding service used through pygeocoder).
# It is read from the GOOGLE_API_KEY environment variable so the secret is not
# stored in the notebook; it falls back to an empty string when the variable is
# unset. The key that used to be hard-coded here should be treated as leaked
# and revoked.
import os
apik = os.environ.get('GOOGLE_API_KEY', '')
In [73]:
# Parse locations_hu.json into `locations`. The context manager closes the
# file handle deterministically; the previous file(...).read() call left it
# open and only works on Python 2.
with open('locations_hu.json', 'r') as locations_file:
    locations = json.load(locations_file)
In [74]:
def _read_nested_json_frame(path):
    """Load a DataFrame from a file whose JSON payload is itself passed to pd.read_json.

    The file is first decoded with json.load and the decoded value is handed to
    pd.read_json, exactly as the original one-liner did (presumably the frame
    was stored as a JSON-encoded string - verify against the writer). The file
    handle is closed on exit, which the original file(...).read() did not do.
    """
    with open(path, 'r') as handle:
        return pd.read_json(json.load(handle))


mdf_dest = _read_nested_json_frame('mdf_hu_dest.json')
mdf_arrv = _read_nested_json_frame('mdf_hu_arrv.json')
In [75]:
# Parse the two citysave JSON files. Each handle is closed by its context
# manager; the previous file(...).read() calls left them open and only work on
# Python 2.
with open('citysave_hu_dest.json', 'r') as citysave_dest_file:
    citysave_dest = json.load(citysave_dest_file)
with open('citysave_hu_arrv.json', 'r') as citysave_arrv_file:
    citysave_arrv = json.load(citysave_arrv_file)
In [76]:
# Copy the 'From' column into an 'ID' column (used later as the first index
# level), then preview the first rows.
mdf_dest = mdf_dest.assign(ID=mdf_dest['From'])
mdf_dest.head()
Out[76]:
In [77]:
# Copy the 'To' column into an 'ID' column (used later as the first index
# level), then preview the first rows.
mdf_arrv = mdf_arrv.assign(ID=mdf_arrv['To'])
mdf_arrv.head()
Out[77]:
In [78]:
# Stack the two frames row-wise into a single frame.
mdf = pd.concat(objs=[mdf_dest, mdf_arrv], axis=0)
In [79]:
# Number of rows in mdf_dest.
mdf_dest.shape[0]
Out[79]:
In [80]:
# Number of rows in mdf_arrv.
mdf_arrv.shape[0]
Out[80]:
In [81]:
# Display the combined frame (mdf_dest and mdf_arrv concatenated) for inspection.
mdf
Out[81]:
In [82]:
# Index the combined frame by ID -> City -> Airport -> Airline so that each
# level can be sliced with .loc in the cells below.
index_levels = ['ID', 'City', 'Airport', 'Airline']
mdg = mdf.set_index(index_levels)
In [83]:
# Spot check: rows for ID 'BUD', city 'Frankfurt', airport 'FRA' whose 'Date'
# value equals the departures URL for 2017-03-18.
k = mdg.loc['BUD'].loc['Frankfurt'].loc['FRA']
testurl = u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/departures/20170318'
k.loc[k['Date'] == testurl]
Out[83]:
In [84]:
# Spot check: rows for ID 'BUD', city 'Frankfurt', airport 'FRA' whose 'Date'
# value equals the arrivals URL for 2017-03-18.
k = mdg.loc['BUD'].loc['Frankfurt'].loc['FRA']
testurl = u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/20170318'
k.loc[k['Date'] == testurl]
Out[84]:
In [85]:
# Print, for each day of March 11-24 2017, how many BUD / Frankfurt / FRA rows
# carry that day's departures URL and that day's arrivals URL in 'Date'.
# A single pre-formatted string is printed so the line is identical under the
# Python 2 print statement and the Python 3 print function.
k = mdg.loc['BUD'].loc['Frankfurt'].loc['FRA']
baseurl = u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/'
for i in range(11, 25):
    testurl = baseurl + u'departures/201703' + str(i)
    departures = len(k[k['Date'] == testurl])
    testurl = baseurl + u'arrivals/201703' + str(i)
    arrivals = len(k[k['Date'] == testurl])
    print('BUD-FRA March %d departures %d arrivals %d' % (i, departures, arrivals))
In [88]:
# Rows in k divided by 14 (presumably the 14 days, March 11-24, iterated in the
# previous cell - confirm).
# NOTE(review): both operands are ints, so under Python 2 (which this notebook
# uses elsewhere: print statements, file()) this is floor division.
len(k)/14
Out[88]:
mdg
This checks out with the source.
In [89]:
from collections import Counter


def _nested_frequencies(index_keys, min_records, divisor):
    """Build the nested ID -> city -> airport -> airline frequency dict.

    Parameters
    ----------
    index_keys : iterable of (ID, City, Airport, Airline) tuples, one per record.
    min_records : a group is kept only if it has strictly more records than this.
    divisor : each kept group's record count is divided by this to give '7freq'.

    Returns
    -------
    dict shaped as
        {ID: {City: {'7freq': f,
                     'airports': {Airport: {'7freq': f,
                                            'airlines': {Airline: {'7freq': f}}}}}}}
    Every ID gets an entry, even when none of its cities pass the threshold.

    Records are counted directly from the index tuples. The previous version
    measured group size with len(frame.loc[label]); on the last (flat) index
    level a label that matches exactly one row yields a Series, so len() gave
    the number of columns and single-record airlines slipped through with a
    wrong frequency.
    """
    keys = [tuple(key) for key in index_keys]
    per_city = Counter(key[:2] for key in keys)
    per_airport = Counter(key[:3] for key in keys)
    # Records without an airline are left out of the airline level only
    # (presumably what the old bare `except: pass` was hiding - confirm).
    per_airline = Counter(key for key in keys if not pd.isnull(key[3]))

    scale = float(divisor)
    nested = dict((key[0], {}) for key in keys)
    for (ident, city), count in per_city.items():
        if count > min_records:
            nested[ident][city] = {'airports': {}, '7freq': count / scale}
    # A child group never has more records than its parent, so any airport or
    # airline that passes the threshold finds its parent entry already created.
    for (ident, city, airport), count in per_airport.items():
        if count > min_records:
            nested[ident][city]['airports'][airport] = {'airlines': {}, '7freq': count / scale}
    for (ident, city, airport, airline), count in per_airline.items():
        if count > min_records:
            nested[ident][city]['airports'][airport]['airlines'][airline] = {'7freq': count / scale}
    return nested


# Threshold is strict: a group needs more than `minn` records in the
# downloaded two-week window to be kept.
minn = 1.0
# 2 weeks downloaded and departures + arrivals are both counted, so dividing
# the record count by 4 gives a weekly frequency.
d = 4.0
flights = _nested_frequencies(mdg.index, minn, d)
In [90]:
# Serialise `flights` to flights_hu.json. The context manager flushes and
# closes the file; the previous file(...).write(...) call never closed the
# handle and only works on Python 2.
with open("flights_hu.json", 'w') as flights_file:
    flights_file.write(json.dumps(flights))