In [35]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [36]:
from pygeocoder import Geocoder
# API key (presumably for the pygeocoder Geocoder imported above), read from the
# environment so the secret is not committed with the notebook. `apik` is None
# when the GOOGLE_API_KEY variable is not set.
# NOTE(review): the key that used to be hardcoded on this line should be
# treated as leaked and rotated.
import os
apik=os.environ.get('GOOGLE_API_KEY')
In [37]:
# Load the locations lookup from disk. `open` inside a `with` block replaces the
# Python-2-only `file()` builtin and guarantees the handle is closed.
with open('locations_ro.json','r') as f:
    locations=json.load(f)
In [38]:
# Each file holds a JSON *string* that itself contains the JSON-serialised
# DataFrame, hence the two-step decode: json.load() unwraps the string and
# pd.read_json() parses it into a frame.
# `with open(...)` replaces the Python-2-only `file()` builtin and closes the
# handles deterministically.
with open('mdf_ro_dest.json','r') as f:
    mdf_dest=pd.read_json(json.load(f))
with open('mdf_ro_arrv.json','r') as f:
    mdf_arrv=pd.read_json(json.load(f))
In [39]:
# Load the two saved `citysave` JSON documents. `with open(...)` replaces the
# Python-2-only `file()` builtin and makes sure each handle is closed.
with open('citysave_ro_dest.json','r') as f:
    citysave_dest=json.load(f)
with open('citysave_ro_arrv.json','r') as f:
    citysave_arrv=json.load(f)
In [40]:
# Copy the 'From' column into a new 'ID' column (mutates mdf_dest in place), so
# this frame exposes the same 'ID' key column that mdf_arrv derives from 'To'.
mdf_dest['ID']=mdf_dest['From']
# Preview the first rows to check the new column.
mdf_dest.head()
Out[40]:
In [41]:
# Copy the 'To' column into a new 'ID' column (mutates mdf_arrv in place), so
# this frame exposes the same 'ID' key column that mdf_dest derives from 'From'.
mdf_arrv['ID']=mdf_arrv['To']
# Preview the first rows to check the new column.
mdf_arrv.head()
Out[41]:
In [42]:
# Stack the two frames row-wise into one table. No ignore_index is passed, so
# each frame's original row labels are carried over unchanged.
mdf=pd.concat([mdf_dest,mdf_arrv])
In [43]:
# Display the combined frame for a visual check.
mdf
Out[43]:
In [44]:
# Move the four key columns into a 4-level MultiIndex, ordered
# ID -> City -> Airport -> Airline; the result is a new frame (mdf is untouched).
mdg=mdf.set_index(['ID','City','Airport','Airline'])
In [45]:
# Number of rows in the indexed frame.
len(mdg)
Out[45]:
mdg
check out with source
In [46]:
# Build a nested weekly-frequency lookup from the 4-level index of `mdg`:
#
#   flights[ID][City] = {'7freq': ..., 'airports': {
#       Airport: {'7freq': ..., 'airlines': {
#           Airline: {'7freq': ...}}}}}
#
# Every ID gets an entry (possibly empty); a City / Airport / Airline group is
# recorded only when it has strictly more than `minn` rows.
#
# Groups are formed with groupby(), which always yields sub-frames, so len() is
# a true row count at every level. The previous chained `.loc[...]` lookups
# returned a Series for an airline with exactly one row, making len() the
# number of columns instead of 1. sort=False keeps keys in first-appearance
# order. Rows whose index key is missing are skipped by groupby.
flights={}
# Minimum row count (exclusive) for a group to be kept.
# NOTE(review): the original comment said "minimum 1 flights required in this
# period once every 2 weeks", but the strict '>' with minn=1 keeps only groups
# with at least 2 rows -- confirm which is intended.
minn=1.0
# Row count -> weekly frequency: 2 weeks of data were downloaded and each flight
# appears twice (departure + arrival tables), hence the division by 4.
d=4.0
for i, by_id in mdg.groupby(level=0, sort=False):
    flights[i]={}
    for j, by_city in by_id.groupby(level=1, sort=False):
        if len(by_city)<=minn:
            continue
        city_entry={'airports':{},'7freq':len(by_city)/d}
        flights[i][j]=city_entry
        for k, by_airport in by_city.groupby(level=2, sort=False):
            if len(by_airport)<=minn:
                continue
            airport_entry={'airlines':{},'7freq':len(by_airport)/d}
            city_entry['airports'][k]=airport_entry
            for l, by_airline in by_airport.groupby(level=3, sort=False):
                if len(by_airline)>minn:
                    airport_entry['airlines'][l]={'7freq':len(by_airline)/d}
Manual fix for TGM: all TGM flights actually depart from CLJ, so they are double-counted, and Budapest (BUD) is not represented under TGM.
In [47]:
# Give TGM a 'Budapest' entry by reusing CLJ's. No copy is made: both keys now
# refer to the very same dict object, so a change through one is visible
# through the other.
flights['TGM']['Budapest']=flights['CLJ']['Budapest']
In [48]:
# For every destination listed under TGM, take TGM's weekly frequency out of the
# matching CLJ entry; when nothing positive would remain, remove the destination
# from CLJ altogether.
tgm_routes=flights['TGM']
clj_routes=flights['CLJ']
for j in tgm_routes:
    clj_entry=clj_routes[j]
    tgm_entry=tgm_routes[j]
    surplus=clj_entry['7freq']-tgm_entry['7freq']
    if surplus>0:
        shared=tgm_entry['7freq']
        # City-level total.
        clj_entry['7freq']-=shared
        # The same city-level amount is subtracted from the first airport
        # listed under TGM and from its 'Wizz Air' airline entry.
        ap=list(tgm_entry['airports'].keys())[0]
        clj_airport=clj_entry['airports'][ap]
        clj_airport['7freq']-=shared
        clj_airport['airlines'][u'Wizz Air']['7freq']-=shared
    else:
        clj_routes.pop(j)
In [49]:
# Persist the nested frequency dict as JSON. `with open(...)` replaces the
# Python-2-only `file()` builtin and guarantees the data is flushed and the
# handle closed when the block exits.
with open("flights_ro.json",'w') as f:
    f.write(json.dumps(flights))