In [37]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [39]:
# Load the two JSON lookup tables used by the per-country loop below:
# SC is iterated by key, and I3 is indexed with the same keys to obtain the
# folder name under ../countries/.
# Context managers close the file handles deterministically; the previous
# file(...).read() calls left them open until garbage collection.
with open('../json/SC2.json','r') as fh:
    SC=json.load(fh)
with open('../json/I3.json','r') as fh:
    I3=json.load(fh)
In [43]:
def _weekly_flight_tree(frame, min_rows=1.0, divisor=4.0):
    """Summarise flight rows as a nested ID -> City -> Airport -> Airline dict.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns 'ID', 'City', 'Airport' and 'Airline';
        every row is counted as one record.
    min_rows : number
        A city / airport / airline is kept only when it has strictly more
        than this many rows.
    divisor : number
        Row counts are divided by this value to produce '7freq'.

    Returns
    -------
    dict
        {ID: {City: {'7freq': x, 'airports': {Airport: {'7freq': y,
        'airlines': {Airline: {'7freq': z}}}}}}}. Every ID gets an entry,
        even when none of its cities passes the threshold.

    Notes
    -----
    Counts come from groupby, so len() is always a row count. The previous
    chained ``.loc`` lookups returned a Series when an airline label was
    unique, which made len() report the number of columns instead of one
    row. groupby also leaves out rows whose key is missing, which replaces
    the former bare ``except: pass`` around the airline level.
    """
    tree = {}
    for origin, origin_rows in frame.groupby('ID'):
        cities = tree.setdefault(origin, {})
        for city, city_rows in origin_rows.groupby('City'):
            if len(city_rows) <= min_rows:
                continue
            airports = {}
            cities[city] = {'airports': airports,
                            '7freq': len(city_rows) / divisor}
            for airport, airport_rows in city_rows.groupby('Airport'):
                if len(airport_rows) <= min_rows:
                    continue
                airlines = {}
                airports[airport] = {'airlines': airlines,
                                     '7freq': len(airport_rows) / divisor}
                for airline, airline_rows in airport_rows.groupby('Airline'):
                    if len(airline_rows) > min_rows:
                        airlines[airline] = {'7freq': len(airline_rows) / divisor}
    return tree


# Threshold from the original note: "want to see minimum 1 flight in the past
# 2 weeks". The comparison is strict, so an entry needs more than minn rows.
minn = 1.0
# Original note: 2 weeks downloaded, weekly frequency wanted, and the combined
# frame holds both departure and arrival records -> divide the row count by 4.
d = 4.0

for c in SC:
    if len(SC[c]) == 0:
        continue
    # Progress output. The trailing comma keeps the codes on one line under
    # Python 2 and is a harmless no-op tuple under Python 3.
    print(c),

    # read mdf data
    cpath = '../countries/' + I3[c].lower()
    # Each file holds a JSON string that itself contains the DataFrame JSON,
    # hence json.load first and pd.read_json second.
    with open(cpath + '/json/mdf_dest.json', 'r') as fh:
        mdf_dest = pd.read_json(json.load(fh))
    with open(cpath + '/json/mdf_arrv.json', 'r') as fh:
        mdf_arrv = pd.read_json(json.load(fh))

    # Key every record by one endpoint: 'From' for the dest file and 'To'
    # for the arrv file.
    mdf_dest['ID'] = mdf_dest['From']
    mdf_arrv['ID'] = mdf_arrv['To']
    mdf = pd.concat([mdf_dest, mdf_arrv])
    # The combined frame is deliberately not saved (original note: space hog).

    # parse data into flights
    flights = _weekly_flight_tree(mdf, min_rows=minn, divisor=d)
    with open(cpath + '/json/flights.json', 'w') as fh:
        json.dump(flights, fh)
Global
In [ ]:
# Location of the large combined JSON dumps.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
dbpath='E:/Dropbox/Public/datarepo/aviation/' #large file db path

# Read both dumps with context managers so the handles are closed as soon as
# parsing finishes; these files are large and were previously left open.
with open(dbpath+'json/MDF_dest.json','r') as fh:
    MDF_dest=json.load(fh)
with open(dbpath+'json/MDF_arrv.json','r') as fh:
    MDF_arrv=json.load(fh)
In [50]:
# Wrap the two parsed JSON payloads in DataFrames, in the same order as before
# (the dest payload first, then the arrv payload).
gdf_dest, gdf_arrv = [pd.DataFrame(payload) for payload in (MDF_dest, MDF_arrv)]
In [57]:
# Give each frame an 'ID' column copied from one endpoint column
# ('From' for the dest frame, 'To' for the arrv frame), then stack the two
# frames into a single table.
for frame, endpoint in ((gdf_dest, 'From'), (gdf_arrv, 'To')):
    frame['ID'] = frame[endpoint]
gdf = pd.concat([gdf_dest, gdf_arrv])
In [58]:
# parse data into flights:
# index the combined table by ID > City > Airport > Airline so the nested
# summary can be built one level at a time.
index_levels = ['ID', 'City', 'Airport', 'Airline']
mdg = gdf.set_index(index_levels)
In [54]:
import os.path

# Make sure the output folder and its four sub-folders exist, creating only
# the ones that are missing. The parent is listed first, so the creation
# order is unchanged.
directory = '../countries/wd'
targets = [directory] + [directory + '/' + leaf for leaf in ['code', 'd3', 'json', 'map']]
for target in targets:
    if not os.path.exists(target):
        os.makedirs(target)
In [ ]:
# Build the nested ID -> City -> Airport -> Airline summary from mdg and write
# it to the 'wd' output folder.
#
# Counts are taken with groupby on the index levels, so len() is always a row
# count. The previous chained .loc lookups returned a Series when an airline
# label was unique, which made len() report the number of columns; a bare
# "except: pass" also hid any failure at that level. groupby leaves out rows
# whose key is missing, so no blanket exception handler is needed.
flights = {}
# Threshold from the original note: "want to see minimum 1 flight in the past
# 2 weeks". The comparison is strict, so an entry needs more than minn rows.
minn = 1.0
# Original note: 2 weeks downloaded, weekly frequency wanted, and the table
# holds both departure and arrival records -> divide the row count by 4.
d = 4.0

for origin, origin_rows in mdg.groupby(level='ID'):
    # Every ID gets an entry, even when none of its cities passes the threshold.
    cities = flights.setdefault(origin, {})
    for city, city_rows in origin_rows.groupby(level='City'):
        if len(city_rows) <= minn:
            continue
        airports = {}
        cities[city] = {'airports': airports, '7freq': len(city_rows) / d}
        for airport, airport_rows in city_rows.groupby(level='Airport'):
            if len(airport_rows) <= minn:
                continue
            airlines = {}
            airports[airport] = {'airlines': airlines, '7freq': len(airport_rows) / d}
            for airline, airline_rows in airport_rows.groupby(level='Airline'):
                if len(airline_rows) > minn:
                    airlines[airline] = {'7freq': len(airline_rows) / d}

# The output folder is created elsewhere in this notebook as
# '../countries/wd/json'; the previous path lacked the '../' prefix and
# pointed at a different location.
with open('../countries/wd/json/flights.json', 'w') as fh:
    json.dump(flights, fh)