In [1]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
SC=json.loads(file('../json/SC2.json','r').read())
I3=json.loads(file('../json/I3.json','r').read())

Global


In [3]:
dbpath='E:/Dropbox/Public/datarepo/aviation/' #large file db path
MDF_dest=json.loads(file(dbpath+'json/MDF_dest.json','r').read())
MDF_arrv=json.loads(file(dbpath+'json/MDF_arrv.json','r').read())

In [4]:
gdf_dest=pd.DataFrame(MDF_dest)
gdf_arrv=pd.DataFrame(MDF_arrv)

In [5]:
gdf_dest['ID']=gdf_dest['From']
gdf_arrv['ID']=gdf_arrv['To']
gdf=pd.concat([gdf_dest,gdf_arrv])

In [6]:
#parse data into flights
mdg=gdf.set_index(['ID','City','Airport','Airline'])

In [7]:
import os.path
directory='../countries/wd'
if not os.path.exists(directory) :
    os.makedirs(directory)
for j in ['code','d3','json','map']:
    if not os.path.exists(directory+'/'+j):
        os.makedirs(directory+'/'+j)

In [11]:
len(mdg.index.get_level_values(0).unique())


Out[11]:
3636

In [12]:
flights={}
minn=1.0 #want to see minimum 1 flight in the past 2 weeks
for i in mdg.index.get_level_values(0).unique():
    print i,
    #2 weeks downloaded. want to get weekly freq. but multi by 2 dept+arrv
    d=4.0
    if i not in flights:flights[i]={}
    for j in mdg.loc[i].index.get_level_values(0).unique():
        if len(mdg.loc[i].loc[j])>minn: #minimum 1 flights required in this period at least once every 2 weeks
            if j not in flights[i]:flights[i][j]={'airports':{},'7freq':0}
            flights[i][j]['7freq']=len(mdg.loc[i].loc[j])/d 
            for k in mdg.loc[i].loc[j].index.get_level_values(0).unique():
                if len(mdg.loc[i].loc[j].loc[k])>minn:
                    if k not in flights[i][j]['airports']:flights[i][j]['airports'][k]={'airlines':{},'7freq':0}
                    flights[i][j]['airports'][k]['7freq']=len(mdg.loc[i].loc[j].loc[k])/d
                    for l in mdg.loc[i].loc[j].loc[k].index.get_level_values(0).unique():
                        try:
                            if len(mdg.loc[i].loc[j].loc[k].loc[l])>minn: 
                                if l not in flights[i][j]['airports'][k]['airlines']:flights[i][j]['airports'][k]['airlines'][l]={'7freq':0}
                                flights[i][j]['airports'][k]['airlines'][l]['7freq']=len(mdg.loc[i].loc[j].loc[k].loc[l])/d
                        except:pass
file("countries/wd/json/flights.json",'w').write(json.dumps(flights))


YUY YUL
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-12-dce17a622cb9> in <module>()
     11             flights[i][j]['7freq']=len(mdg.loc[i].loc[j])/d
     12             for k in mdg.loc[i].loc[j].index.get_level_values(0).unique():
---> 13                 if len(mdg.loc[i].loc[j].loc[k])>minn:
     14                     if k not in flights[i][j]['airports']:flights[i][j]['airports'][k]={'airlines':{},'7freq':0}
     15                     flights[i][j]['airports'][k]['7freq']=len(mdg.loc[i].loc[j].loc[k])/d

C:\Anaconda2\lib\site-packages\pandas\core\indexing.pyc in __getitem__(self, key)
   1294             return self._getitem_tuple(key)
   1295         else:
-> 1296             return self._getitem_axis(key, axis=0)
   1297 
   1298     def _getitem_axis(self, key, axis=0):

C:\Anaconda2\lib\site-packages\pandas\core\indexing.pyc in _getitem_axis(self, key, axis)
   1464 
   1465         # fall thru to straight lookup
-> 1466         self._has_valid_type(key, axis)
   1467         return self._get_label(key, axis=axis)
   1468 

C:\Anaconda2\lib\site-packages\pandas\core\indexing.pyc in _has_valid_type(self, key, axis)
   1401                 raise
   1402             except:
-> 1403                 error()
   1404 
   1405         return True

C:\Anaconda2\lib\site-packages\pandas\core\indexing.pyc in error()
   1388                                     "key")
   1389                 raise KeyError("the label [%s] is not in the [%s]" %
-> 1390                                (key, self.obj._get_axis_name(axis)))
   1391 
   1392             try:

KeyError: u'the label [GUA] is not in the [index]'