In [54]:
import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [55]:
def _load_json(path):
    """Parse the JSON document stored at ``path`` and return the result.

    The file is opened in a ``with`` block so the handle is closed right
    away, and ``open`` is used instead of the ``file`` builtin, which no
    longer exists in Python 3. Binary mode leaves text decoding to the
    ``json`` module rather than to the platform's default encoding.
    """
    with open(path, 'rb') as handle:
        return json.load(handle)


# Input tables for this notebook, all read from ../json.
cluster = _load_json('../json/cluster.json')
citysave = _load_json('../json/citysave2.json')
pop_countries = _load_json('../json/pop_countries2.json')
pop_cities = _load_json('../json/pop_cities.json')

In [56]:
# Map every distinct value of `cluster` to a country taken from `citysave`.
# When several keys share the same cluster value, the first key encountered
# decides the country; later ones are skipped.
# NOTE(review): keys look like airport identifiers and values like city
# names -- confirm against the JSON files.
unicities = {}
for key, label in cluster.items():
    if label in unicities:
        continue
    unicities[label] = citysave[key]['country']

In [57]:
# city -> {parent city: parent population}; filled in by the next cell.
parent = dict()

In [58]:
# Thresholds of the parent rule: a city is attached to a neighbour whose
# population is more than k times its own and that lies closer than x km.
# They do not change between iterations, so they are set once up front.
k = 4
x = 100

# For each city store a single-entry dict {parent name: parent population}.
# The parent is the most populous qualifying neighbour; a city without any
# qualifying neighbour is its own parent.
for city, info in pop_cities.items():
    neighbours = info['nearby']
    threshold = info['pop'] * k
    candidates = [
        (name, near['people'])
        for name, near in neighbours.items()
        if near['people'] > threshold and near['km'] < x
    ]
    if candidates:
        # reversed() makes the last-listed neighbour win when populations
        # are equal, matching the previous dict-overwrite behaviour.
        best_name, best_people = max(reversed(candidates),
                                     key=lambda pair: pair[1])
        parent[city] = {best_name: best_people}
    else:
        parent[city] = {city: info['pop']}

Create a new list of cities (instead of airports), cross-allocating all flights to nearby cities