Load the airport network data into networkx


In [1]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
import scipy.optimize as opt

Load the adjacency matrix (csv file to numpy array):


In [5]:
A = np.loadtxt('../data/airport/air500matrix.csv', dtype=int, delimiter=',')

Load the airport names (IATA codes) and lat/long information:


In [39]:
latlong_data=np.loadtxt('../data/airport/latlongs.dat', dtype='str', delimiter=',')
node_labels=[str(i[2:-1]).strip() for i in latlong_data[:,0]]
node_pos=[(float(i[3:-2]), float(j[3:-1])) for i,j in latlong_data[:,1:]]

Create a networkx graph from the adjacency matrix:


In [43]:
T = nx.Graph(A)

Label the nodes with the airport names:


In [44]:
N = len(node_labels)
label_mapping = dict(zip(range(N), node_labels))
T = nx.relabel_nodes(T, label_mapping)

Add the latitudes and longitudes as attributes:


In [45]:
nx.set_node_attributes(T, "latlon", dict(zip(node_labels, node_pos)))

In [47]:
nx.write_gpickle(T, "../data/airport/big_airportnet.gpickle")

Check it out:


In [20]:
T.node['FRA']['latlon']


Out[20]:
(8.5431249999999999, 50.026420999999999)

Plot the network:


In [21]:
nx.draw(T)


/usr/lib/pymodules/python2.7/networkx/drawing/layout.py:514: RuntimeWarning: divide by zero encountered in longlong_scalars
  pos[:,i]*=scale/lim

In [22]:
nx.draw_networkx(T, with_labels=False, node_size=10, alpha=0.6, width=0.1)



In [23]:
nx.draw_networkx(T, with_labels=False, node_size=10, alpha=0.6, width=0.07, pos=nx.random_layout(T))


There are many other layouts available. please expore nx.layouts

Now let's plot the airports over a map projection:


In [24]:
from mpl_toolkits.basemap import Basemap
import matplotlib.colors as colors

In [25]:
bmap=Basemap(llcrnrlon=-170, llcrnrlat=-48.7,urcrnrlon=179.99, urcrnrlat=70.8, lat_0=45, lon_0=10, resolution='h')

In [26]:
def plot_over_basemap(G, node_colors=None):
    fig = plt.figure(figsize=(25,25), dpi=100)
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    dummy=bmap.drawcoastlines(color='white')
    dummy2=bmap.drawcountries(color='white')
    dummy3=bmap.fillcontinents(color='0.8', alpha=0.6)
    
    node_labels=G.nodes()
    pos_on_map=[bmap(*G.node[label]['latlon']) for label in node_labels]
    posdict=dict(zip(node_labels, pos_on_map))
    
    if node_colors is not None:
        nodes=nx.draw_networkx_nodes(G, with_labels=False, node_size=25, alpha=1.0, width=0.1, pos=posdict, node_color=node_colors, cmap=plt.get_cmap('cool'))
        nx.draw_networkx_edges(G, pos=posdict, alpha=0.4)
        plt.colorbar(nodes, orientation='horizontal', pad=0.001, aspect=40)
        plt.axis('off')
    else:
        nx.draw_networkx(G, with_labels=False, node_size=25, alpha=1.0, width=0.1, pos=posdict)

In [27]:
plot_over_basemap(T)


This is way too big. Let's consider only the airports that fall in a lat/lon box:


In [5]:
airports_eurasia=np.loadtxt("data/eurasia_airport_labels.txt",dtype=str)

In [29]:
G=T.subgraph(airports_eurasia)

In [30]:
bmap=Basemap(llcrnrlon=-18.32, llcrnrlat=-49,urcrnrlon=179.99, urcrnrlat=71, lat_0=45, lon_0=10, resolution='h')

In [31]:
plot_over_basemap(G)


The degree distribution:


In [32]:
print G.degree()


{'CPH': 94, 'KMG': 44, 'CKG': 36, 'DKR': 22, 'SUF': 18, 'CCU': 20, 'DRS': 26, 'HBA': 4, 'DOH': 71, 'DRW': 9, 'BGO': 29, 'WAW': 69, 'NAP': 54, 'FNC': 33, 'BOM': 59, 'BOO': 5, 'BGY': 51, 'BOD': 35, 'SIN': 96, 'AGP': 78, 'PRG': 91, 'SAH': 23, 'RIX': 45, 'LYS': 47, 'NNG': 23, 'RAK': 31, 'HKT': 18, 'KHH': 17, 'KHI': 35, 'MYJ': 9, 'BNE': 23, 'ESB': 22, 'SVQ': 33, 'HKG': 115, 'HKD': 4, 'SVO': 64, 'HAJ': 63, 'CHC': 11, 'HAM': 77, 'HAN': 22, 'URC': 33, 'MRU': 25, 'THR': 31, 'MRS': 41, 'BFS': 40, 'ITM': 11, 'LHR': 160, 'AYT': 29, 'KBP': 51, 'LPA': 55, 'HRB': 29, 'TYN': 30, 'KIX': 58, 'SYD': 41, 'SNN': 28, 'ZAG': 25, 'VIE': 101, 'XMN': 46, 'MAA': 41, 'JNB': 43, 'MZG': 2, 'MAD': 110, 'OVD': 13, 'MEL': 32, 'MAH': 39, 'SYX': 27, 'LEJ': 42, 'MAN': 101, 'TSN': 38, 'CMN': 51, 'EMA': 49, 'DME': 51, 'NGB': 27, 'FLR': 23, 'BEG': 33, 'GUM': 15, 'TSA': 2, 'SKG': 35, 'BEY': 49, 'DEL': 62, 'MES': 4, 'NGS': 6, 'ICN': 97, 'FUK': 29, 'RGN': 10, 'VCE': 60, 'TFS': 59, 'AMM': 48, 'CEB': 11, 'CPT': 14, 'VLC': 47, 'AMD': 17, 'IBZ': 56, 'SZX': 46, 'MNL': 36, 'OSL': 77, 'PLZ': 3, 'WNZ': 27, 'AMS': 170, 'WLG': 7, 'SCQ': 18, 'STN': 95, 'BUD': 75, 'DLC': 40, 'DLA': 12, 'VGO': 9, 'BRI': 20, 'TRN': 34, 'UPG': 3, 'MHD': 8, 'BLQ': 44, 'SXB': 22, 'BLR': 29, 'BLL': 32, 'KMI': 6, 'XRY': 19, 'CSX': 32, 'XIY': 36, 'FOC': 33, 'GRO': 32, 'TRS': 10, 'LJU': 26, 'CJU': 14, 'WUH': 34, 'NGO': 45, 'SXF': 63, 'SHA': 26, 'MPL': 16, 'PSA': 39, 'KCH': 6, 'MXP': 122, 'LCY': 27, 'CBR': 5, 'LCG': 8, 'LCA': 52, 'OPO': 34, 'CAN': 67, 'SHE': 46, 'ACC': 16, 'CTU': 40, 'BCN': 114, 'HND': 18, 'ACE': 46, 'CTS': 22, 'LIS': 60, 'GLA': 45, 'KMJ': 6, 'ATH': 69, 'IST': 96, 'PEN': 20, 'KUL': 72, 'TPE': 56, 'CTA': 41, 'MUC': 125, 'PEK': 89, 'ISB': 24, 'COK': 19, 'ISG': 4, 'SHJ': 47, 'SVG': 24, 'LBA': 38, 'CGK': 36, 'NCE': 73, 'PER': 19, 'CGN': 97, 'BKI': 15, 'BKK': 94, 'BSL': 50, 'NBO': 33, 'PVG': 86, 'CGQ': 27, 'PMO': 31, 'NUE': 47, 'VRN': 31, 'FCO': 112, 'JER': 21, 'FRA': 169, 'DMK': 3, 'TLV': 51, 'HER': 44, 'AUH': 58, 'DUB': 94, 'GOA': 18, 'KRT': 19, 'DMM': 41, 'LHW': 27, 'KRK': 43, 'ADD': 31, 'SUB': 7, 'JED': 53, 'HEL': 82, 'DUR': 4, 'DUS': 102, 'LTN': 52, 'TIP': 30, 'TLL': 23, 'RUH': 45, 'CDG': 160, 'LED': 46, 'ORY': 66, 'ALG': 32, 'BRU': 135, 'EDI': 46, 'BRS': 58, 'ALC': 64, 'PMI': 75, 'TAO': 37, 'FUE': 47, 'ORK': 36, 'TUN': 46, 'MME': 19, 'BRE': 36, 'NTE': 29, 'PUS': 25, 'TLS': 35, 'LIN': 35, 'KOJ': 10, 'HYD': 26, 'HGH': 37, 'TOS': 7, 'DPS': 18, 'MFM': 25, 'LHE': 26, 'LUX': 98, 'LOS': 23, 'CAI': 63, 'HAK': 31, 'CGO': 31, 'VNO': 29, 'YNT': 22, 'UKB': 6, 'TRV': 18, 'BIO': 32, 'BAH': 51, 'TXL': 68, 'KWE': 31, 'DAC': 28, 'KWI': 57, 'DAM': 50, 'GAU': 6, 'AKL': 25, 'OKA': 22, 'KWL': 33, 'SOU': 27, 'FAO': 50, 'DAR': 9, 'SGN': 29, 'NCL': 44, 'HIJ': 12, 'KMQ': 10, 'MLA': 58, 'GOI': 13, 'CNS': 13, 'MLE': 23, 'ABJ': 19, 'GVA': 84, 'ADL': 13, 'CNX': 10, 'CMB': 38, 'TNA': 34, 'CWL': 31, 'STR': 79, 'GOT': 43, 'ABZ': 30, 'NAN': 8, 'ZRH': 109, 'SZG': 35, 'TFN': 16, 'TRD': 15, 'NKG': 43, 'PNQ': 9, 'OOL': 6, 'GMP': 3, 'SOF': 43, 'BHD': 16, 'LGW': 127, 'PNH': 12, 'LPL': 38, 'GRZ': 22, 'SDJ': 16, 'BHX': 63, 'OTP': 44, 'ADB': 24, 'MCT': 35, 'DXB': 106, 'ARN': 77, 'NRT': 69}

It's a standard python dictionary:


In [33]:
G.degree()['LHR']


Out[33]:
160

Get the degree distribution


In [34]:
deg_dist=G.degree().values()

Histogram:


In [35]:
freqs,bins,_=plt.hist(deg_dist)


Fitting power law


In [36]:
freqs


Out[36]:
array([65, 91, 63, 27, 12, 15,  6,  4,  0,  4])

In [37]:
bins


Out[37]:
array([   2. ,   18.8,   35.6,   52.4,   69.2,   86. ,  102.8,  119.6,
        136.4,  153.2,  170. ])

In [38]:
binmids=[(bins[i]+bins[i+1])/2.0 for i in range(freqs.shape[0])]

In [39]:
binmids


Out[39]:
[10.4,
 27.200000000000003,
 44.0,
 60.800000000000004,
 77.599999999999994,
 94.400000000000006,
 111.20000000000002,
 128.0,
 144.80000000000001,
 161.60000000000002]

Power law degree distribution: a common occurence


In [40]:
powerfunc=lambda x,a,l:a*x**(-l)

In [41]:
a,l=opt.curve_fit(powerfunc,binmids[2:],freqs[2:])[0]

In [42]:
plt.plot(binmids[2:], powerfunc(binmids[2:], a,l)); plt.hist(deg_dist)


Out[42]:
(array([65, 91, 63, 27, 12, 15,  6,  4,  0,  4]),
 array([   2. ,   18.8,   35.6,   52.4,   69.2,   86. ,  102.8,  119.6,
        136.4,  153.2,  170. ]),
 <a list of 10 Patch objects>)

Shortest paths


In [43]:
nx.shortest_path(G, source='VIE', target='CCU')


Out[43]:
['VIE', 'BKK', 'CCU']

But wait! This is not the only shortest path. Fortunately, we are covered by another function


In [44]:
for path in nx.all_shortest_paths(G, source='VIE', target='CCU'):
    print path


['VIE', 'BOM', 'CCU']
['VIE', 'LHR', 'CCU']
['VIE', 'AMS', 'CCU']
['VIE', 'DEL', 'CCU']
['VIE', 'DXB', 'CCU']
['VIE', 'BKK', 'CCU']
['VIE', 'FRA', 'CCU']
['VIE', 'AUH', 'CCU']

Now time to solve Ex 3 and Ex 4

Centrality:


In [45]:
cents=nx.closeness_centrality(G)

In [46]:
centarray=[cents[node] for node in G.nodes()]
plot_over_basemap(G, node_colors=centarray)


####More centrality measures?? Definitions in Graph theory section

Histogram of centralities


In [47]:
plt.hist(centarray)


Out[47]:
(array([ 4, 11, 40, 63, 76, 50, 19, 17,  3,  4]),
 array([ 0.29698858,  0.33773307,  0.37847756,  0.41922205,  0.45996655,
        0.50071104,  0.54145553,  0.58220002,  0.62294451,  0.66368901,
        0.7044335 ]),
 <a list of 10 Patch objects>)

Model of infection spreading

Let the initial concentration of pathogen be:

$c_0=(c_1,c_2,\cdots,c_n)$

And let the spreading happen according to this law:

$c_{n+1}=(D^{-1}A)^T c_n=Sc_n$

##### TODO: degree matrix and adj matrix in exercise


In [48]:
A_d=np.array(nx.adjacency_matrix(G))
D=A_d.sum(axis=0)
Dinv=np.diag(np.where(D>0,1.0/D,0))
S=np.dot(Dinv,A_d).T

Let's introduce pathogen at FRA


In [49]:
conc=np.zeros(G.number_of_nodes())
conc[G.nodes().index('FRA')]=1

In [50]:
S


Out[50]:
array([[ 0.        ,  0.        ,  0.        , ...,  0.00943396,
         0.01298701,  0.00625   ],
       [ 0.        ,  0.        ,  0.02777778, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.02272727,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ..., 
       [ 0.0106383 ,  0.        ,  0.        , ...,  0.        ,
         0.01298701,  0.00625   ],
       [ 0.0106383 ,  0.        ,  0.        , ...,  0.00943396,
         0.        ,  0.00625   ],
       [ 0.0106383 ,  0.        ,  0.        , ...,  0.00943396,
         0.01298701,  0.        ]])

Add the concentrations as attributes


In [51]:
plot_over_basemap(G, node_colors=conc)



In [52]:
conc2=np.dot(S,conc)
plot_over_basemap(G, node_colors=conc2)



In [53]:
conc3=np.dot(S,conc2)
plot_over_basemap(G, node_colors=conc3)



In [54]:
plt.hist(conc3)


Out[54]:
(array([147,  79,  31,  12,  10,   4,   1,   1,   1,   1]),
 array([ 0.        ,  0.00251785,  0.00503571,  0.00755356,  0.01007141,
        0.01258927,  0.01510712,  0.01762497,  0.02014283,  0.02266068,
        0.02517853]),
 <a list of 10 Patch objects>)

In [55]:
plt.hist(conc2)


Out[55]:
(array([118,   0,   0,   0,   0,   0,   0,   0,   0, 169]),
 array([ 0.        ,  0.00059172,  0.00118343,  0.00177515,  0.00236686,
        0.00295858,  0.0035503 ,  0.00414201,  0.00473373,  0.00532544,
        0.00591716]),
 <a list of 10 Patch objects>)

In [56]:
plt.hist(conc)


Out[56]:
(array([286,   0,   0,   0,   0,   0,   0,   0,   0,   1]),
 array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ]),
 <a list of 10 Patch objects>)

Stable configuration:


In [57]:
S50=np.linalg.matrix_power(S,50)

In [58]:
conc50=np.dot(S50, conc)

In [59]:
conc100=np.dot(np.linalg.matrix_power(S,50), conc50)

In [60]:
conc150=np.dot(np.linalg.matrix_power(S,50), conc100)

In [61]:
np.linalg.norm(conc150-conc100)


Out[61]:
2.1599599714390042e-08

In [62]:
plot_over_basemap(G, node_colors=conc50)



In [63]:
plt.hist(conc50)


Out[63]:
(array([65, 91, 63, 27, 13, 14,  6,  4,  0,  4]),
 array([ 0.00016979,  0.00159708,  0.00302438,  0.00445167,  0.00587896,
        0.00730625,  0.00873354,  0.01016083,  0.01158812,  0.01301541,
        0.0144427 ]),
 <a list of 10 Patch objects>)

In [64]:
conc50[G.nodes().index('FRA')]


Out[64]:
0.014357410438712904

Remove central edges


In [65]:
H=G.copy()

In [66]:
edge_centrs=nx.edge_betweenness_centrality(H)

In [67]:
sorted_centrs=sorted(edge_centrs.keys(), cmp=lambda x,y:cmp(edge_centrs[x], edge_centrs[y]))

In [68]:
for  i in xrange(10):
    rmstart, rmend=sorted_centrs[-i]
    H.remove_edge(rmstart, rmend)

In [69]:
A_d=np.array(nx.adjacency_matrix(H))
D=A_d.sum(axis=0)
Dinv=np.diag(np.where(D>0,1.0/D,0))
S=np.dot(Dinv,A_d).T


-c:3: RuntimeWarning: divide by zero encountered in divide

In [70]:
conc=np.zeros(H.number_of_nodes())
conc[G.nodes().index('FRA')]=1

In [71]:
conc50=np.dot(np.linalg.matrix_power(S,50), conc)

In [72]:
plt.hist(conc50)


Out[72]:
(array([57, 90, 68, 28, 15, 15,  6,  4,  0,  4]),
 array([ 0.        ,  0.00144531,  0.00289061,  0.00433592,  0.00578123,
        0.00722653,  0.00867184,  0.01011715,  0.01156245,  0.01300776,
        0.01445307]),
 <a list of 10 Patch objects>)

In [73]:
conc2=np.dot(S, conc)

In [74]:
plt.hist(conc2)


Out[74]:
(array([241,   0,   0,   0,   0,   0,   0,   0,   0,  46]),
 array([ 0.        ,  0.00217391,  0.00434783,  0.00652174,  0.00869565,
        0.01086957,  0.01304348,  0.01521739,  0.0173913 ,  0.01956522,
        0.02173913]),
 <a list of 10 Patch objects>)

In [75]:
plot_over_basemap(H, node_colors=conc2)



In [76]:
plt.hist(conc50)


Out[76]:
(array([57, 90, 68, 28, 15, 15,  6,  4,  0,  4]),
 array([ 0.        ,  0.00144531,  0.00289061,  0.00433592,  0.00578123,
        0.00722653,  0.00867184,  0.01011715,  0.01156245,  0.01300776,
        0.01445307]),
 <a list of 10 Patch objects>)