Graph topology


In [55]:
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
import matplotlib
matplotlib.rcParams['figure.figsize'] = (6.0, 4.0)
matplotlib.rcParams['savefig.format'] = 'png'

Connectedness


In [58]:
G=nx.erdos_renyi_graph(50,0.1)

In [59]:
nx.draw(G)



In [60]:
nx.is_connected(G)


Out[60]:
False

In [61]:
components=list(nx.connected_components(G))
print(components)


[{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49}, {23}]

In [64]:
colors=np.random.random(size=len(components))
node_color_dict={}

for idx, nodes in enumerate(components):
    for n in nodes:
        node_color_dict[n]=colors[idx]
    
nx.draw(G, node_color=[node_color_dict[n] for n in G.nodes()], pos=nx.layout.spring_layout(G))


Degree distribution


In [37]:
huge_graph=nx.read_gpickle("../data/airport/big_airportnet.gpickle")

In [38]:
degs=list(huge_graph.degree().values())

In [39]:
plt.hist(degs)
plt.xlabel("degree", fontsize=30)
plt.ylabel("frequency", fontsize=30)


Out[39]:
<matplotlib.text.Text at 0x7f715646e630>

Centrality


In [40]:
cents=nx.centrality.betweenness_centrality(huge_graph)

Get the centralmost node:


In [66]:
sorted(cents.keys(), key=lambda x:cents[x])[-2]


Out[66]:
'JFK'

In [42]:
plt.hist(list(cents.values()), bins=10)


Out[42]:
(array([ 443.,   25.,    8.,    6.,    8.,    1.,    4.,    1.,    1.,    3.]),
 array([ 0.        ,  0.00533981,  0.01067962,  0.01601943,  0.02135924,
         0.02669905,  0.03203886,  0.03737866,  0.04271847,  0.04805828,
         0.05339809]),
 <a list of 10 Patch objects>)

Many real world networks tend to have these very few "hubs" with high centrality

Clustering


In [43]:
J=nx.erdos_renyi_graph(8,0.4)
nx.draw(J)



In [69]:
print(nx.clustering(J))

pos = nx.spring_layout(J)
nx.draw(J, pos = pos)
nx.draw_networkx_labels(J, pos = pos)


{0: 0.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0, 5: 0.0, 6: 0.0, 7: 0.0}
Out[69]:
{0: <matplotlib.text.Text at 0x7f7155025ba8>,
 1: <matplotlib.text.Text at 0x7f7154407be0>,
 2: <matplotlib.text.Text at 0x7f7154407a20>,
 3: <matplotlib.text.Text at 0x7f7154f8cf28>,
 4: <matplotlib.text.Text at 0x7f7154f8ccf8>,
 5: <matplotlib.text.Text at 0x7f7154f98400>,
 6: <matplotlib.text.Text at 0x7f7154f98f98>,
 7: <matplotlib.text.Text at 0x7f7154f84748>}
Now let's see how our scale free network is regarding clustering:

In [46]:
huge_clust=nx.clustering(huge_graph)

In [47]:
nx.draw(huge_graph, node_color=[huge_clust[n] for n in huge_graph.nodes()], pos=nx.layout.fruchterman_reingold_layout(huge_graph, scale=2.0, iterations=100))



In [48]:
plt.hist(list(huge_clust.values()))


Out[48]:
(array([   2.,    0.,   13.,   29.,   50.,   89.,  111.,  101.,   77.,   28.]),
 array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ]),
 <a list of 10 Patch objects>)

Average path length


In [49]:
nx.average_shortest_path_length(huge_graph)


Out[49]:
2.2686573146292583

In many real world networks, shortest pathlengths increase sublinearly with network size:


In [50]:
size_array=range(50,1000,50)
avg_pathlens=[nx.average_shortest_path_length(nx.barabasi_albert_graph(n, 4)) for n in size_array]

In [51]:
plt.plot(size_array, avg_pathlens, 'go')
plt.xlabel("size", fontsize=30)
plt.ylabel("avg pathlengths", fontsize=30)


Out[51]:
<matplotlib.text.Text at 0x7f71560c54a8>

In [52]:
plt.plot(np.log(np.array(size_array)), np.array(avg_pathlens), 'bo-')
plt.xlabel("log(size)", fontsize=30)
plt.ylabel("avg pathlengths", fontsize=30)


Out[52]:
<matplotlib.text.Text at 0x7f71550b9860>

Diameter:


In [73]:
J = nx.erdos_renyi_graph(20, 5.0/20.0)
nx.draw(J)
nx.diameter(J)


Out[73]:
4

Minimum spanning tree


In [76]:
T = nx.minimum_spanning_tree(J)

pos = nx.spring_layout(J)

nx.draw(J, pos = pos)
nx.draw_networkx_edges(T, edge_color='g', pos = pos, width = 5)


Out[76]:
<matplotlib.collections.LineCollection at 0x7f71559bada0>

Let's do some exercises!


In [ ]: