# NetworkX

NetworkX is a Python library for doing in-memory graph analysis.



In [1]:

%matplotlib inline
import matplotlib.pyplot as plt
import networkx as nx




In [2]:

# A SIMPLE EXAMPLE

G=nx.Graph()

pos=nx.spring_layout(G)
nx.draw(G, pos=pos)
nx.draw_networkx_labels(G, pos=pos)
plt.show()







In [3]:

# SOME FAKE DATA

locations =['Large Warehouse', 'Small Warehouse'
, 'Retail 1', 'Retail 2', 'Retail 3', 'Retail 4'
, 'Supplier 1', 'Supplier 2', 'Supplier 3']

[   0, 1500,  100,  275, 1600, 1750,  500,  130, 1550] #Large Warehouse'
, [  -1,    0, 1475, 1600,  400,   50,  500, 1800,  100] #Small Warehouse'
, [  -1,   -1,    0,  300, 1750, 1600, 9999, 9999, 9999] #Retail 1
, [  -1,   -1,   -1,    0, 1840, 1900, 9999, 9999, 9999] #Retail 2
, [  -1,   -1,   -1,   -1,    0,  650, 9999, 9999, 9999] #Retail 3
, [  -1,   -1,   -1,   -1,   -1,    0, 9999, 9999, 9999] #Retail 4
, [  -1,   -1,   -1,   -1,   -1,   -1,    0,  400,  700] #Supplier 1
, [  -1,   -1,   -1,   -1,   -1,   -1,   -1,    0, 1900] #Supplier 2
, [  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1, 1775] #Supplier 3
]




In [4]:

# CONVERT THAT FAKE DATA INTO A GRAPH

g = nx.Graph()

for loc in locations:

for i in range(len(locations)):
r = locations[i]
for j in range (i+1, len(locations)):
c = locations[j]
val = row[j]
if val > 0 and val < 9999:




In [5]:

# VISUALIZE OUR DATASET

pos={'Large Warehouse': [ 7,  2],
'Small Warehouse': [ 2,  1.75],
'Retail 1':        [ 6.5,  3],
'Retail 2':        [ 7.5,  .6],
'Retail 3':        [ 3,  .6],
'Retail 4':        [ 1.5,  0.75],
'Supplier 1':      [ 5,  3.5],
'Supplier 2':      [ 9,  3],
'Supplier 3':      [ 1,  2.5 ]}
nx.draw(g, pos=pos, node_size=4000)
nx.draw_networkx_labels(g, pos=pos)
plt.show()







In [6]:

# WHAT IS THE SHORTEST ROUTE TO TRANSPORT FROM SUPPLIER 1 TO RETAIL 3?

nx.dijkstra_path(g, source='Supplier 1', target='Retail 3', weight='miles')




Out[6]:

['Supplier 1', 'Small Warehouse', 'Retail 3']



# Implicit node creation on edge add: a "gotcha" / feature of note



In [7]:

print g.nodes()
print g.nodes()




['Supplier 1', 'Small Warehouse', 'Supplier 2', 'Supplier 3', 'Large Warehouse', 'Retail 4', 'Retail 1', 'Retail 3', 'Retail 2']
['Supplier 1', 'Small Warehouse', 'Supplier 2', 'Supplier 3', 'Large Warehouse', 'Retail 5', 'Retail 4', 'Retail 1', 'Retail 3', 'Retail 2']



# Just a touch of computational theory

## Dijkstra's algorithm

• Finds the shortest path between a source node and all other nodes

Time complexity: $O(|E| + |V|\text{log}|V|)$

## A* search algorithm

• Extends Dijkstra adding in some heuristics

Time complexity: $O(|E|)$

## Johnson's algorithm

• Finds all pairs of shortest paths in a weighted graph
• An extension of Dijkstra (sort of)
• An impressive algorithm

Time complexity: $O(|V|^2 \text{log}|V| + |V||E|$

## Floyd-Warshall algorithm

• Finds all pairs of shortest paths in a weighted graph
• Allows negative edges, but no negative cycles
• Better with highly dense graphs

Time complexity: $O(|V|^3)$

Space complexity: $\Theta(|V|^2)$



In [8]:

ap = nx.floyd_warshall(g, weight='miles')
print(ap['Supplier 3'])




defaultdict(<function <lambda> at 0x7f62a97a5f50>, {'Large Warehouse': 1100, 'Supplier 2': 1000, 'Supplier 3': 0, 'Supplier 1': 600, 'Small Warehouse': 100, 'Retail 5': 601.0, 'Retail 4': 150, 'Retail 1': 1200, 'Retail 3': 500, 'Retail 2': 1375})



# Uses for NetworkX in Data Science

• Related products
• Supply chain optimization
• Social graph
• Internet topology