In [1]:
%matplotlib inline
import networkx as nx
import matplotlib.pyplot as plt

Introduction to graph theory


In [2]:
G = nx.Graph()
G.add_edge(1,2)
nx.draw_networkx(G)
plt.show()



In [3]:
G.add_nodes_from([3, 4])
nx.draw_networkx(G)
plt.show()



In [4]:
G.add_edge(3,4)
G.add_edges_from([(2, 3), (4, 1)])
nx.draw_networkx(G)
plt.show()



In [5]:
G.nodes()


Out[5]:
[1, 2, 3, 4]

In [6]:
G.edges()


Out[6]:
[(1, 2), (1, 4), (2, 3), (3, 4)]

In [7]:
G.adjacency_list()


Out[7]:
[[2, 4], [1, 3], [4, 2], [3, 1]]

In [8]:
nx.to_dict_of_lists(G)


Out[8]:
{1: [2, 4], 2: [1, 3], 3: [4, 2], 4: [3, 1]}

In [9]:
nx.to_edgelist(G)


Out[9]:
[(1, 2, {}), (1, 4, {}), (2, 3, {}), (3, 4, {})]

In [10]:
nx.to_numpy_matrix(G)


Out[10]:
matrix([[ 0.,  1.,  0.,  1.],
        [ 1.,  0.,  1.,  0.],
        [ 0.,  1.,  0.,  1.],
        [ 1.,  0.,  1.,  0.]])

In [12]:
print (nx.to_scipy_sparse_matrix(G))


  (0, 1)	1
  (0, 3)	1
  (1, 0)	1
  (1, 2)	1
  (2, 1)	1
  (2, 3)	1
  (3, 0)	1
  (3, 2)	1

In [13]:
G.add_edge(1,3)
nx.draw_networkx(G)
plt.show()



In [14]:
G.degree()


Out[14]:
{1: 3, 2: 2, 3: 3, 4: 2}

In [15]:
plt.hist(nx.fast_gnp_random_graph(10000, 0.01).degree().values())


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-15-a7922f6b8d3a> in <module>()
----> 1 plt.hist(nx.fast_gnp_random_graph(10000, 0.01).degree().values())

C:\Users\keith\Anaconda3\lib\site-packages\matplotlib\pyplot.py in hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, hold, data, **kwargs)
   3079                       histtype=histtype, align=align, orientation=orientation,
   3080                       rwidth=rwidth, log=log, color=color, label=label,
-> 3081                       stacked=stacked, data=data, **kwargs)
   3082     finally:
   3083         ax._hold = washold

C:\Users\keith\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
   1895                     warnings.warn(msg % (label_namer, func.__name__),
   1896                                   RuntimeWarning, stacklevel=2)
-> 1897             return func(ax, *args, **kwargs)
   1898         pre_doc = inner.__doc__
   1899         if pre_doc is None:

C:\Users\keith\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in hist(***failed resolving arguments***)
   6144             x = np.array([[]])
   6145         else:
-> 6146             x = _normalize_input(x, 'x')
   6147         nx = len(x)  # number of datasets
   6148 

C:\Users\keith\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in _normalize_input(inp, ename)
   6090                 else:
   6091                     raise ValueError(
-> 6092                         "{ename} must be 1D or 2D".format(ename=ename))
   6093                 if inp.shape[1] < inp.shape[0]:
   6094                     warnings.warn(

ValueError: x must be 1D or 2D

Graph algorithms


In [16]:
G = nx.krackhardt_kite_graph()
nx.draw_networkx(G)
plt.show()



In [17]:
print nx.has_path(G, source=1, target=9)
print nx.shortest_path(G, source=1, target=9)
print nx.shortest_path_length(G, source=1, target=9)


  File "<ipython-input-17-326014e4ecb0>", line 1
    print nx.has_path(G, source=1, target=9)
           ^
SyntaxError: invalid syntax

In [18]:
nx.betweenness_centrality(G)


Out[18]:
{0: 0.023148148148148143,
 1: 0.023148148148148143,
 2: 0.0,
 3: 0.10185185185185183,
 4: 0.0,
 5: 0.23148148148148148,
 6: 0.23148148148148148,
 7: 0.38888888888888884,
 8: 0.2222222222222222,
 9: 0.0}

In [19]:
nx.degree_centrality(G)


Out[19]:
{0: 0.4444444444444444,
 1: 0.4444444444444444,
 2: 0.3333333333333333,
 3: 0.6666666666666666,
 4: 0.3333333333333333,
 5: 0.5555555555555556,
 6: 0.5555555555555556,
 7: 0.3333333333333333,
 8: 0.2222222222222222,
 9: 0.1111111111111111}

In [20]:
nx.closeness_centrality(G)


Out[20]:
{0: 0.5294117647058824,
 1: 0.5294117647058824,
 2: 0.5,
 3: 0.6,
 4: 0.5,
 5: 0.6428571428571429,
 6: 0.6428571428571429,
 7: 0.6,
 8: 0.42857142857142855,
 9: 0.3103448275862069}

In [21]:
nx.eigenvector_centrality(G)


Out[21]:
{0: 0.35220918419838565,
 1: 0.35220918419838565,
 2: 0.28583482369644964,
 3: 0.481020669200118,
 4: 0.28583482369644964,
 5: 0.3976909028137205,
 6: 0.3976909028137205,
 7: 0.19586101425312444,
 8: 0.04807425308073236,
 9: 0.011163556091491361}

In [22]:
nx.clustering(G)


Out[22]:
{0: 0.6666666666666666,
 1: 0.6666666666666666,
 2: 1.0,
 3: 0.5333333333333333,
 4: 1.0,
 5: 0.5,
 6: 0.5,
 7: 0.3333333333333333,
 8: 0.0,
 9: 0.0}

In [32]:
import community # Community module for community detection and clustering

G = nx.powerlaw_cluster_graph(100, 1, .4)
partition = community.best_partition(G)

for i in set(partition.values()):
   print ("Community", i)
   members = list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == i]
   print (members)

values = [partition.get(node) for node in G.nodes()]
nx.draw_spring(G, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False)
plt.show()

print ("Modularity score:", community.modularity(partition, G))


  File "C:\Users\keith\github\ipython_notebook\Python Data Science Essentials\chapter_5\community.py", line 512
    print str(elem) + " " + str(part)
            ^
SyntaxError: invalid syntax

Graph loading, dumping, and sampling


In [33]:
dump_file_base = "dumped_graph"

# Be sure the dump_file file doesn't exist
def remove_file(filename):
    import os
    if os.path.exists(filename):
        os.remove(filename)

In [34]:
G = nx.krackhardt_kite_graph()

In [35]:
# GML format write and read
GML_file = dump_file_base + '.gml'
remove_file(GML_file)

nx.write_gml(G, GML_file)
G2 = nx.read_gml(GML_file)

assert(G.edges() == G2.edges())


---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-35-908a19793297> in <module>()
      6 G2 = nx.read_gml(GML_file)
      7 
----> 8 assert(G.edges() == G2.edges())

AssertionError: 

In [36]:
# The same can be done with
# JSON, Adjacency List, Edge List, GEXF, GraphML and so on

In [37]:
import snowball_sampling
my_social_network = nx.Graph()
snowball_sampling.snowball_sampling(my_social_network, 2, 'alberto')
nx.draw(my_social_network)
plt.show()


Reching depth 0
 new nodes to investigate: ['alberto']
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-37-9926c45f4270> in <module>()
      1 import snowball_sampling
      2 my_social_network = nx.Graph()
----> 3 snowball_sampling.snowball_sampling(my_social_network, 2, 'alberto')
      4 nx.draw(my_social_network)
      5 plt.show()

C:\Users\keith\github\ipython_notebook\Python Data Science Essentials\chapter_5\snowball_sampling.py in snowball_sampling(graph, max_depth, central_name, sampling_rate)
     37 
     38             if len(nodes_that_depth) == 1:
---> 39                 get_json_name(graph, node)
     40 
     41             elif random.random() <= sampling_rate:

C:\Users\keith\github\ipython_notebook\Python Data Science Essentials\chapter_5\snowball_sampling.py in get_json_name(graph, name)
      6     Note: this function modifies the graph!
      7     """
----> 8     import urllib2
      9 
     10     response = urllib2.urlopen('http://www.livejournal.com/misc/fdata.bml?user=' + name)

ModuleNotFoundError: No module named 'urllib2'

In [38]:
my_sampled_social_network = nx.Graph()
snowball_sampling.snowball_sampling(my_sampled_social_network, 3, 'alberto', sampling_rate=0.2)
nx.draw(my_sampled_social_network)
plt.show()


Reching depth 0
 new nodes to investigate: ['alberto']
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-38-cc7091f79071> in <module>()
      1 my_sampled_social_network = nx.Graph()
----> 2 snowball_sampling.snowball_sampling(my_sampled_social_network, 3, 'alberto', sampling_rate=0.2)
      3 nx.draw(my_sampled_social_network)
      4 plt.show()

C:\Users\keith\github\ipython_notebook\Python Data Science Essentials\chapter_5\snowball_sampling.py in snowball_sampling(graph, max_depth, central_name, sampling_rate)
     37 
     38             if len(nodes_that_depth) == 1:
---> 39                 get_json_name(graph, node)
     40 
     41             elif random.random() <= sampling_rate:

C:\Users\keith\github\ipython_notebook\Python Data Science Essentials\chapter_5\snowball_sampling.py in get_json_name(graph, name)
      6     Note: this function modifies the graph!
      7     """
----> 8     import urllib2
      9 
     10     response = urllib2.urlopen('http://www.livejournal.com/misc/fdata.bml?user=' + name)

ModuleNotFoundError: No module named 'urllib2'

In [ ]:


In [ ]:


In [ ]:


In [ ]: