scratch

  1. copy of network dataframe
  2. drawing network graph

In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob

# Sorted so the processing order (and any "first N files" cut-off applied
# later) is reproducible; glob() returns paths in arbitrary order.
gml_files = sorted(glob('../output/network/*/*.gml'))

# graph = nx.read_gml('../data/graph/article1.gml')
# print(len(gml_files))
# gml_files
# gml_files[0]

In [2]:
def calculate_graph_inf(graph, name=None):
    """Label ``graph`` and print its networkx summary.

    Parameters
    ----------
    graph : networkx graph
        Graph to describe. Its ``name`` attribute is overwritten.
    name : str, optional
        Label stored in ``graph.name``. When omitted, the module-level
        variable ``filename`` is used, which is what one-argument callers
        rely on; a ``NameError`` is raised if it has not been defined yet.
    """
    if name is None:
        # Backward-compatible fallback to notebook state. Prefer passing
        # ``name`` explicitly so the function does not depend on cell order.
        name = filename
    graph.name = name
    info = nx.info(graph)
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print(info)

    ## plot spring layout
    # plt.figure(figsize=(11,11))
    # nx.draw_spring(graph, arrows=True, with_labels=True)

def highest_centrality(cent_dict):
    """Return the ``(node, value)`` pair with the largest centrality value.

    Parameters
    ----------
    cent_dict : dict
        Mapping of node -> centrality score.

    Returns
    -------
    tuple
        ``(node, value)`` for the highest score. When several nodes share the
        highest score, the one with the largest node key is returned (the same
        tie-break as sorting ``(value, node)`` pairs in descending order).

    Raises
    ------
    ValueError
        If ``cent_dict`` is empty.
    """
    if not cent_dict:
        raise ValueError("highest_centrality() needs a non-empty centrality dictionary")
    # .items() exists on Python 2 and 3 (.iteritems() is Python 2 only).
    # Comparing on (value, node) keeps the tie-break deterministic.
    node, value = max(cent_dict.items(), key=lambda item: (item[1], item[0]))
    return (node, value)

In [3]:
# Column layout of the per-graph summary table, in display order.
network_data_columns = [
    # identification
    'name', 'sentiment',
    # size
    '# nodes', '# edges', 'avg degree', 'density',
    # average centralities
    'avg deg cent', 'avg bet cent', 'avg clo cent',
    # most central node per measure
    'highest degc', 'highest betc', 'highest cloc',
    # directed degree averages
    'avg in-deg', 'avg out-deg',
    # components and connectivity
    '# strong comp', '# weak comp', '# conn comp',
    'avg node connect', 'deg assort coeff',
]

# Empty frame that already carries every column.
network_data = pd.DataFrame(columns=network_data_columns)
## Re: dealing with edges lost during graph conversions. Idea: take the MultiDiGraph and convert it to an undirected graph U with reciprocal=True; because U is reciprocal, only the missing edges are listed in it. Take that list of missing edges as e and add e to ugraph, so that ugraph then contains all the information from graph. Code: U = graph.to_undirected(reciprocal=True); e = U.edges(); ugraph.add_edges_from(e); print nx.info(ugraph)

In [5]:
# graph = directed, ugraph = undirected

# Only the first MAX_GRAPHS files are summarised (the loop used to break out
# after index 20, i.e. after 21 files).
MAX_GRAPHS = 21
METRIC_DECIMALS = 4


def _round_metric(value):
    """Round a floating-point metric to METRIC_DECIMALS places, as a float."""
    return round(float(value), METRIC_DECIMALS)


def _mean_of_values(metric_by_node):
    """Mean of a {node: score} dict's values; list() keeps it Python 3 safe."""
    return np.mean(list(metric_by_node.values()))


# One dict of metrics per graph. The DataFrame is built once after the loop
# instead of being grown row by row (DataFrame.append was removed in pandas 2.0).
graph_records = []

for gml_graph in gml_files[:MAX_GRAPHS]:
    graph = nx.read_gml(gml_graph)
    ugraph = graph.to_undirected() ## to undirected graph
    ###
#    U = graph.to_undirected(reciprocal=True)
#    e = U.edges()
#    ugraph.add_edges_from(e)
    ###
    # calculate_graph_inf() reads the module-level `filename` when it is called
    # with one argument, so `filename` must be assigned before the calls below.
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 40)
    print(gml_graph)
    calculate_graph_inf(graph)
    calculate_graph_inf(ugraph)

    # calculate variables

    # The sentiment label is the name of the directory holding the file.
    sent = os.path.basename(filepath)
    nodes = nx.number_of_nodes(graph)
    edges = nx.number_of_edges(graph)
    density = _round_metric(nx.density(graph))

    # Each centrality is computed once and reused for both the average and
    # the "highest" column.
    deg_cen = nx.degree_centrality(graph)
    bet_cen = nx.betweenness_centrality(graph)
    clo_cen = nx.closeness_centrality(graph)
    avg_deg_cen = _mean_of_values(deg_cen)
    avg_bet_cen = _mean_of_values(bet_cen)
    avg_clo_cen = _mean_of_values(clo_cen)
    highest_deg_cen = highest_centrality(deg_cen)
    highest_bet_cen = highest_centrality(bet_cen)
    highest_clo_cen = highest_centrality(clo_cen)

    # dict(...) accepts both the plain dict returned by networkx 1.x and the
    # degree view returned by networkx >= 2.0.
    in_deg = sum(dict(graph.in_degree()).values()) / float(nodes)
    out_deg = sum(dict(graph.out_degree()).values()) / float(nodes)
    avg_deg = _round_metric(in_deg + out_deg)

    strong_comp = nx.number_strongly_connected_components(graph)
    weak_comp = nx.number_weakly_connected_components(graph)
    conn_comp = nx.number_connected_components(ugraph)
    avg_node_con = _round_metric(nx.average_node_connectivity(graph))
    deg_assort_coeff = _round_metric(nx.degree_assortativity_coefficient(graph))

    # save variables into a record
    # Every floating-point metric is stored as a rounded float (some used to be
    # "%.4f" strings) so the columns stay numeric and sort/aggregate correctly.
    graph_values = {'name':filename,
                    'sentiment':sent,
                    '# nodes':nodes,
                    '# edges':edges,
                    'avg degree':avg_deg,
                    'density':density,
                    'avg deg cent':_round_metric(avg_deg_cen),
                    'avg bet cent':_round_metric(avg_bet_cen),
                    'avg clo cent':_round_metric(avg_clo_cen),
                    'highest degc':highest_deg_cen,
                    'highest betc':highest_bet_cen,
                    'highest cloc':highest_clo_cen,
                    'avg in-deg':_round_metric(in_deg),
                    'avg out-deg':_round_metric(out_deg),
                    '# strong comp':strong_comp,
                    '# weak comp':weak_comp,
                    '# conn comp':conn_comp,
                    'avg node connect':avg_node_con,
                    'deg assort coeff':deg_assort_coeff
                    }

    graph_records.append(graph_values)

# Rebuilt from scratch on every run, so re-executing this cell does not
# duplicate rows.
network_data = pd.DataFrame(graph_records, columns=network_data_columns)


----------------------------------------
../output/network/negative/article03.gml
Name: article03.gml
Type: MultiDiGraph
Number of nodes: 18
Number of edges: 13
Average in degree:   0.7222
Average out degree:   0.7222
Name: article03.gml
Type: MultiGraph
Number of nodes: 18
Number of edges: 13
Average degree:   1.4444
----------------------------------------
../output/network/negative/article05.gml
Name: article05.gml
Type: MultiDiGraph
Number of nodes: 22
Number of edges: 25
Average in degree:   1.1364
Average out degree:   1.1364
Name: article05.gml
Type: MultiGraph
Number of nodes: 22
Number of edges: 25
Average degree:   2.2727
----------------------------------------
../output/network/negative/article06.gml
Name: article06.gml
Type: MultiDiGraph
Number of nodes: 124
Number of edges: 121
Average in degree:   0.9758
Average out degree:   0.9758
Name: article06.gml
Type: MultiGraph
Number of nodes: 124
Number of edges: 121
Average degree:   1.9516
----------------------------------------
../output/network/negative/article07.gml
Name: article07.gml
Type: MultiDiGraph
Number of nodes: 56
Number of edges: 57
Average in degree:   1.0179
Average out degree:   1.0179
Name: article07.gml
Type: MultiGraph
Number of nodes: 56
Number of edges: 56
Average degree:   2.0000
----------------------------------------
../output/network/negative/article1.gml
Name: article1.gml
Type: MultiDiGraph
Number of nodes: 140
Number of edges: 147
Average in degree:   1.0500
Average out degree:   1.0500
Name: article1.gml
Type: MultiGraph
Number of nodes: 140
Number of edges: 145
Average degree:   2.0714
----------------------------------------
../output/network/negative/article1001.gml
Name: article1001.gml
Type: MultiDiGraph
Number of nodes: 134
Number of edges: 134
Average in degree:   1.0000
Average out degree:   1.0000
Name: article1001.gml
Type: MultiGraph
Number of nodes: 134
Number of edges: 134
Average degree:   2.0000
----------------------------------------
../output/network/negative/article1021.gml
Name: article1021.gml
Type: MultiDiGraph
Number of nodes: 64
Number of edges: 64
Average in degree:   1.0000
Average out degree:   1.0000
Name: article1021.gml
Type: MultiGraph
Number of nodes: 64
Number of edges: 64
Average degree:   2.0000
----------------------------------------
../output/network/negative/article152.gml
Name: article152.gml
Type: MultiDiGraph
Number of nodes: 78
Number of edges: 67
Average in degree:   0.8590
Average out degree:   0.8590
Name: article152.gml
Type: MultiGraph
Number of nodes: 78
Number of edges: 67
Average degree:   1.7179
----------------------------------------
../output/network/negative/article2308.gml
Name: article2308.gml
Type: MultiDiGraph
Number of nodes: 66
Number of edges: 56
Average in degree:   0.8485
Average out degree:   0.8485
Name: article2308.gml
Type: MultiGraph
Number of nodes: 66
Number of edges: 56
Average degree:   1.6970
----------------------------------------
../output/network/negative/article3335.gml
Name: article3335.gml
Type: MultiDiGraph
Number of nodes: 120
Number of edges: 128
Average in degree:   1.0667
Average out degree:   1.0667
Name: article3335.gml
Type: MultiGraph
Number of nodes: 120
Number of edges: 127
Average degree:   2.1167
----------------------------------------
../output/network/negative/article4106.gml
Name: article4106.gml
Type: MultiDiGraph
Number of nodes: 38
Number of edges: 36
Average in degree:   0.9474
Average out degree:   0.9474
Name: article4106.gml
Type: MultiGraph
Number of nodes: 38
Number of edges: 36
Average degree:   1.8947
----------------------------------------
../output/network/negative/article432.gml
Name: article432.gml
Type: MultiDiGraph
Number of nodes: 100
Number of edges: 96
Average in degree:   0.9600
Average out degree:   0.9600
Name: article432.gml
Type: MultiGraph
Number of nodes: 100
Number of edges: 94
Average degree:   1.8800
----------------------------------------
../output/network/negative/article5164.gml
Name: article5164.gml
Type: MultiDiGraph
Number of nodes: 104
Number of edges: 119
Average in degree:   1.1442
Average out degree:   1.1442
Name: article5164.gml
Type: MultiGraph
Number of nodes: 104
Number of edges: 116
Average degree:   2.2308
----------------------------------------
../output/network/negative/article5717.gml
Name: article5717.gml
Type: MultiDiGraph
Number of nodes: 62
Number of edges: 62
Average in degree:   1.0000
Average out degree:   1.0000
Name: article5717.gml
Type: MultiGraph
Number of nodes: 62
Number of edges: 62
Average degree:   2.0000
----------------------------------------
../output/network/negative/article5813.gml
Name: article5813.gml
Type: MultiDiGraph
Number of nodes: 50
Number of edges: 54
Average in degree:   1.0800
Average out degree:   1.0800
Name: article5813.gml
Type: MultiGraph
Number of nodes: 50
Number of edges: 53
Average degree:   2.1200
----------------------------------------
../output/network/negative/article621.gml
Name: article621.gml
Type: MultiDiGraph
Number of nodes: 30
Number of edges: 32
Average in degree:   1.0667
Average out degree:   1.0667
Name: article621.gml
Type: MultiGraph
Number of nodes: 30
Number of edges: 32
Average degree:   2.1333
----------------------------------------
../output/network/negative/article683.gml
Name: article683.gml
Type: MultiDiGraph
Number of nodes: 234
Number of edges: 236
Average in degree:   1.0085
Average out degree:   1.0085
Name: article683.gml
Type: MultiGraph
Number of nodes: 234
Number of edges: 235
Average degree:   2.0085
----------------------------------------
../output/network/negative/article703.gml
Name: article703.gml
Type: MultiDiGraph
Number of nodes: 282
Number of edges: 280
Average in degree:   0.9929
Average out degree:   0.9929
Name: article703.gml
Type: MultiGraph
Number of nodes: 282
Number of edges: 280
Average degree:   1.9858
----------------------------------------
../output/network/negative/article774.gml
Name: article774.gml
Type: MultiDiGraph
Number of nodes: 57
Number of edges: 54
Average in degree:   0.9474
Average out degree:   0.9474
Name: article774.gml
Type: MultiGraph
Number of nodes: 57
Number of edges: 53
Average degree:   1.8596
----------------------------------------
../output/network/negative/article782.gml
Name: article782.gml
Type: MultiDiGraph
Number of nodes: 84
Number of edges: 77
Average in degree:   0.9167
Average out degree:   0.9167
Name: article782.gml
Type: MultiGraph
Number of nodes: 84
Number of edges: 77
Average degree:   1.8333
----------------------------------------
../output/network/negative/article99.gml
Name: article99.gml
Type: MultiDiGraph
Number of nodes: 45
Number of edges: 46
Average in degree:   1.0222
Average out degree:   1.0222
Name: article99.gml
Type: MultiGraph
Number of nodes: 45
Number of edges: 44
Average degree:   1.9556

In [ ]:
# Display the per-graph summary table.
network_data

In [ ]:


In [ ]:


In [ ]:
# Parenthesised single-argument print is valid on both Python 2 and 3.
print(graph)

In [ ]:


In [ ]:


Drawing


In [ ]:
# read gml file
# nx.read_gml() needs a path; calling it with no argument raises TypeError.
# NOTE(review): the first discovered file is used here as a placeholder --
# point this at the article you actually want to draw and analyse.
graph = nx.read_gml(gml_files[0])
ugraph = graph.to_undirected()

In [ ]:
# Draw the directed graph twice, each on its own figure:
# spring layout at 11x11, then circular layout at 12x12.
for _figsize, _draw in (((11, 11), nx.draw_spring), ((12, 12), nx.draw_circular)):
    plt.figure(figsize=_figsize)
    _draw(graph, arrows=True, with_labels=True)

Analysis

Degree histogram

Returns a list of the frequency of each degree value; the degree values are the indices of the list.


In [ ]:
# returns a list of frequencies of degrees
# Label and value go into one formatted string: `print ("label"), value` only
# prints the label on Python 3, and a single-argument print is portable.
print("undirected graph = {0}".format(nx.degree_histogram(ugraph)))
print("directed graph = {0}".format(nx.degree_histogram(graph)))

In [ ]:
# degree rank plot
# only for undirected type

# dict(...) accepts both the plain dict returned by networkx 1.x and the
# degree view returned by networkx >= 2.0.
degree_sequence = sorted(dict(nx.degree(ugraph)).values(), reverse=True) # degree sequence
#print "Degree sequence", degree_sequence
dmax = max(degree_sequence)

plt.loglog(degree_sequence, 'b-', marker='o')
plt.title("Degree rank plot")
plt.ylabel("degree")
plt.xlabel("rank")

# draw graph in inset
plt.axes([0.45, 0.45, 0.45, 0.45])
# Largest connected component (first one wins on ties). Built from
# connected_components() + subgraph() because connected_component_subgraphs()
# was removed in networkx 2.4.
Gcc = ugraph.subgraph(max(nx.connected_components(ugraph), key=len))
pos = nx.spring_layout(Gcc)
plt.axis('off')
nx.draw_networkx_nodes(Gcc, pos, node_size=20)
nx.draw_networkx_edges(Gcc, pos, alpha=0.4)

plt.show()

Density

Notes: The density is 0 for a graph without edges, and density = 1 for a complete graph. The density of multigraphs can be higher than 1 (self loops are counted in the total number of edges, so graphs with self loops can have density higher than 1).


In [ ]:
# Single-argument print with a formatted string works on Python 2 and 3.
print("undirected graph = {0}".format(nx.density(ugraph)))
print("directed graph = {0}".format(nx.density(graph)))

In [ ]:
# nx.diameter()

# nx.center()

Degree centrality

Degree centrality for a node v is the fraction of nodes it is connected to.


In [ ]:
# get all the values of the dictionary, this returns the centrality scores
# materialise them as a list (dict.values() is a view on Python 3, which
# np.array() would wrap as a single object instead of a numeric array)
# take the mean of the scores

print("Degree centrality (directed) = {0}".format(
    np.mean(list(nx.degree_centrality(graph).values()))))
print("Degree centrality (undirected) = {0}".format(
    np.mean(list(nx.degree_centrality(ugraph).values()))))

Closeness centrality

Closeness centrality of a node u is the reciprocal of the sum of the shortest-path distances from u to all n-1 other nodes. Since the sum of distances depends on the number of nodes in the graph, closeness is normalized by the sum of minimum possible distances, n-1. Higher values of closeness indicate higher centrality.


In [ ]:
# clo_cen = np.array(nx.closeness_centrality(graph).values()).mean()
# nx.closeness_centrality(graph)
# print "Closeness centrality (directed) =", np.array(nx.closeness_centrality(graph).values()).mean()
# print "Closeness centrality (undirected) =", np.array(nx.closeness_centrality(ugraph).values()).mean()

# Per-node closeness of the directed graph as a one-column table,
# ordered from least to most central.
_closeness_col = 'closeness centrality'
a = nx.closeness_centrality(graph)
dfIn = pd.DataFrame.from_dict(a, orient='index')
dfIn.columns = [_closeness_col]
dfIn = dfIn.sort_values(by=[_closeness_col])
dfIn

Betweenness centrality

Betweenness centrality of a node v is the sum of the fraction of all-pairs shortest paths that pass through v. Compute the shortest-path betweenness centrality for nodes.


In [ ]:
# nx.betweenness_centrality(graph)
# bet_cen = np.array(nx.betweenness_centrality(graph).values()).mean()

# The directed result is computed once and reused for both the mean and the
# per-node table below.
a = nx.betweenness_centrality(graph)
_bet_undirected = nx.betweenness_centrality(ugraph)

# print "Betweenness centrality (directed) =", nx.betweenness_centrality(graph)
# list(...) is needed because dict.values() is a view on Python 3; the
# single-argument print is valid on Python 2 and 3.
print("Betweenness centrality (directed) = {0}".format(np.mean(list(a.values()))))
print("Betweenness centrality (undirected) = {0}".format(np.mean(list(_bet_undirected.values()))))

# Per-node betweenness of the directed graph, ordered from least to most central.
dfIn = pd.DataFrame.from_dict(a, orient='index')
dfIn.columns = ['betweenness centrality']
dfIn = dfIn.sort_values(by=['betweenness centrality'])
dfIn

Current-flow betweenness centrality

Current-flow betweenness centrality uses an electrical current model for information spreading, in contrast to betweenness centrality, which uses shortest paths. Current-flow betweenness centrality is also known as random-walk betweenness centrality.


In [ ]:
# run for largest component
# graph must be connected
# print nx.current_flow_betweenness_centrality(graph)

Degree assortativity coefficient


In [ ]:
#deg_ac = nx.degree_assortativity_coefficient(graph)
# Single-argument print with a formatted string works on Python 2 and 3.
print("Degree assortativity coefficient (directed) = {0}".format(
    nx.degree_assortativity_coefficient(graph)))
print("Degree assortativity coefficient (undirected) = {0}".format(
    nx.degree_assortativity_coefficient(ugraph)))

Clustering coefficient


In [ ]:
# (cannot be multigraph)
# nx.average_clustering(ugraph)

Average node connectivity

The average connectivity $\bar{\kappa}$ of a graph G is the average of local node connectivity over all pairs of nodes of G.


In [ ]:
# nx.edge_connectivity(graph)
# nx.node_connectivity(graph)

# avg_node_con = nx.average_node_connectivity(graph)
# Single-argument print with a formatted string works on Python 2 and 3.
print("Average node connectivity (directed) = {0}".format(
    nx.average_node_connectivity(graph)))
print("Average node connectivity (undirected) = {0}".format(
    nx.average_node_connectivity(ugraph)))

In [ ]:
# intersection_all()
# return a new graph that contains only the edges that exist in all graphs
# all supplied graphs must have the same node set
### Summary print info print "Density =" print "Degree centrality =" print "Closeness centrality =" print "Betweenness centrality =" print "Degree assortativity coefficient =" print "Degree pearson correlation coefficient =" print "Average node connectivity =" #print "Closeness vitality ="