In [11]:
    
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import networkx as nx
import pylab as plt
from collections import Counter
import community
    
In [192]:
    
# get meme_names
# Configuration for this run: which meme to analyse and where its
# pre-computed edge files live.
meme_name = "thevoice"
results_path = "/home/clemsos/Dev/mitras/results/"
# BUG FIX: results_path already ends with "/" -- appending another "/"
# produced a double slash ("results//thevoice"); concatenate directly.
meme_path = results_path + meme_name
    
In [193]:
    
# Load the raw edge list, collapse duplicate edges into weights, and
# build a weighted directed graph.
meme_graph_csv = meme_path + "/" + meme_name + "_edges.csv"
with open(meme_graph_csv, 'rb') as edgefile:
    next(edgefile)  # skip headers (file.next() was Python-2-only)
    edgecsv = csv.reader(edgefile)

    # raw (source, target) pairs, one row per interaction
    edges = [(e[0], e[1]) for e in edgecsv]
    # collapse duplicates: each distinct pair becomes one
    # "source target weight" line, weight = number of occurrences
    edges_weighted = ["%s %s %d" % (src, dst, w)
                      for (src, dst), w in Counter(edges).most_common()]

    print("Edges (raw files) : %d edges" % len(edges))
    print("Weighted edges %d" % len(edges_weighted))
    # read_weighted_edgelist accepts any iterable of lines, so the
    # in-memory list can be fed to it directly
    G = nx.read_weighted_edgelist(edges_weighted, nodetype=str,
                                  delimiter=" ", create_using=nx.DiGraph())
    
    
In [195]:
    
# Split the graph into connected components and report the relative
# size of the largest ones.
# to_undirected() copies the whole graph, so build the undirected view
# once instead of three times.
G_und = G.to_undirected()
G_components = nx.connected_component_subgraphs(G_und)
print("Number of components: %d" % nx.number_connected_components(G_und))
# connected_component_subgraphs returns the subgraphs largest-first
# (networkx 1.x); report the top 5 as a share of all nodes
main_components = G_components[0:5]
for i, g in enumerate(main_components):
    print("cluster %i : %.3f %% " % (i, (float(g.number_of_nodes()) / G.number_of_nodes()) * 100))
    
    
In [196]:
    
G_mc = G_components[0]
G_mc_per_total= (float(G_mc.number_of_nodes())/G.number_of_nodes())*100
print "Biggest connected component is %.2f %% of the graph" % G_mc_per_total
nx.write_graphml(G_mc, meme_path+"/"+meme_name+"_main_component.graphml")
# use the most connected component as the main one
# G = G_components[0]
    
    
Measures how well a network decomposes into modular communities.
A high modularity score indicates sophisticated internal structure. This structure, often called a community structure, describes how the network is compartmentalized into sub-networks. These sub-networks (or communities) have been shown to have significant real-world meaning.
In [168]:
    
# create dendogram
dendo = community.generate_dendogram(G.to_undirected())
for level in range(len(dendo) - 1) :
    print "partition at level", level #, "is", community.partition_at_level(dendo, level)
    
    
In [172]:
    
# from http://perso.crans.org/aynaud/communities/
# Louvain best partition and its modularity.
# to_undirected() copies the graph, so build the undirected view once.
G_undirected = G.to_undirected()
best_partition = community.best_partition(G_undirected)
modularity = community.modularity(best_partition, G_undirected)
print("Modularity of the best partition: %f" % modularity)
# BUG FIX: the partition maps node -> community id, so the number of
# distinct values is the number of *communities*; the original message
# mislabelled it as a node count.
print("Number of communities in the best partition : %d" % len(set(best_partition.values())))
    
    
In [173]:
    
# Collapse each community of the best partition into a single node and
# persist the induced (quotient) graph for later visualisation.
G_ok = community.induced_graph(best_partition, G.to_undirected())
nx.write_graphml(G_ok, meme_path + "/" + meme_name + "_best_partition.graphml")
    
Betweenness centrality is a measure of a node's centrality in a network. It is equal to the number of shortest paths from all vertices to all others that pass through that node. Betweenness centrality is a more useful measure (than just connectivity) of both the load and importance of a node. The former is more global to the network, whereas the latter is only a local effect.
In [186]:
    
# Rank nodes by betweenness centrality (share of shortest paths
# passing through each node) and show the top scores.
cent_dict = nx.betweenness_centrality(G.to_undirected())
# (score, node) tuples so a plain tuple sort orders by score;
# .items() replaces the Python-2-only .iteritems(), and a single
# sorted(reverse=True) replaces the sort()+reverse() pair
cent_items = sorted(((b, a) for (a, b) in cent_dict.items()), reverse=True)
# Highest centrality
for c in cent_items[0:5]:
    # BUG FIX: message typo "betweeness" corrected
    print("Highest betweenness centrality : %f" % c[0])
    
    
In [194]:
    
%pylab inline
in_degrees = G.in_degree() # dictionary node:degree
in_count=[v for v in  Counter(in_degrees.values()).most_common() if v[0] > 2] 
in_values=[v[0] for v in in_count]
in_hist=[v[1] for v in in_count]
out_degrees = G.out_degree() # dictionary node:degree
out_count=[v for v in  Counter(out_degrees.values()).most_common() if v[0] > 2] 
out_values=[v[0] for v in out_count]
out_hist=[v[1] for v in out_count]
in_degree_sequence=sorted(G.in_degree().values(),reverse=True) # degree sequence
out_degree_sequence=sorted(G.out_degree().values(),reverse=True) # degree sequence
dmax=max(degree_sequence)
plt.loglog(in_degree_sequence,'b-',marker='o')
plt.loglog(out_degree_sequence,'r-',marker='v')
# plt.figure()
# plt.plot(in_values,in_hist,'rx') # in-degree
# plt.plot(out_values,out_hist,'bv') # out-degree
plt.legend(['In-degree','Out-degree'])
plt.xlabel('Rank')
plt.ylabel('Degree')
plt.title(meme_name+' degree distribution')
for d in Counter(in_degrees.values()).most_common(5):
    print "Highest in-degree value : %d"% d[1]
for d in Counter(out_degrees.values()).most_common(5):
    print "Highest out-degree value : %d"% d[1]
    
    
    
    
In [29]:
    
# draw partition graph
# NOTE(review): `partition` is only defined in a *later* cell (In [149],
# community.best_partition) -- this cell (In [29]) depends on
# out-of-order execution and fails on a fresh Restart & Run All.
# One grayscale shade per community: nodes sharing a community id get
# node_color = str(count / size), a matplotlib gray-level string.
size = float(len(set(partition.values())))
pos = nx.spring_layout(G.to_undirected())
count = 0.
for com in set(partition.values()) :
     count = count + 1.
     list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]
     nx.draw_networkx_nodes(G.to_undirected(), pos, list_nodes, node_size = 20, node_color = str(count / size))
# nx.draw_networkx_edges(G.to_undirected(),pos, alpha=0.5)
    
    Out[29]:
    
A clustering coefficient is a measure of the degree to which nodes in a graph tend to cluster together. Evidence suggests that in most real-world networks, and in particular social networks, nodes tend to create tightly knit groups characterised by a relatively high density of ties.
The global clustering coefficient is based on triplets of nodes.
Network average clustering coefficient
A graph is considered small-world when its average clustering coefficient is significantly higher than that of a comparable random graph while its average shortest path length remains similarly small.
In [65]:
    
N,K = G.order(), G.size()
print "Nodes: ", N
print "Edges: ", K
avg_deg = float(K)/N
print "Average degree: ", avg_deg
# Clustering coefficient of all nodes (in a dictionary)
ccs = nx.clustering(G.to_undirected())
# Average clustering coefficient
avg_clust_coef = sum(ccs.values()) / len(ccs)
# also : nx.algorithms.cluster.average_clustering(G.to_undirected())
print "Average clustering coeficient: %f"%avg_clust_coef
# random graph
#  G_random=nx.erdos_renyi_graph(len(G.nodes()), 0.5)
# nx.algorithms.cluster.average_clustering(G_random)
    
    
In [1]:
    
# Maximal-clique statistics and a bar chart of the clique-size
# distribution.
# Enumerate cliques once -- the original ran the expensive
# nx.find_cliques() generator twice for the same data.
cliques = [c for c in nx.find_cliques(G.to_undirected())]
cliques_length = [len(c) for c in cliques]
print("total cliques: %d" % len(cliques_length))
print("max clique length: %d" % max(cliques_length))
# BUG FIX: `average` only existed via the %pylab star-import, and the
# "%d" format silently truncated the fractional part of the mean
print("average clique length: %.2f" % (sum(cliques_length) / float(len(cliques_length))))
cliques_dist = Counter(cliques_length).most_common()
title = "Cliques distribution"
plt.bar([c[0] for c in cliques_dist], [c[1] for c in cliques_dist])
plt.grid(True, linestyle='-', color='0.75')
plt.ylabel('Counts')
plt.title(title, fontstyle='italic')
    
    
In [ ]:
    
    
In [176]:
    
# nx.average_shortest_path_length(G)
# G_components = nx.connected_component_subgraphs(G.to_undirected())
#for g in G_components:
# print(nx.average_shortest_path_length(g))
    
    
In [247]:
    
    
In [218]:
    
# Prefer the graphviz layout when PyGraphviz is installed; otherwise
# fall back to the (slower, force-directed) spring layout.
try:
    from networkx import graphviz_layout
    layout=nx.graphviz_layout
except ImportError:
    print "PyGraphviz not found; drawing with spring layout; will be slow."
    layout=nx.spring_layout
    
# NOTE(review): this cell is adapted from the networkx binomial-graph
# gallery example; since the binomial_graph line is commented out, all
# four subplots re-layout and redraw the *same* graph G -- only the
# subplot titles differ with p.
region=220 # for pylab 2x2 subplot layout
plt.subplots_adjust(left=0,right=1,bottom=0,top=0.95,wspace=0.01,hspace=0.01)
pvals=[0.003, 0.006, 0.008, 0.015]
for p in pvals:
    # G=nx.binomial_graph(n,p)
    pos=layout(G)
    region+=1
    plt.subplot(region)
    plt.title("p = %6.3f"%(p))
    nx.draw(G,pos,
            with_labels=False,
            node_size=10
            )
    
    # identify largest connected component
    Gcc=nx.connected_component_subgraphs(G)
    G0=Gcc[0]
    # NOTE(review): with_labels is not a draw_networkx_edges option; it
    # is swallowed by **kwds in networkx 1.x -- harmless but dead
    nx.draw_networkx_edges(G0,pos,
                           with_labels=False,
                           edge_color='r',
                           width=6.0
                        )
    # show other connected components
    for Gi in Gcc[1:]:
       if len(Gi)>1:
          nx.draw_networkx_edges(Gi,pos,
                                 with_labels=False,
                                 edge_color='r',
                                 alpha=0.3,
                                 width=5.0
                                 )
    
    
    
    
In [149]:
    
# from http://perso.crans.org/aynaud/communities/ 
partition = community.best_partition(G.to_undirected()) 
print "Partitions found: ", len(set(partition.values())) 
# Modularity
modularity=community.modularity(partition,G.to_undirected())
print "Modularity: %f"%modularity
    
    
In [122]:
    
# Draw the graph with nodes colored by community: community number i
# (out of `size` communities) is painted with the grayscale shade
# str(i / size), then the edges are overlaid with transparency.
size = float(len(set(partition.values())))
pos = nx.spring_layout(G.to_undirected())
count = 0.
for com in set(partition.values()):
    count += 1.
    list_nodes = [n for n in partition.keys() if partition[n] == com]
    nx.draw_networkx_nodes(G.to_undirected(), pos, list_nodes, node_size=20, node_color=str(count / size))
nx.draw_networkx_edges(G.to_undirected(), pos, alpha=0.5)
    
    
    Out[122]:
    
In [119]:
    
import numpy as np
# Number of levels in the Louvain dendrogram; the bare final
# expression is the cell's displayed output.
ddg = community.generate_dendogram(G.to_undirected())
len(ddg)
    
    Out[119]:
In [8]:
    
# Betweenness centrality
# bet_cen = nx.betweenness_centrality(G_mc)
# print "average_betweenness_centrality %f"%bet_cen
# Closeness centrality
# clo_cen = nx.closeness_centrality(G_mc)
# print "closeness_centrality %f"%clo_cen
# Eigenvector centrality
# eig_cen = nx.eigenvector_centrality(G_mc)
# print eig_cen
#print "Eigenvector centrality : %f"%eig_cen[0]