Visual Graphing of the Reddit Network

In [83]:
# preambles
import networkx as nx
import cPickle as pickle
import os

# load pickled network file
pickle_name = 'Data_world8_network.pckl'
pickle_dir = 'C:\\Users\\FG\\Desktop\\PhD\\Research\\reddit\\Pickled Data'
reddit_network = pickle.load( open(pickle_dir + os.sep + pickle_name, "rb") )
print "loaded reddit network from ", pickle_dir + os.sep + pickle_name


loaded reddit network from  C:\Users\FG\Desktop\PhD\Research\reddit\Pickled Data\Data_world8_network.pckl

In [91]:
# Build an undirected graph linking users who exchanged enough messages,
# then reduce it to its largest connected component.
# presumably reddit_network maps userA -> {userB -> sequence of messages}; verify against the pickled data
MIN_MESSAGES = 2   # minimum number of messages between two users for an edge

reddit_graph = nx.Graph(name='Reddit Graph')

# add edges (nodes are added automatically); self-loops are skipped
for userA, contacts in reddit_network.items():
    for userB, messages in contacts.items():
        if len(messages) >= MIN_MESSAGES and userA != userB:
            reddit_graph.add_edge(userA, userB)

# keep only the largest connected component; max() avoids sorting every
# component, and the guard leaves an empty graph untouched instead of raising
if reddit_graph.number_of_nodes() > 0:
    largest_component = max(nx.connected_components(reddit_graph), key=len)
    reddit_graph = reddit_graph.subgraph(largest_component).copy()

In [92]:
# Stats
print 'degrees:',nx.degree_histogram(reddit_graph)
print 'info: ', nx.info(reddit_graph)
print 'density: ', nx.density(reddit_graph), ' (0-1 scale, 0 for empty graph, 1 for complete graph)'


degrees: [0, 3540, 1368, 644, 351, 220, 136, 117, 87, 50, 46, 37, 26, 29, 21, 14, 14, 9, 14, 15, 5, 3, 7, 4, 8, 3, 2, 2, 1, 6, 2, 2, 3, 1, 0, 2, 0, 0, 0, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
info:  Name: Reddit Graph
Type: Graph
Number of nodes: 6804
Number of edges: 9279
Average degree:   2.7275
density:  0.000400928006396  (0-1 scale, 0 for empty graph, 1 for complete graph)

In [86]:
# plotting setup: matplotlib with inline figure rendering
import matplotlib.pyplot as plt
%matplotlib inline

In [93]:
# Compute a spring layout with a few iterations, then draw the graph without labels.
node_positions = nx.spring_layout(reddit_graph, iterations=5)
nx.draw_networkx(
    reddit_graph,
    pos=node_positions,
    with_labels=False,
    node_size=10,
)

# same viewing window on both axes, with the axes themselves hidden
view_limits = (-0.05, 1.05)
plt.xlim(*view_limits)
plt.ylim(*view_limits)
plt.axis('off')
plt.show()



In [94]:
# extract list of triangles
triangles_dict = nx.triangles(reddit_graph)
nodes_in_triangles = [node for node in reddit_graph.nodes() if triangles_dict[node]>=1]
print '# of triangles in graph:', sum(nx.triangles(reddit_graph).values())/3

# Enumerate each triangle once as a (node, neighbour, common neighbour) tuple
triangle_list = []
visited = set()   # nodes already handled as the first corner
for node in reddit_graph:
    visited.add(node)
    paired = set()   # neighbours of `node` already handled as the second corner
    neighbours = set(reddit_graph[node])
    for second in neighbours:
        if second not in visited:
            paired.add(second)
            # a third corner must be adjacent to both `node` and `second`
            for third in neighbours.intersection(reddit_graph[second]):
                # skip corners already handled, so each triangle is recorded only once
                if third not in visited and third not in paired:
                    triangle_list.append((node, second, third))


# of triangles in graph: 151

In [95]:
#reconstruct graph from triangles (with only edges in those triangles)

from itertools import combinations

# Graph containing only the edges that belong to a triangle in triangle_list
triangles_reddit_graph = nx.Graph(name='Triangles Reddit Graph')

# each triangle contributes its three sides (every 2-node combination of its corners)
triangles_reddit_graph.add_edges_from(
    side
    for triangle in triangle_list
    for side in combinations(triangle, 2)
)

In [102]:
# Compute a spring layout for the triangles-only graph, then draw it without labels.
triangle_positions = nx.spring_layout(triangles_reddit_graph, iterations=15)
nx.draw_networkx(
    triangles_reddit_graph,
    pos=triangle_positions,
    with_labels=False,
    node_size=10,
)

# same viewing window on both axes, with the axes themselves hidden
view_limits = (-0.05, 1.05)
plt.xlim(*view_limits)
plt.ylim(*view_limits)
plt.axis('off')
plt.show()



In [97]:
# Stats
# print 'degrees:',nx.degree_histogram(reddit_graph)
print 'info: ', nx.info(triangles_reddit_graph)
print 'density: ', nx.density(triangles_reddit_graph), ' (0-1 scale, 0 for empty graph, 1 for complete graph)'

# plot the degre dist
print "Degree Distribution"
hist=nx.degree_histogram(triangles_reddit_graph)
plt.bar(range(len(hist)), hist, align='center')
plt.xlim(0,30)
plt.show()


info:  Name: Triangles Reddit Graph
Type: Graph
Number of nodes: 243
Number of edges: 377
Average degree:   3.1029
density:  0.0128218209026  (0-1 scale, 0 for empty graph, 1 for complete graph)
Degree Distribution

In [90]: