In [1]:
# Directory holding the Twitter dataset; the trailing slash matters because
# later cells append file names to this string directly.
datadir = '/var/datasets/twitter/'
In [2]:
# Full path of the combined edge-list file inside datadir.
file = "{}twitter_combined.txt".format(datadir)
In [3]:
# Long listing of the dataset file with a human-readable size.
!ls -lh {file}
In [3]:
# Peek at the last lines of the dataset file to see its line format.
!tail {file}
In [4]:
# Count the lines in the dataset file.
!wc -l {file}
In [14]:
#!sort {file} > {datadir}sorted_twitter_combined.txt
!uniq {datadir}/sorted_twitter_combined.txt | wc -l
In [4]:
import networkx as nx
import json
from networkx.readwrite import json_graph
In [5]:
# Read the edge list into a directed graph, converting node labels to int.
G = nx.read_edgelist(
    file,
    nodetype=int,
    create_using=nx.DiGraph(),
)
In [6]:
# Number of edges in the full graph.
G.number_of_edges()
Out[6]:
In [7]:
# Number of nodes in the full graph.
G.number_of_nodes()
Out[7]:
In [53]:
# Work on a sample: the subgraph induced by the first 4000 nodes in iteration order.
# list(...) is required because NetworkX 2.x node views cannot be sliced, and .copy()
# produces an independent, mutable graph (in 2.x subgraph() returns a read-only view),
# which cleanupgraph() needs in order to remove nodes from it.
Gs = G.subgraph(list(G.nodes())[0:4000]).copy()
In [54]:
# Number of edges in the sampled subgraph before cleanup.
Gs.number_of_edges()
Out[54]:
In [55]:
def cleanupgraph(G, min_size=200):
    """Remove every small weakly connected component from ``G`` in place.

    Parameters
    ----------
    G : graph accepted by ``nx.weakly_connected_components``
        The graph to prune. It is modified in place.
    min_size : int, optional
        Components with fewer than ``min_size`` nodes are removed. The default
        of 200 reproduces the original hard-coded threshold.

    Returns
    -------
    None
    """
    # Collect all components before touching G, so that nothing is iterating over
    # the graph while nodes are removed. This replaces the full graph copy that was
    # previously made for the same purpose.
    components = list(nx.weakly_connected_components(G))
    for component in components:
        if len(component) < min_size:
            G.remove_nodes_from(component)
In [56]:
# Prune the small weakly connected components from the sampled graph (in place).
cleanupgraph(G=Gs)
In [57]:
# Number of edges left in the sampled subgraph after cleanup.
Gs.number_of_edges()
Out[57]:
In [58]:
# Save and plot graph
# Export the sampled graph in node-link form and give every node a 'name'
# (its id) and a 'value' (its degree in Gs).
# NOTE(review): these fields are presumably what force.js reads from graph.json - confirm.
d = json_graph.node_link_data(Gs)
for node in d['nodes']:
    node['name'] = node['id']
    node['value'] = Gs.degree(node['id'])
#d['adjacency'] = json_graph.adjacency_data(Gs)['adjacency']
# Write through a context manager so the file is flushed and closed
# deterministically instead of leaking the handle.
with open('graph.json', 'w') as fh:
    json.dump(d, fh)
# Render the container element and styles, then load the force.js script.
get_ipython().run_cell_magic(u'html', u'', u'<div id="d3-example"></div>\n<style>\n.node {stroke: #fff; stroke-width: 1.5px;}\nmarker {stroke: #999;}\n.link {stroke: #999; stroke-opacity: .6;}\n</style>\n<script src="force.js"></script>')
#nx.draw_graphviz(G)
In [ ]: