In [54]:
import networkx as nx
import math
import numpy as np
In [55]:
lastnode = 10000
In [56]:
datafile = open('/var/datasets/wdc/small-pld-arc')
G = nx.DiGraph()
for line in datafile:
ijstr = line.split('\t')
i=int(ijstr[0])
j=int(ijstr[1])
if i>lastnode:
break
if j>lastnode:
continue
G.add_edge(i,j)
datafile.close()
Gorig = G.copy()
In [57]:
indexfile = open('/var/datasets/wdc/small-pld-index')
index = {}
for line in indexfile:
namei = line.split('\t')
name=namei[0]
i=int(namei[1])
if i>lastnode:
break
index[i]=name
indexfile.close()
In [58]:
def cleanupgraph(G):
comp = nx.weakly_connected_components(G.copy())
for c in comp:
if len(c)<4:
G.remove_nodes_from(c)
In [59]:
def graphcleanup(G):
for (node, deg) in G.degree_iter():
if deg==0:
G.remove_node(node)
elif deg==1:
if G.degree((G.predecessors(node) + G.successors(node))[0]) == 1:
G.remove_node(node)
elif deg==2 and G.in_degree(node)==1:
if (G.predecessors(node) == G.successors(node)) and G.degree((G.predecessors(node) + G.successors(node))[0]) == 2:
G.remove_node(node)
In [60]:
cleanupgraph(G)
In [61]:
G.size()
Out[61]:
In [62]:
Gorig.number_of_nodes()
Out[62]:
In [63]:
Gorig.size()
Out[63]:
In [64]:
#from IPython.core.display import display_javascript
import json
from networkx.readwrite import json_graph
In [65]:
d = json_graph.node_link_data(G)
for node in d['nodes']:
node['name']=index[node['id']]
node['value']=G.in_degree(node['id'])
node['group']=index[node['id']][-3:]
json.dump(d, open('graph.json','w'))
In [66]:
%%html
<div id="d3-example"></div>
<style>
.node {stroke: #fff; stroke-width: 1.5px;}
.link {stroke: #999; stroke-opacity: .6;}
</style>
<script src="force.js"></script>
In [36]:
L = nx.linalg.laplacianmatrix.directed_laplacian_matrix(G)
Linv = np.linalg.inv(L)
In [37]:
L.shape
Out[37]:
In [38]:
n = L.shape[0]
Reff = np.zeros((n,n))
In [39]:
Gsparse = G.copy()
In [40]:
graphcleanup(Gsparse)
In [41]:
nodelookup={Gsparse.nodes()[idx]:idx for idx in range(len(Gsparse.nodes()))}
In [42]:
edge = np.zeros((n,1))
for (i,j) in Gsparse.edges_iter():
edge[nodelookup[i]] = 1
edge[nodelookup[j]] = -1
Reff[nodelookup[i],nodelookup[j]] = edge.T.dot(Linv.dot(edge))
edge[[nodelookup[i]]] = 0
edge[[nodelookup[j]]] = 0
In [43]:
ReffAbs=np.abs(Reff)+np.abs(Reff.T)
If you call
arr.argsort()[:3] It will give you the indices of the 3 smallest elements.
array([0, 2, 1], dtype=int64) So, for n, you should call
arr.argsort()[:n]
In [44]:
res = ReffAbs.reshape(n**2)
argp = np.argpartition(res,n**2-n)
In [45]:
mask = (ReffAbs < res[argp[-int(0.5*Gsparse.number_of_nodes())]]) & (ReffAbs >0)
for (i,j) in Gsparse.edges():
if mask[nodelookup[i],nodelookup[j]]:
Gsparse.remove_edge(i,j)
In [46]:
cleanupgraph(Gsparse)
In [47]:
d = json_graph.node_link_data(Gsparse)
for node in d['nodes']:
node['name']=index[node['id']]
node['value']=Gsparse.degree(node['id'])
node['group']=index[node['id']][-3:]
json.dump(d, open('graph.json','w'))
In [48]:
Gorig.number_of_edges()
Out[48]:
In [49]:
Gsparse.number_of_edges()
Out[49]:
In [50]:
Gsparse.number_of_nodes()
Out[50]:
In [50]:
In [51]:
GsparseAdj = nx.linalg.adjacency_matrix(Gorig).toarray()
GsparseAdj = nx.to_numpy_matrix(Gorig)
GsparseAdj[ReffAbs < res[argp[-300]]] = 0
Gsparse = nx.from_numpy_matrix?
Gsparse = nx.from_numpy_matrix
Gsparse = nx.from_numpy_matrix(GsparseAdj, create_using=nx.DiGraph())
In [52]:
edge = np.zeros((n,1))
for i in range(n):
if i % int(math.ceil((float(10)/100)*n)) == 0:
print int(math.floor(100*float(i)/n)), '%'
edge[i] = 1
for j in range(i+1, n):
edge[j] = -1
Reff[i,j] = edge.T.dot(Linv.dot(edge))
edge[j] = 0
edge[i] = 0
In [ ]: