In [54]:
import networkx as nx
import math
import numpy as np

Load data


In [55]:
lastnode = 10000

In [56]:
datafile = open('/var/datasets/wdc/small-pld-arc')

G = nx.DiGraph()

for line in datafile:
    ijstr = line.split('\t')
    
    i=int(ijstr[0])
    j=int(ijstr[1])
    
    if i>lastnode:
        break
    if j>lastnode:
        continue
    G.add_edge(i,j)
    
datafile.close()
Gorig = G.copy()

In [57]:
indexfile = open('/var/datasets/wdc/small-pld-index')
index = {}

for line in indexfile:
    namei = line.split('\t')
    
    name=namei[0]
    i=int(namei[1])
    
    if i>lastnode:
        break

    index[i]=name
    
indexfile.close()

In [58]:
def cleanupgraph(G):
    comp = nx.weakly_connected_components(G.copy())
    for c in comp:
        if len(c)<4:
            G.remove_nodes_from(c)

In [59]:
def graphcleanup(G):
    for (node, deg) in G.degree_iter():
            if deg==0:
                G.remove_node(node)
            elif deg==1:
                if G.degree((G.predecessors(node) + G.successors(node))[0]) == 1:
                    G.remove_node(node)
            elif deg==2 and G.in_degree(node)==1:
                if (G.predecessors(node) == G.successors(node)) and G.degree((G.predecessors(node) + G.successors(node))[0]) == 2:
                    G.remove_node(node)

In [60]:
cleanupgraph(G)

In [61]:
G.size()


Out[61]:
2032

In [62]:
Gorig.number_of_nodes()


Out[62]:
1049

In [63]:
Gorig.size()


Out[63]:
2419

Convert to Javascript for interactivity


In [64]:
#from IPython.core.display import display_javascript
import json
from networkx.readwrite import json_graph

In [65]:
d = json_graph.node_link_data(G)
for node in d['nodes']:
    node['name']=index[node['id']]
    node['value']=G.in_degree(node['id'])
    node['group']=index[node['id']][-3:]

json.dump(d, open('graph.json','w'))

In [66]:
%%html
<div id="d3-example"></div>
<style>
.node {stroke: #fff; stroke-width: 1.5px;}
.link {stroke: #999; stroke-opacity: .6;}
</style>
<script src="force.js"></script>



In [36]:
L = nx.linalg.laplacianmatrix.directed_laplacian_matrix(G)
Linv = np.linalg.inv(L)

In [37]:
L.shape


Out[37]:
(510, 510)

In [38]:
n = L.shape[0]
Reff = np.zeros((n,n))

In [39]:
Gsparse = G.copy()

In [40]:
graphcleanup(Gsparse)

In [41]:
nodelookup={Gsparse.nodes()[idx]:idx for idx in range(len(Gsparse.nodes()))}

In [42]:
edge = np.zeros((n,1))
for (i,j) in Gsparse.edges_iter():
    edge[nodelookup[i]] = 1
    edge[nodelookup[j]] = -1
    Reff[nodelookup[i],nodelookup[j]] = edge.T.dot(Linv.dot(edge))
    edge[[nodelookup[i]]] = 0
    edge[[nodelookup[j]]] = 0

In [43]:
ReffAbs=np.abs(Reff)+np.abs(Reff.T)

If you call

arr.argsort()[:3] It will give you the indices of the 3 smallest elements.

array([0, 2, 1], dtype=int64) So, for n, you should call

arr.argsort()[:n]


In [44]:
res = ReffAbs.reshape(n**2)
argp = np.argpartition(res,n**2-n)

In [45]:
mask = (ReffAbs < res[argp[-int(0.5*Gsparse.number_of_nodes())]]) & (ReffAbs >0)
for (i,j) in Gsparse.edges():
    if mask[nodelookup[i],nodelookup[j]]:
        Gsparse.remove_edge(i,j)

In [46]:
cleanupgraph(Gsparse)

In [47]:
d = json_graph.node_link_data(Gsparse)
for node in d['nodes']:
    node['name']=index[node['id']]
    node['value']=Gsparse.degree(node['id'])
    node['group']=index[node['id']][-3:]

json.dump(d, open('graph.json','w'))

In [48]:
Gorig.number_of_edges()


Out[48]:
2419

In [49]:
Gsparse.number_of_edges()


Out[49]:
239

In [50]:
Gsparse.number_of_nodes()


Out[50]:
46

In [50]:


In [51]:
GsparseAdj = nx.linalg.adjacency_matrix(Gorig).toarray()
GsparseAdj = nx.to_numpy_matrix(Gorig)
GsparseAdj[ReffAbs < res[argp[-300]]] = 0
Gsparse = nx.from_numpy_matrix?
Gsparse = nx.from_numpy_matrix
Gsparse = nx.from_numpy_matrix(GsparseAdj, create_using=nx.DiGraph())

In [52]:
edge = np.zeros((n,1))
for i in range(n):
    if i % int(math.ceil((float(10)/100)*n)) == 0: 
        print int(math.floor(100*float(i)/n)), '%'
        
    edge[i] = 1

    for j in range(i+1, n):
        edge[j] = -1
        Reff[i,j] = edge.T.dot(Linv.dot(edge))
        edge[j] = 0
        
    edge[i] = 0


0 %
10 %
20 %
30 %
40 %
50 %
60 %
70 %
80 %
90 %

In [ ]: