In [1]:
import json
import numpy as np

In [7]:
# Load the raw network export (a dict with 'edges' and 'nodes' lists).
# The hashtag network can be processed instead by switching the path:
# full = json.load(open("./hashtag_network.json", "r"))
# A context manager is used so the file handle is closed deterministically.
with open("./handle_network.json", "r") as network_file:
    full = json.load(network_file)

In [8]:
# Bind short names to the edge and node lists. These are references to the
# lists inside `full`, not copies, so in-place changes are visible through both.
new_edges = full['edges']
new_nodes = full['nodes']

In [9]:
# Raw sizes before any filtering, shown as (edge count, node count).
(len(new_edges), len(new_nodes))


Out[9]:
(106211, 72296)

In [10]:
# Filter the node list down to nodes whose 'size' is at least `node_thresh`.
#
# For every node the original list position is stored as its 'id'. Surviving
# nodes additionally get:
#   * an entry in `label2nodeid` (label -> id),
#   * their 'size' replaced by the integer part of its natural log (minimum 1),
#   * their 'x' / 'y' keys removed if present.
#
# NOTE: node dicts and the list are modified in place, so this cell is not
# idempotent. Re-running it without reloading `full` would apply the threshold
# to the already log-scaled sizes.
label2nodeid = {}
node_thresh = 500
nodes2remove = []  # original indices of the nodes that were dropped
kept_nodes = []
for i, node in enumerate(new_nodes):
    node['id'] = i
    if node['size'] < node_thresh:
        nodes2remove.append(i)
        continue
    label2nodeid[node['label']] = i
    node['size'] = max(int(np.log(int(node['size']))), 1)
    node.pop("x", None)
    node.pop("y", None)
    kept_nodes.append(node)

# Replace the contents in one step instead of popping each removed index
# (one pop per removed node is quadratic on a list this large). Slice
# assignment keeps the same list object, so other references to it stay in sync.
new_nodes[:] = kept_nodes
print(len(new_nodes))


57

In [11]:
# Filter the edge list and rewrite endpoints from node labels to node ids.
#
# Every edge gets its original list position as 'id'. An edge is dropped when
#   * its 'source' or 'target' label has no entry in `label2nodeid`
#     (or any other KeyError occurs while processing it), or
#   * its 'size' is below `edge_thresh`.
# Surviving edges get an integer 'size' (minimum 1) and an 'attributes' string
# of the form "{Weight:<size>}".
#
# NOTE: edges are modified in place, so this cell is not idempotent. After one
# run 'source'/'target' hold ids rather than labels, so a second run without
# reloading would most likely fail the label lookup and drop every edge.
edges2remove = []  # original indices of the edges that were dropped
edge_thresh = 10
kept_edges = []
for i, edge in enumerate(new_edges):
    edge['id'] = i
    try:
        edge['source'] = label2nodeid[edge['source']]
        edge['target'] = label2nodeid[edge['target']]
        below_thresh = edge['size'] < edge_thresh
        edge['size'] = max(int(edge['size']), 1)
#         edge['size'] = max(int(np.log(int(edge['size']))),1)
        edge['attributes'] = "{Weight:"+str(edge['size'])+'}'
    except KeyError:
        edges2remove.append(i)
        continue
    if below_thresh:
        edges2remove.append(i)
        continue
    kept_edges.append(edge)

# Replace the contents in one step instead of popping each removed index
# (one pop per removed edge is quadratic on a list this large). Slice
# assignment keeps the same list object, so other references to it stay in sync.
new_edges[:] = kept_edges
print(len(new_edges))


197

In [12]:
# Number of edges remaining after the filtering step.
len(new_edges)


Out[12]:
197

In [13]:
# `new_edges` was bound to full['edges'] without copying and the filtering
# modified that list in place, so this reports the filtered count as well.
len(full['edges'])


Out[13]:
197

In [14]:
# Assemble the cleaned network with the same top-level layout as the input:
# a 'nodes' list and an 'edges' list.
new_full = dict(nodes=new_nodes, edges=new_edges)

In [15]:
# Write the cleaned network to disk. The context manager guarantees the file
# is flushed and closed as soon as the dump finishes.
with open('./handle_network_cleaned.json', "w") as cleaned_file:
    json.dump(new_full, cleaned_file)

In [ ]: