In [1]:
import json
import numpy as np
In [7]:
# Load the raw network (a dict with 'nodes' and 'edges' lists).
# To clean the hashtag network instead, point this at "./hashtag_network.json"
# (the output path in the final cell is hard-coded separately).
# A context manager guarantees the file handle is closed once parsing is done.
with open("./handle_network.json", "r") as network_file:
    full = json.load(network_file)
In [8]:
# Working handles on the two halves of the network.
# Note: these names alias the lists stored inside `full`; no copy is made here.
new_edges = full['edges']
new_nodes = full['nodes']
In [9]:
# Display (edge count, node count) for the network as currently bound.
len(new_edges),len(new_nodes)
Out[9]:
In [10]:
# Node cleaning: keep nodes whose raw size is at least `node_thresh`, assign
# each an integer id, log-compress its size and drop layout coordinates.
#
# The result is built as a NEW list of copied dicts read from `full['nodes']`,
# so the loaded data is never mutated and re-running this cell gives the same
# answer (an in-place version would re-threshold already log-scaled sizes and
# drop every node on a second run).
node_thresh = 500  # minimum raw node size to keep

label2nodeid = {}  # label -> id for surviving nodes; used to re-key edges
nodes2remove = []  # original indices of the nodes that were dropped
kept_nodes = []
for i, node in enumerate(full['nodes']):
    if node['size'] < node_thresh:
        nodes2remove.append(i)
        continue

    # Copy everything except the layout coordinates.
    cleaned = {key: value for key, value in node.items() if key not in ("x", "y")}
    # The id is the node's index in the unfiltered list, so ids of kept nodes
    # are unique but not contiguous.
    cleaned['id'] = i
    # Natural log of the raw size, truncated to int and floored at 1. The inner
    # max() keeps np.log away from zero/negative input if the threshold is lowered.
    cleaned['size'] = max(int(np.log(max(int(node['size']), 1))), 1)

    label2nodeid[node['label']] = i
    kept_nodes.append(cleaned)

new_nodes = kept_nodes
print(len(new_nodes))
In [11]:
# Edge cleaning: keep edges whose endpoints both survived node filtering and
# whose raw size is at least `edge_thresh`; re-key endpoints from labels to
# node ids and attach a weight attribute string.
#
# The result is built as a NEW list of copied dicts read from `full['edges']`,
# so the loaded data is never mutated and re-running this cell gives the same
# answer (an in-place version would find integer ids where it expects labels
# on a second run and drop every edge).
edge_thresh = 10  # minimum raw edge size to keep

edges2remove = []  # original indices of the edges that were dropped
kept_edges = []
for i, edge in enumerate(full['edges']):
    try:
        # Only these lookups may legitimately fail: an endpoint label that is
        # not in label2nodeid, or an edge record missing a required field.
        source_id = label2nodeid[edge['source']]
        target_id = label2nodeid[edge['target']]
        raw_size = edge['size']
    except KeyError:
        edges2remove.append(i)
        continue

    if raw_size < edge_thresh:
        edges2remove.append(i)
        continue

    cleaned = dict(edge)
    # The id is the edge's index in the unfiltered list.
    cleaned['id'] = i
    cleaned['source'] = source_id
    cleaned['target'] = target_id
    # Edge sizes stay linear (truncated to int, floored at 1), unlike node sizes.
    cleaned['size'] = max(int(raw_size), 1)
    cleaned['attributes'] = "{Weight:" + str(cleaned['size']) + '}'
    kept_edges.append(cleaned)

new_edges = kept_edges
print(len(new_edges))
In [12]:
# Display the number of edges currently in `new_edges`.
len(new_edges)
Out[12]:
In [13]:
# Display the number of edges reachable through `full['edges']`.
# NOTE(review): `new_edges` was initially bound to this very list, so this only
# differs from len(new_edges) if the filtering cell rebinds `new_edges` to a
# new list rather than popping from it in place — confirm which applies.
len(full['edges'])
Out[13]:
In [14]:
# Reassemble the cleaned network with the same top-level keys as the input.
new_full = dict(nodes=new_nodes, edges=new_edges)
In [15]:
# Write the cleaned network to disk. The context manager flushes and closes
# the file deterministically instead of relying on garbage collection.
with open('./handle_network_cleaned.json', "w") as cleaned_file:
    json.dump(new_full, cleaned_file)
In [ ]: