In [4]:
import json
import os
import codecs
import operator
# Build an undirected, thresholded edge list from the pairwise page scores in
# pages_LCS.json and write it to edgelist.txt as "<page1> <page2> <distance>".
# NOTE(review): "LCS" is presumably a longest-common-subsequence similarity in
# [0, 1]; that is not established by this cell -- confirm against the producer.

# Use a context manager so the JSON file handle is closed after parsing.
with codecs.open('./pages_LCS.json', 'r', 'utf-8') as lcs_file:
    pagesLCS = json.load(lcs_file)

# graph[p1][p2] = score stored under the key "<p1>_<p2>".
# Keys are expected to contain exactly one underscore; any other shape makes
# the two-name unpacking below raise ValueError.
graph = {}
for page, score in pagesLCS.items():
    p1, p2 = page.split('_')
    graph.setdefault(p1, {})[p2] = score

# Minimum score required in BOTH directions for a pair to become an edge.
threshold = 0.05

# edges[p1][p2] = edges[p2][p1] = 1 - min(forward score, reverse score).
edges = {}
for p1, neighbours in graph.items():
    for p2, forward in neighbours.items():
        # A pair without a reverse entry cannot satisfy the two-way threshold,
        # so skip it instead of failing with KeyError on graph[p2][p1].
        reverse = graph.get(p2, {}).get(p1)
        if reverse is None or forward < threshold or reverse < threshold:
            continue
        distance = 1 - min(forward, reverse)
        edges.setdefault(p1, {})[p2] = distance
        edges.setdefault(p2, {})[p1] = distance

# Each undirected edge is stored under both endpoints, so it is written twice
# (once per direction).
with open('edgelist.txt', 'w') as out:
    for p1, neighbours in edges.items():
        for p2, distance in neighbours.items():
            out.write(str(p1) + ' ' + str(p2) + ' ' + str(distance) + '\n')
In [6]:
# Enrich edgelist.txt with each page's name and category from the Facebook
# Graph API and write the result to edgelist.csv.
import csv
import os
import sys

import facebook

# Never commit credentials: the token is read from the environment.
# NOTE(review): the access token that used to be hardcoded in this cell has
# been published with the notebook and should be treated as compromised.
access_token = os.environ.get('FACEBOOK_ACCESS_TOKEN')
if not access_token:
    raise RuntimeError(
        'Set the FACEBOOK_ACCESS_TOKEN environment variable before running this cell.'
    )
graph = facebook.GraphAPI(access_token=access_token, version='2.2')

# A page appears in many edges; remember successful lookups so each page is
# requested from the API at most once.
page_cache = {}


def _page_info(page_id):
    """Return the Graph API object (fields: name, category) for ``page_id``."""
    if page_id not in page_cache:
        page_cache[page_id] = graph.get_object(id=page_id, fields='name,category')
    return page_cache[page_id]


skipped = 0
# UTF-8 so non-ASCII page names can be written; newline='' as required by csv.
with open('edgelist.txt') as file, \
        open('edgelist.csv', 'w', encoding='utf-8', newline='') as out:
    # The header is written unquoted, exactly as before.
    out.write('page1,name1,category1,page2,name2,category2,distance\n')
    # QUOTE_ALL keeps every field quoted and doubles any embedded '"' so page
    # names containing quotes no longer corrupt the file.
    writer = csv.writer(out, quoting=csv.QUOTE_ALL, lineterminator='\n')
    for line in file:
        if not line.strip():
            continue  # tolerate blank lines instead of failing the unpack
        p1, p2, dis = line.split()
        try:
            obj1 = _page_info(p1)
            obj2 = _page_info(p2)
            row = [p1, obj1['name'], obj1['category'],
                   p2, obj2['name'], obj2['category'], dis]
        except Exception as exc:
            # Still best-effort (an edge whose pages cannot be resolved is
            # dropped), but the failure is reported and Ctrl-C is not swallowed.
            skipped += 1
            print('Skipping edge {} {}: {!r}'.format(p1, p2, exc), file=sys.stderr)
            continue
        writer.writerow(row)

if skipped:
    print('Skipped {} edge(s) whose page details could not be fetched'.format(skipped),
          file=sys.stderr)
print('Complete')
In [5]:
# Convert the whitespace-separated edgelist.txt into a quoted three-column CSV.
# NOTE(review): this writes 'edgelist.csv', the same path the Graph API cell
# writes to; whichever of the two cells runs last determines the file contents.
with open('edgelist.txt') as file, open('edgelist.csv', 'w') as out:
    out.write('page1,page2,distance\n')
    for line in file:
        if not line.strip():
            continue  # tolerate blank lines instead of failing the unpack
        p1, p2, dis = line.split()
        out.write('"{}","{}","{}"\n'.format(p1, p2, dis))
print('Complete')
In [ ]: