In [4]:
import json
import os
import codecs
import operator
# Build an undirected, weighted edge list from the pairwise page scores stored
# in ./pages_LCS.json and write it to edgelist.txt as "<p1> <p2> <distance>".

# Minimum score a pair must reach in BOTH directions to be kept as an edge.
threshold = 0.05

# Keys are split on '_' into exactly two page ids; values are numeric scores.
# Use a context manager so the JSON file handle is closed deterministically.
with codecs.open('./pages_LCS.json', 'r', 'utf-8') as lcs_file:
    pagesLCS = json.load(lcs_file)

# graph[p1][p2] -> score stored under the key "p1_p2" (directed).
graph = {}
for page, score in pagesLCS.items():
    p1, p2 = page.split('_')
    graph.setdefault(p1, {})[p2] = score

# edges[p1][p2] == edges[p2][p1] -> 1 - min(score(p1, p2), score(p2, p1)).
edges = {}
for p1, neighbours in graph.items():
    for p2, forward in neighbours.items():
        # The reverse pair may be absent from the JSON; without it the
        # symmetric score cannot be computed, so skip the pair instead of
        # failing with a KeyError.
        backward = graph.get(p2, {}).get(p1)
        if backward is None:
            continue
        if forward < threshold or backward < threshold:
            continue
        distance = 1 - min(forward, backward)
        # Store both directions so every page lists all of its neighbours.
        edges.setdefault(p1, {})[p2] = distance
        edges.setdefault(p2, {})[p1] = distance

# One line per direction of each kept edge, space separated.
with open('edgelist.txt', 'w') as out:
    for p1 in edges:
        for p2 in edges[p1]:
            out.write(str(p1)+' '+str(p2)+' '+str(edges[p1][p2])+'\n')

In [6]:
import csv
import os

import facebook

# Enrich every edge in edgelist.txt with the name and category of both pages
# (looked up through the Facebook Graph API) and write the result to
# edgelist.csv.

# Credentials must never live in the notebook: read the token from the
# environment. Any token that was ever committed to this file should be
# treated as compromised and revoked.
access_token = os.environ.get('FACEBOOK_ACCESS_TOKEN')
if not access_token:
    raise RuntimeError(
        'Set the FACEBOOK_ACCESS_TOKEN environment variable before running this cell.'
    )

graph = facebook.GraphAPI(access_token=access_token, version='2.2')

# Cache of successful lookups, keyed by page id: the same page appears on
# many edges, so this avoids repeating identical API calls.
page_info = {}


def _page_info(page_id):
    """Return the cached {'name', 'category', ...} object for page_id, fetching it once."""
    if page_id not in page_info:
        page_info[page_id] = graph.get_object(id=page_id, fields='name,category')
    return page_info[page_id]


skipped = 0
# utf-8 so that non-ASCII page names cannot fail on a narrow default encoding.
with open('edgelist.txt') as file, open('edgelist.csv', 'w', encoding='utf-8') as out:
    # Header is written unquoted, data rows fully quoted (same layout as before).
    out.write('page1,name1,category1,page2,name2,category2,distance\n')
    # csv.writer escapes embedded double quotes in names/categories, which
    # manual string concatenation did not.
    writer = csv.writer(out, quoting=csv.QUOTE_ALL, lineterminator='\n')

    for line in file:
        if not line.strip():
            continue  # tolerate blank lines in the edge list
        p1, p2, dis = line.split()
        try:
            obj1 = _page_info(p1)
            obj2 = _page_info(p2)
            row = [p1, obj1['name'], obj1['category'],
                   p2, obj2['name'], obj2['category'], dis]
        except (facebook.GraphAPIError, KeyError):
            # Best effort: an edge whose pages cannot be resolved (API error
            # or missing field) is left out of the CSV, but it is counted
            # rather than silently discarded. Other errors propagate.
            skipped += 1
            continue
        writer.writerow(row)

if skipped:
    print('Skipped %d edge(s) whose page lookup failed' % skipped)
print('Complete')


Complete

In [5]:
# Convert the space-separated edgelist.txt into a three-column CSV without
# any API lookups.
# NOTE: this writes to the same 'edgelist.csv' path as the Graph API
# enrichment cell, so whichever of the two cells runs last determines the
# file's contents and column layout.
with open('edgelist.txt') as file, open('edgelist.csv', 'w') as out:
    # Header is unquoted; every data field is wrapped in double quotes.
    out.write('page1,page2,distance\n')
    for line in file:
        if not line.strip():
            continue  # a blank line would otherwise break the 3-way unpack
        p1, p2, dis = line.split()
        out.write('"'+p1+'","'+p2+'","'+dis+'"\n')
print('Complete')


Complete

In [ ]: