In [2]:
import matplotlib.pyplot as plt

In [5]:
import networkx as nx

In [53]:
import numpy as np

In [61]:
# Input files. Each one is read as a CSV edge list whose first three columns
# are: source,target,weight (any further columns are ignored).
fname_ground_truth = 'synthetic/synth-cascades-7/ground_truth_fixed.txt'
fname_output = 'synthetic/synth-cascades-7/synth-cascades-7-output-at-181.txt'

In [62]:
# Load every line of both edge lists up front and close the files right away.
# f_output / f_ground are lists of raw lines (newline terminators kept), so the
# parsing cells can iterate them exactly as they would a file object -- and can
# be re-run without re-opening anything, unlike a one-shot file iterator.
with open(fname_output, 'r') as fh:
    f_output = fh.readlines()
with open(fname_ground_truth, 'r') as fh:
    f_ground = fh.readlines()

In [63]:
# Node ids seen so far in each edge list (two independent, initially empty sets).
added_nodes_output, added_nodes_ground = set(), set()

In [64]:
# Empty directed graphs: one for the inferred network, one for the ground truth.
G_output, G_ground = nx.DiGraph(), nx.DiGraph()

In [65]:
# Build G_output from the inferred edge list: one "source,target,weight,..." row
# per line. Rows that cannot be parsed are echoed and skipped.
for line in f_output:
    # Strip only the line terminator. Slicing with [:-1] would delete a real
    # character from a final line that has no trailing newline (silently
    # corrupting its last field) and would leave '\r' behind on CRLF files.
    vals = line.rstrip('\r\n').split(',')
    try:
        u1, u2 = int(vals[0]), int(vals[1])
        weight = float(vals[2])
    except (ValueError, IndexError):
        # ValueError: non-numeric field (header, blank line).
        # IndexError: fewer than three columns.
        print(line)
        continue
    # set.update is idempotent, and add_edge creates missing endpoints itself,
    # so no membership test or explicit add_node call is needed.
    added_nodes_output.update((u1, u2))
    G_output.add_edge(u1, u2, weight=weight)

In [66]:
# Build G_ground from the ground-truth edge list: one "source,target,weight,..."
# row per line. Rows that cannot be parsed are echoed and skipped.
for line in f_ground:
    # Strip only the line terminator. Slicing with [:-1] would delete a real
    # character from a final line that has no trailing newline (silently
    # corrupting its last field) and would leave '\r' behind on CRLF files.
    vals = line.rstrip('\r\n').split(',')
    try:
        u1, u2 = int(vals[0]), int(vals[1])
        weight = float(vals[2])
    except (ValueError, IndexError):
        # ValueError: non-numeric field (header, blank line).
        # IndexError: fewer than three columns.
        print(line)
        continue
    # set.update is idempotent, and add_edge creates missing endpoints itself,
    # so no membership test or explicit add_node call is needed.
    added_nodes_ground.update((u1, u2))
    G_ground.add_edge(u1, u2, weight=weight)

In [67]:
# Directed (source, target) pairs of each graph; edge attributes are not needed
# for the set comparison, so they are not requested.
edges_output = set(G_output.edges())
edges_ground = set(G_ground.edges())

In [68]:
# Edge-level confusion sets. An edge is a true positive when the same directed
# (source, target) pair appears in both the output and the ground truth.
tp_set = edges_output.intersection(edges_ground)
fp_set = edges_output - tp_set
fn_set = edges_ground - tp_set

tp = len(tp_set)
fp = len(fp_set)
fn = len(fn_set)

# float() forces true division on Python 2. Each ratio falls back to 0.0 when
# its denominator is zero (empty output, empty ground truth, or no overlap)
# instead of raising ZeroDivisionError.
precision = tp / float(tp + fp) if (tp + fp) else 0.0
recall = tp / float(tp + fn) if (tp + fn) else 0.0
if precision + recall:
    f1_score = (2 * precision * recall) / (precision + recall)
else:
    f1_score = 0.0

# Mean squared weight error, taken over correctly recovered edges only.
# With no shared edges the mean is undefined: report NaN explicitly rather
# than letting np.mean([]) emit a RuntimeWarning.
if tp_set:
    mse = np.mean([np.square(G_ground[u][v]['weight'] - G_output[u][v]['weight']) for u, v in tp_set])
else:
    mse = float('nan')

In [69]:
# Print stats
def _report(title, rows):
    """Print a section banner, then one "label value" line per (label, value) row.

    Only single-argument print(...) calls are used, so the output is the same
    as the Python 2 statement form `print label, value` while the cell also
    runs on a Python 3 kernel.
    """
    print(title)
    for label, value in rows:
        print('%s %s' % (label, value))


_report('-----Output-----', [
    ('Number of nodes: ', len(added_nodes_output)),
    ('Number of edges: ', len(edges_output)),
])

_report('-----Ground truth-----', [
    ('Number of nodes: ', len(added_nodes_ground)),
    ('Number of edges: ', len(edges_ground)),
])

_report('-----Stats for edges-----', [
    ('True Positives: ', tp),
    ('False Positives: ', fp),
    ('False Negatives: ', fn),
    ('Precision: ', precision),
    ('Recall: ', recall),
    ('F1 score: ', f1_score),
    ('Mean Square Error: ', mse),
])


-----Output-----
Number of nodes:  86
Number of edges:  233
-----Ground truth-----
Number of nodes:  94
Number of edges:  898
-----Stats for edges-----
True Positives:  125
False Positives:  108
False Negatives:  773
Precision:  0.536480686695
Recall:  0.139198218263
F1 score:  0.221043324492
Mean Square Error:  0.351773592633

In [54]:
# Squared weight difference for each correctly recovered edge (the terms that
# are averaged into the mean square error).
[
    np.square(G_ground[src][dst]['weight'] - G_output[src][dst]['weight'])
    for src, dst in tp_set
]


Out[54]:
[0.53292482028900001,
 0.29214673603599994,
 3.0685895241210006,
 1.039459772521,
 7.2253439999999784e-06,
 0.10677079056399999,
 1.1203862612890001,
 0.011962890625,
 0.00027070120899999985,
 0.0050572009959999973,
 0.98035147612899998,
 0.26659976688899995,
 1.3372031406249998,
 0.79159989896100003,
 2.7353223621610003,
 0.34560054288399999,
 0.54733807132900014,
 0.66299980550400017,
 1.4378000408889997,
 1.0287618898410003,
 2.0140264588959997,
 2.6300017059840002,
 1.1502176403239999,
 0.035975846929000015,
 6.6113160999999989e-05,
 1.537143713856,
 0.83742631210000018,
 0.45799191600100014,
 0.11264816816100001]

In [ ]: