In [2]:
import matplotlib.pyplot as plt
In [5]:
import networkx as nx
In [53]:
import numpy as np
In [61]:
# Input edge lists. Both files are expected to contain CSV lines in the
# following format - source,target,weight,...
# The parsing cells of this notebook read source and target as ints and weight
# as a float; any columns after the third are ignored.
# fname_output is the edge list being evaluated, fname_ground_truth is the
# reference edge list it is compared against.
fname_output= 'synthetic/synth-cascades-7/synth-cascades-7-output-at-181.txt'
fname_ground_truth = 'synthetic/synth-cascades-7/ground_truth_fixed.txt'
In [62]:
# Read both edge lists fully into memory and close the files immediately.
# `f_output` / `f_ground` are now lists of raw lines (trailing newline kept),
# so any code that does `for line in f_output` sees exactly the lines it would
# have read from the open file. Unlike a file object, a list is not exhausted
# after one pass, so the parsing cells can be re-run safely, and no file
# handle is left open for the lifetime of the kernel.
with open(fname_output, 'r') as fh_output:
    f_output = fh_output.readlines()
with open(fname_ground_truth, 'r') as fh_ground:
    f_ground = fh_ground.readlines()
In [63]:
# Node ids seen so far in each edge list; two independent, initially empty sets.
added_nodes_output, added_nodes_ground = set(), set()
In [64]:
# Empty directed graphs: one for the edge list under evaluation, one for the
# ground-truth edge list.
G_output, G_ground = nx.DiGraph(), nx.DiGraph()
In [65]:
# Build the output graph from its edge list.
# Each line is expected to look like "source,target,weight[,...]"; source and
# target are parsed as ints, weight as a float, further columns are ignored.
for line in f_output:
    # Strip only the line terminator. The previous `line[:-1]` also removed a
    # real character when the last line of the file had no trailing newline
    # (e.g. "1,2,0.5" was parsed with weight 0.0).
    vals = line.rstrip('\r\n').split(',')
    try:
        u1, u2 = map(int, vals[:2])
        weight = float(vals[2])
    except (ValueError, IndexError):
        # Malformed row: blank line, header, non-numeric field, or fewer than
        # three fields (the latter used to escape as an uncaught IndexError).
        # Echo it and move on, as before.
        print(line)
        continue
    added_nodes_output.update((u1, u2))
    # add_edge creates missing endpoints itself, so no explicit add_node call
    # is needed.
    G_output.add_edge(u1, u2, weight=weight)
In [66]:
# Build the ground-truth graph from its edge list.
# Each line is expected to look like "source,target,weight[,...]"; source and
# target are parsed as ints, weight as a float, further columns are ignored.
for line in f_ground:
    # Strip only the line terminator. The previous `line[:-1]` also removed a
    # real character when the last line of the file had no trailing newline
    # (e.g. "1,2,0.5" was parsed with weight 0.0).
    vals = line.rstrip('\r\n').split(',')
    try:
        u1, u2 = map(int, vals[:2])
        weight = float(vals[2])
    except (ValueError, IndexError):
        # Malformed row: blank line, header, non-numeric field, or fewer than
        # three fields (the latter used to escape as an uncaught IndexError).
        # Echo it and move on, as before.
        print(line)
        continue
    added_nodes_ground.update((u1, u2))
    # add_edge creates missing endpoints itself, so no explicit add_node call
    # is needed.
    G_ground.add_edge(u1, u2, weight=weight)
In [67]:
# Directed (source, target) pairs of each graph, without edge attributes, as
# sets so they can be compared with set algebra.
edges_output = set(G_output.edges())
edges_ground = set(G_ground.edges())
In [68]:
# Edge-level confusion sets: an edge is a true positive when the same directed
# (source, target) pair occurs in both the output and the ground truth.
tp_set = edges_output & edges_ground
fp_set = edges_output - tp_set
fn_set = edges_ground - tp_set
tp = len(tp_set)
fp = len(fp_set)
fn = len(fn_set)

# Guard every ratio against an empty denominator (empty output, empty ground
# truth, or no overlap at all). These cases used to abort the cell with a
# ZeroDivisionError; they are now reported as 0.0.
predicted_total = tp + fp
actual_total = tp + fn
precision = tp / float(predicted_total) if predicted_total else 0.0
recall = tp / float(actual_total) if actual_total else 0.0
pr_sum = precision + recall
f1_score = (2 * precision * recall) / pr_sum if pr_sum else 0.0

# Mean squared weight error over the correctly recovered edges only. With no
# such edges the value is undefined: keep it NaN, but without sending an empty
# list through np.mean (which emits a RuntimeWarning).
if tp_set:
    mse = np.mean([np.square(G_ground[u][v]['weight'] - G_output[u][v]['weight'])
                   for u, v in tp_set])
else:
    mse = np.nan
In [69]:
# Print stats
# Each row is emitted through a single-argument print(...) so the cell runs on
# both Python 2 and Python 3. '%s %s' reproduces the separator the old
# multi-argument print statement inserted between label and value, so the
# text written is unchanged.
report_sections = [
    ('-----Output-----', [
        ('Number of nodes: ', len(added_nodes_output)),
        ('Number of edges: ', len(edges_output)),
    ]),
    ('-----Ground truth-----', [
        ('Number of nodes: ', len(added_nodes_ground)),
        ('Number of edges: ', len(edges_ground)),
    ]),
    ('-----Stats for edges-----', [
        ('True Positives: ', tp),
        ('False Positives: ', fp),
        ('False Negatives: ', fn),
        ('Precision: ', precision),
        ('Recall: ', recall),
        ('F1 score: ', f1_score),
        ('Mean Square Error: ', mse),
    ]),
]
for header, rows in report_sections:
    print(header)
    for label, value in rows:
        print('%s %s' % (label, value))
In [54]:
# Squared weight difference (ground truth minus output) for every edge in
# tp_set; left as the cell's last expression so the list is displayed.
[
    np.square(G_ground[src][dst]['weight'] - G_output[src][dst]['weight'])
    for (src, dst) in tp_set
]
Out[54]:
In [ ]: