In [11]:
import networkx as nx

In [12]:
import matplotlib.pyplot as plt

In [13]:
import random

In [14]:
import numpy as np

In [15]:
from collections import deque
import heapq

In [36]:
# Number of cascades to generate; the simulation loop writes one line per cascade.
NUM_CASCADES = 500

In [37]:
# Maximum depth (hops from the start node) at which a node is still recorded
# in a cascade; frontier entries deeper than this are discarded.
WALK_LEN = 2

In [18]:
# Latest infection time kept in a cascade; frontier entries whose time
# exceeds this value are discarded.
TIME_LIMIT = 350

In [44]:
# Input / output paths.
# fname_friendship: CSV edge list ("u1,u2" per line) read into the graph.
#   NOTE(review): currently set to '' -- opening it fails on a fresh run;
#   restore the CSV name below if the graph should be built from the edge list.
#fname_friendship = 'FoursquareFriendship-zurich.csv'
fname_friendship = ''
# fname_cascades: destination file for the generated cascades.
fname_cascades = 'synthetic/synth-cascades-6/synth-cascades-6.txt'
# fname_friendship_weights: destination CSV for "source,target,weight" rows.
#fname_friendship_weights = fname_friendship.split('.')[0] + '-with-weights.csv'
fname_friendship_weights = 'fig2-with-weights.csv'

In [23]:
# Open the friendship edge list for reading (consumed by the loader loop).
# NOTE(review): the config cell sets fname_friendship = '', in which case this
# open() raises on a fresh kernel -- confirm the intended input file.
ffr = open(fname_friendship, 'r')

In [45]:
# Output handles, both opened in write mode (existing files are truncated):
#   fout     -- cascades file; closed in a later cell once all cascades are written
#   fout_ffr -- weighted edge list; closed right after the edges are dumped
fout = open(fname_cascades, 'w')
fout_ffr = open(fname_friendship_weights, 'w')

In [40]:
# Node ids known so far; filled while loading the graph and later used to
# pick cascade start nodes and to write the node header of the cascades file.
added_nodes = set()

In [41]:
# Directed graph; every edge carries a 'weight' attribute that the cascade
# simulation reads as the transmission rate of that edge.
G = nx.DiGraph()

In [28]:
# Build the directed graph from the friendship edge list: one "u1,u2" pair per
# line. Each edge gets a random weight drawn uniformly from [0.01, 1).
# Lines that do not parse as exactly two integers are echoed and skipped.
for line in ffr:
    try:
        # strip() instead of line[:-1]: the old slice chopped the last digit
        # of the node id when the final line had no trailing newline.
        u1, u2 = map(int, line.strip().split(','))
    except ValueError:
        print(line)
        continue
    for u in (u1, u2):
        if u not in added_nodes:
            added_nodes.add(u)
            G.add_node(u)
    G.add_edge(u1, u2, weight=np.random.uniform(0.01, 1))

In [47]:
# Dump every edge as a "source,target,weight" row (weight with 6 decimals),
# then close the weights file.
fout_ffr.writelines(
    '%d,%d,%f\n' % (src, dst, attrs['weight'])
    for src, dst, attrs in G.edges(data=True)
)
fout_ffr.close()

In [30]:
# Quick visual check of the graph structure (rendered by the next plt.show()).
nx.draw(G)

In [31]:
# Display the current matplotlib figure.
plt.show()

In [48]:
# Draw the graph with node ids and per-edge weights.
pos = nx.spring_layout(G)
nx.draw_networkx_edges(G, pos)
# Node ids (what the original draw_networkx_labels call actually rendered).
nx.draw_networkx_labels(G, pos)
# Edge weights must go through draw_networkx_edge_labels; passing
# edge_labels= to draw_networkx_labels never drew them (and raises
# TypeError on recent networkx releases).
edge_labels = {(u, v): d['weight'] for u, v, d in G.edges(data=True)}
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.show()

In [46]:
# Replace G with a graph loaded from a GEXF file (node ids parsed as int) and
# reset added_nodes to exactly that graph's nodes. Any graph previously built
# from the CSV edge list is discarded by this cell.
G = nx.read_gexf("synthetic/synth-cascades-zurich-fig2/synth-cascades-zurich-fig2.gexf", node_type=int)
added_nodes = set(G.nodes())

In [49]:
# Write all the nodes to cascades file
for uid in sorted(list(added_nodes)):
    fout.write("%d,%d\n" % (uid, uid))
fout.write("\n")

In [ ]:
def _simulate_cascade(graph, start_node, start_time, max_depth, time_limit):
    """Simulate one cascade spreading outwards from ``start_node``.

    Every out-edge (u, v) of an infected node u proposes an infection of v
    after a delay drawn from an exponential distribution with mean
    1 / graph[u][v]['weight'], rounded up with np.ceil. Proposals are
    processed in increasing time order, so each node is recorded at most
    once, at the earliest proposed time that satisfies the limits.

    Parameters:
        graph      -- object exposing neighbors(node) and graph[u][v]['weight']
        start_node -- node infected at ``start_time`` (depth 0)
        start_time -- infection time of the start node
        max_depth  -- proposals more than this many hops from the start are dropped
        time_limit -- proposals later than this time are dropped

    Returns:
        list of (node, infection_time) tuples in non-decreasing time order.
    """
    infected = set()
    events = []
    # Heap entries: (time, seq, node, parent, depth). ``seq`` is a unique
    # tie-breaker so entries with equal times never compare node/parent.
    seq = 0
    pending = [(start_time, seq, start_node, None, 0)]
    while pending:
        cur_time, _, cur_node, parent, depth = heapq.heappop(pending)
        if depth > max_depth or cur_time > time_limit:
            continue
        if cur_node in infected:
            # Already infected through an earlier proposal; the old FIFO
            # version emitted such nodes a second time.
            continue
        infected.add(cur_node)
        events.append((cur_node, cur_time))
        if parent is not None:
            # Trace: node, infection time, depth, rate of the infecting edge.
            print("%s %s %s %s" % (cur_node, cur_time, depth,
                                   graph[parent][cur_node]['weight']))
        for neighbour in graph.neighbors(cur_node):
            if neighbour in infected:
                continue
            trans_rate = graph[cur_node][neighbour]['weight']
            if trans_rate <= 0:
                # A non-positive rate never transmits; skipping it explicitly
                # replaces the old blanket ``except ValueError: pass``.
                continue
            # 1.0 / rate: avoid Python 2 integer division for int weights.
            next_time = cur_time + np.random.exponential(1.0 / trans_rate)
            seq += 1
            heapq.heappush(
                pending,
                (np.ceil(next_time), seq, neighbour, cur_node, depth + 1))
    return events


# Generate NUM_CASCADES cascades, each started at time 1 from a uniformly
# chosen node, and write them as "<id>;node,time,node,time,...".
# Sorted list: random.choice needs a sequence (sampling from a set is not
# supported on newer Pythons).
start_candidates = sorted(added_nodes)
for cascade_id in range(NUM_CASCADES):
    start_node = random.choice(start_candidates)
    events = _simulate_cascade(G, start_node, 1, WALK_LEN, TIME_LIMIT)
    fout.write("%d;" % cascade_id)
    fout.write(",".join("%d,%d" % (node, when) for node, when in events) + "\n")

In [ ]:
# Disabled code: the previous cascade generator, wrapped in a string literal
# so that it does not run. It performed a single random walk of up to WALK_LEN
# steps over unvisited neighbours, advancing the time by 1 per step.
'''
for i in range(NUM_CASCADES):
    fout.write("%d;" % i)
    # Pick a random node and start a random walk of length walk_len, with memory
    start_node = random.sample(added_nodes, 1)[0]
    visited_nodes = set([start_node])
    start_time = random.randint(0, 300)
    cur_node = start_node
    cur_time = start_time
    visit_str = ""
    for j in range(WALK_LEN):
        print cur_node, "->",
        visit_str += "%d,%d," % (cur_node, cur_time)
        # Pick a random unvisited neighbour
        cur_neighbours = set(G.neighbors(cur_node))
        visited_nodes.add(cur_node)        
        try:
            next_node = random.sample(cur_neighbours-visited_nodes, 1)[0]
            cur_node = next_node
            cur_time += 1
        except ValueError:
            break # End the walk. No more unvisited nodes to traverse
    fout.write(visit_str[:-1] + "\n")
    print "END"
'''

In [52]:
# Flush and close the cascades file once all cascades have been written.
fout.close()

In [10]:
# List every edge as "source target weight", one per line.
for src, dst, attrs in G.edges(data=True):
    print("%s %s %s" % (src, dst, attrs['weight']))


2 8 0.77
2 1 1.87
3 9 1.07
3 4 0.44
4 5 1.4
4 6 1.82
5 3 0.11
7 9 1.51
7 5 1.86
8 10 0.19
8 12 1.31
9 2 0.27
9 6 0.92
9 7 1.18
10 8 0.19
10 9 0.1
10 22 1.36
12 18 0.14
12 11 0.77
12 13 0.02
14 10 0.83
15 16 0.07
15 9 1.04
17 15 1.37
18 3 1.69
18 22 1.05
19 20 1.34
19 13 1.06
21 18 1.52
23 22 0.84

In [ ]: