All Imports
In [1]:
#Omid55
# all imports
import numpy as np
import seaborn as sns
from scipy.sparse import csr_matrix
import warnings
import math
Functions
In [2]:
# load data from a text edges-list file (sparse representation of edges)
# output =>
#   edges: numpy array with one row per edge (source, target, timestamp, ...)
# inputs =>
#   fname: path to the edges file
#   delim: column delimiter used in the file
#   skip_rows_count: number of header rows to skip
def load_data(fname, delim, skip_rows_count):
    edges = np.loadtxt(open(fname, "rb"), delimiter=delim, skiprows=skip_rows_count)
    return edges
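A quick sanity check for load_data (a sketch; the file name and contents below are made up for illustration, assuming the edges file has three delimiter-separated columns: source id, target id, timestamp):
In [ ]:
# write a tiny hypothetical edge list to disk and load it back
with open("toy_edges.txt", "w") as f:
    f.write("1,2,100\n1,3,101\n2,3,105\n")
e_toy = load_data("toy_edges.txt", ',', 0)
print(e_toy.shape)  # (3, 3): three edges, columns = source, target, timestamp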
In [3]:
# build an (unconstrained) adjacency matrix from a subset of the edges list
# output =>
#   A: N x N sparse adjacency matrix (scipy csr_matrix)
# inputs =>
#   subset_of_edges: rows of the edges list to include (source, target, ...)
#   N: total number of nodes in the network
def build_unconstrained_network(subset_of_edges, N):
    data = np.ones(len(subset_of_edges))
    # node ids are 1-based and np.loadtxt returns floats, so shift and cast to 0-based integer indices
    row = np.subtract(subset_of_edges[:, 0], 1).astype(int)
    col = np.subtract(subset_of_edges[:, 1], 1).astype(int)
    # duplicate (row, col) pairs are summed, so repeated edges show up as weights > 1
    A = csr_matrix((data, (row, col)), shape=(N, N))
    #D = A.todense()
    return A
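To see what build_unconstrained_network produces, here is a small hand-checked sketch (node ids are assumed to be 1-based, matching the shift by 1 inside the function):
In [ ]:
# three directed edges among three nodes
toy_subset = np.array([[1, 2, 100],
                       [1, 3, 101],
                       [2, 3, 105]])
A_toy = build_unconstrained_network(toy_subset, 3)
print(A_toy.toarray())
# expected:
# [[0. 1. 1.]
#  [0. 0. 1.]
#  [0. 0. 0.]]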
In [4]:
# process the edges list created by load_data and build the evolving networks
# output => dict of adjacency matrices, one per window (snapshot)
# inputs =>
#   edges: edges list
#   window_size: window size, in number of edges or in time units
#   step_size: step size, in number of edges or in time units
#   edges_as_unit: if True, window_size and step_size are counted in edges; if False, in time units
def process_data(edges, window_size, step_size, edges_as_unit=True):
    if window_size < step_size:
        warnings.warn("You are wasting some data points. To fix that, window_size should be >= step_size.",
                      UserWarning)
    N = int(max(edges[:, 0].max(), edges[:, 1].max()))  # nodes count
    E = len(edges)  # edges count
    if edges_as_unit:
        # window and step are measured in number of edges
        total = E
        n = math.floor((total - window_size) / step_size) + 1  # the number of chunks that will be created
        print("Info=> Nodes:", N, " Edges:", E, " Networks:", n, " WindowSize:", window_size, " StepSize:",
              step_size, " SamplesLeftOff:", E - window_size - (n - 1) * step_size)
        As = dict.fromkeys(set(range(n)))
        for i in range(0, n):
            subset_of_edges = edges[i * step_size: i * step_size + window_size]
            As[i] = build_unconstrained_network(subset_of_edges, N)
        return As
    else:
        # window and step are measured in time units (assumes edges are sorted by timestamp)
        start_time = edges[0, 2]
        end_time = edges[-1, 2]
        total = end_time - start_time + 1
        n = math.floor((total - window_size) / step_size) + 1  # the number of chunks that will be created
        print("Info=> Nodes:", N, " Edges:", E, " Networks:", n, " WindowSize:", window_size, " StepSize:",
              step_size, " TimeLeftOff:", total - window_size - (n - 1) * step_size,
              " SamplesLeftOff:", np.where(edges[:, 2] > start_time + window_size + (n - 1) * step_size)[0].size)
        As = dict.fromkeys(set(range(n)))
        for i in range(0, n):
            a = start_time + i * step_size
            b = a + window_size
            # keep the edges whose timestamp falls inside the window [a, b]
            indices = np.where(abs(edges[:, 2] - (a + b) / 2) <= (b - a) / 2)
            subset_of_edges = edges[indices]
            As[i] = build_unconstrained_network(subset_of_edges, N)
        return As
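A minimal sketch of process_data on a toy edge list, covering both counting modes; it assumes the edges are sorted by timestamp, as the time-based branch expects:
In [ ]:
# toy edge list: (source, target, timestamp), 1-based node ids, sorted by time
toy_edges = np.array([[1, 2, 100],
                      [1, 3, 101],
                      [2, 3, 105],
                      [3, 1, 110]])
# window of 2 edges, step of 1 edge -> 3 overlapping snapshots
As_toy = process_data(toy_edges, 2, 1)
print(len(As_toy), As_toy[0].toarray())
# window of 6 time units, step of 5 time units -> snapshots defined on the time axis
As_toy_t = process_data(toy_edges, 6, 5, edges_as_unit=False)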
Main
In [5]:
# -- test dataset --
#e = load_data("test.txt", ',', 0)
#As = process_data(e, 5, 2)
#As = process_data(e, 50, 5, edges_as_unit=False)
e = load_data("fb-forum.txt", ',', 0)
#As = process_data(e, 200, 40)
As = process_data(e, 24*3600, 4*3600, edges_as_unit=False)
#e = load_data('dblp_coauthor/out.dblp_coauthor', ' ', 1, 3)
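A small optional sanity check on the snapshots built above (not part of the original run; it only inspects the dict returned by process_data):
In [ ]:
# number of snapshots and size of the first one
print("snapshots:", len(As))
A0 = As[0]
print("shape:", A0.shape, " non-zeros in first snapshot:", A0.nnz)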
In [6]:
# compute eigenvalues, shortest paths, centrality measures and so forth for every snapshot,
# and store each measure in sorted order, with the other dimension indexed by time
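One possible realization of the plan above (a sketch, not the author's implementation): for every snapshot, compute the sorted out-degree sequence and the largest-magnitude adjacency eigenvalue, stacking the results so that one dimension indexes nodes (or the measure) and the other indexes time. Note that eigs may fail to converge on empty or nearly empty snapshots.
In [ ]:
from scipy.sparse.linalg import eigs

n_snapshots = len(As)
N = As[0].shape[0]
degree_over_time = np.zeros((N, n_snapshots))  # rows: sorted degree sequence, columns: time
leading_eig = np.zeros(n_snapshots)            # one value per snapshot
for t in range(n_snapshots):
    A = As[t]
    # sorted out-degree sequence of snapshot t
    degree_over_time[:, t] = np.sort(np.asarray(A.sum(axis=1)).ravel())
    # largest-magnitude eigenvalue of the (possibly asymmetric) adjacency matrix
    leading_eig[t] = abs(eigs(A, k=1, return_eigenvectors=False)[0])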