Preprocess Dynamic Network Data and Build Small Temporal Networks

All Imports


In [1]:
#Omid55
# all imports
import numpy as np
import seaborn as sns
from scipy.sparse import csr_matrix
import warnings
import math

Functions


In [2]:
# load data from a text edge list (sparse representation of edges)
# output =>
# edges:            numpy array, one row per edge (source, target, timestamp)
# inputs =>
# fname:            path to the edge-list file
# delim:            column delimiter used in the file
# skip_rows_count:  number of header rows to skip
def load_data(fname, delim, skip_rows_count):
    edges = np.loadtxt(open(fname, "rb"), delimiter=delim, skiprows=skip_rows_count)
    return edges
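
A minimal usage sketch (the file name and contents here are made up for illustration) showing the column layout load_data expects: source node id, target node id, timestamp, one edge per row. The real datasets below (e.g. fb-forum.txt) follow the same layout.

In [ ]:
# hypothetical toy file, written only to illustrate the expected format
with open("toy_edges.txt", "w") as f:
    f.write("1,2,10\n"      # node 1 -> node 2 at time 10
            "2,3,12\n"
            "1,3,15\n")
toy_edges = load_data("toy_edges.txt", ',', 0)
print(toy_edges.shape)       # (3, 3): three edges, columns = (source, target, timestamp)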

In [3]:
# build an N x N sparse adjacency matrix from a subset of the edge list
# output =>
# A:                 N x N CSR adjacency matrix; entry (i,j) counts edges from node i+1 to node j+1
# inputs =>
# subset_of_edges:   edge rows (source, target, ...) to include in this network
# N:                 total number of nodes
def build_unconstrained_network(subset_of_edges, N):
    data = np.ones(len(subset_of_edges))
    # node ids are 1-based in the data, so shift them to 0-based matrix indices
    row = subset_of_edges[:,0].astype(int) - 1
    col = subset_of_edges[:,1].astype(int) - 1
    A = csr_matrix((data,(row,col)), shape=(N,N))
    #D = A.todense()
    return A
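
A quick sanity check on a toy edge array (made up here for illustration): repeated edges accumulate in the corresponding cell because csr_matrix sums duplicate entries.

In [ ]:
# toy edges (source, target, timestamp); ids are 1-based as in the datasets
toy = np.array([[1, 2, 10],
                [1, 2, 12],
                [2, 3, 15]], dtype=float)
A = build_unconstrained_network(toy, 3)
print(A.todense())
# [[0. 2. 0.]
#  [0. 0. 1.]
#  [0. 0. 0.]]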

In [4]:
# process the edges list created from load_data and build evolving networks
# output =>  dict of sparse adjacency matrices, keyed by window index
# inputs =>
# edges:          edge list as returned by load_data (source, target, timestamp)
# window_size:    window size, in number of edges or in time units
# step_size:      step size, in number of edges or in time units
# edges_as_unit:  if True, window_size and step_size count edges; if False, they are time spans
def process_data(edges, window_size, step_size, edges_as_unit=True):
    if(window_size < step_size):
        warnings.warn("You are wasting some data points. To fix that, window_size should be >= step_size.", 
                      UserWarning)
    N = int(max(edges[:,0].max(), edges[:,1].max()))      # nodes count
    E = len(edges)                                         # edges count
    
    if(edges_as_unit):
        # window and step measured in number of edges
        total = E
        
        n = math.floor((total - window_size) / step_size) + 1              # the number of chunks that will be created
        print("Info=>   Nodes:", N, "  Edges:", E, "  Networks:", n, "  WindowSize:", window_size, "  StepSize:",
              step_size, "  SamplesLeftOff:", E - window_size - (n-1)*step_size)
        As = dict.fromkeys(set(range(n)))

        for i in range(0,n):
            subset_of_edges = edges[i * step_size : i * step_size + window_size]
            As[i] = build_unconstrained_network(subset_of_edges, N)
        return As
    else:
        # window and step measured in time units
        start_time = edges[0,2]
        end_time = edges[-1,2]
        total = end_time - start_time + 1
        
        n = math.floor((total - window_size) / step_size) + 1       # the number of chunks that will be created
        print("Info=>   Nodes:", N, "  Edges:", E, "  Networks:", n, "  WindowSize:", window_size, "  StepSize:",
               step_size, "  TimeLeftOff:", total - window_size - (n-1)*step_size, 
              "  SamplesLeftOff:", np.where(edges[:,2] > start_time + window_size + (n-1)*step_size)[0].size)
        As = dict.fromkeys(set(range(n)))

        for i in range(0,n):
            a = start_time + i * step_size
            b = a + window_size
            # select edges whose timestamp falls within [a, b]
            indices = np.where(abs(edges[:,2] - (a+b)/2) <= (b-a)/2)
            subset_of_edges = edges[indices]
            As[i] = build_unconstrained_network(subset_of_edges, N)
        return As
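
A small worked example on a synthetic edge list (made up here for illustration) of the window arithmetic: with 7 edges, window_size=4 and step_size=2, floor((7-4)/2)+1 = 2 networks are built, and in edge-count mode one trailing edge is left off.

In [ ]:
# synthetic edge list (source, target, timestamp), for illustration only
toy = np.array([[1,2,1],[2,3,2],[1,3,3],[3,4,4],[2,4,5],[1,4,6],[3,1,7]], dtype=float)
toy_As   = process_data(toy, 4, 2)                          # windows over edges: rows 0-3 and 2-5
toy_As_t = process_data(toy, 4, 2, edges_as_unit=False)     # windows over time: [1,5] and [3,7]
print(len(toy_As), len(toy_As_t))                           # 2 2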

Main


In [5]:
# -- test dataset --
#e = load_data("test.txt", ',', 0)
#As = process_data(e, 5, 2)
#As = process_data(e, 50, 5, edges_as_unit=False)

e = load_data("fb-forum.txt", ',', 0)
#As = process_data(e, 200, 40)
As = process_data(e, 24*3600, 4*3600, edges_as_unit=False)

#e = load_data('dblp_coauthor/out.dblp_coauthor', ' ', 1)


Info=>   Nodes: 899   Edges: 33720   Networks: 981   WindowSize: 86400   StepSize: 14400   TimeLeftOff: 13706.0   SamplesLeftOff: 2

In [6]:
# TODO: compute eigenvalues, shortest paths, centrality measures, and so forth for each network
#       and store them in sorted order along one dimension, with the other dimension being time
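
A minimal sketch (not part of the original run) of what that step might look like, using weighted total degree as a stand-in measure; eigenvalues or other centralities would be stored the same way. The layout chosen here, sorted values down one axis and time across the other, is an assumption following the comment above.

In [ ]:
# illustrative sketch: sorted weighted degree per node, one column per network snapshot
num_nodes = As[0].shape[0]
degrees_over_time = np.zeros((num_nodes, len(As)))
for i, A in As.items():
    # weighted out-degree + in-degree of every node in snapshot i
    deg = np.asarray(A.sum(axis=1)).ravel() + np.asarray(A.sum(axis=0)).ravel()
    degrees_over_time[:, i] = np.sort(deg)[::-1]       # store in sorted (descending) order
print(degrees_over_time.shape)                          # (nodes, number of networks)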
