In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [2]:
# sns.set(style='whitegrid')

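Avalia se cada rede é ou não direcionada (compara, para uma amostra de nós, o número de ocorrências nas duas colunas da lista de arestas)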


In [3]:
# Directedness check. For every network file, compare how many times each of
# the first `samples_n` node ids appears in the first CSV column (loaded as the
# index) with how many times it appears in the second column. One line is
# printed per file: True when at least one sampled id has different counts in
# the two columns, False when all sampled ids match.
# NOTE(review): the names "outgoing"/"Incoming" assume the first CSV column
# holds edge sources and the second holds edge targets -- confirm against the
# data description. The printed result does not depend on that choice.
samples_n = 100  # node ids 0 .. samples_n - 1 are inspected

for ftype in ['real', 'model']:
    for ii in range(1, 5):
        file_name = ftype + str(ii)
        # pd.DataFrame.from_csv was deprecated in pandas 0.21 and removed in
        # 1.0. read_csv with index_col=0 loads the same layout (first column as
        # index). Date parsing of the index (a from_csv default) is left off,
        # because the index is compared against integer ids below.
        df = pd.read_csv("networks/" + file_name + '.csv', header=None, index_col=0)
        # The single remaining data column is the second CSV column.
        df.columns = ['Incoming']
        comparison = []
        for jj in range(samples_n):
            # Summing the boolean masks counts matching edges without
            # materialising a filtered copy of the frame.
            outgoing = int((df.index == jj).sum())
            incoming = int((df['Incoming'] == jj).sum())
            comparison.append(outgoing == incoming)
        # Same truth value as `sum(comparison) != samples_n`.
        print(not all(comparison))


True
True
True
True
True
True
True
True

Distribuição dos graus de entrada (incoming) e de saída (outgoing)


In [4]:
# Build two summary tables, one per column of the edge-list files:
# dfs[0] is computed from column 0 and dfs[1] from column 1. Each table has one
# row per network file (labelled with the file name) and `cap` columns. Column
# k holds the number of node ids that occur exactly k times in that edge-list
# column, divided by the number of distinct ids occurring in it.
# Bin 0 counts the ids in 0..max(id) that never occur in the column. Because
# the divisor is the number of ids that DO occur, a row can sum to more than 1.
cap = 20  # number of leading bins (occurrence counts 0 .. cap - 1) to keep

# Rows are collected in plain lists and each DataFrame is built once at the
# end. Growing a DataFrame with pd.concat inside the loop is quadratic, and
# concatenating onto an empty frame is deprecated in recent pandas.
row_labels = []
rows_per_column = [[], []]

for ftype in ['real', 'model']:
    for ii in range(1, 5):

        # Load the edge list: one edge per CSV row, node ids cast to integers.
        file_name = ftype + str(ii)
        mt = np.genfromtxt('networks/' + file_name + '.csv', delimiter=',').astype(np.int32)
        row_labels.append(file_name)

        for idx, rows in enumerate(rows_per_column):

            # Node ids found in this column, and how many distinct ones there are.
            nodes = mt[:, idx]
            unique_nodes = len(set(nodes))

            # dist[v] is how many times id v occurs in this column. The second
            # bincount turns that into "how many ids occur exactly k times",
            # truncated to the first `cap` bins.
            dist = np.bincount(nodes)
            counts = np.bincount(dist)[:cap]

            # Normalise and zero-pad to exactly `cap` entries.
            dist_array = np.zeros(cap)
            dist_array[:len(counts)] = counts / unique_nodes

            rows.append(dist_array)

dfs = [pd.DataFrame(np.vstack(rows), index=row_labels) for rows in rows_per_column]

In [5]:
# Show floats in DataFrame output with a thousands separator and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [6]:
# incoming: table built from the first column (index 0) of each edge list.
# Rows are network files; column k is the share of node ids occurring exactly
# k times in that column (k = 0 .. 19).
# NOTE(review): the "incoming" label assumes the first CSV column holds edge
# targets -- confirm against the data description.
dfs[0]


Out[6]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
real1 0.75 0.30 0.18 0.11 0.08 0.06 0.04 0.03 0.02 0.02 0.02 0.01 0.01 0.01 0.01 0.01 0.01 0.00 0.00 0.00
real2 0.32 0.36 0.44 0.17 0.02 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
real3 0.94 0.23 0.14 0.09 0.07 0.05 0.04 0.03 0.03 0.02 0.02 0.02 0.02 0.01 0.01 0.01 0.01 0.01 0.01 0.01
real4 0.23 0.26 0.16 0.11 0.08 0.06 0.05 0.04 0.04 0.03 0.02 0.02 0.02 0.01 0.01 0.01 0.01 0.01 0.01 0.01
model1 0.50 0.29 0.18 0.12 0.08 0.06 0.05 0.04 0.03 0.02 0.02 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.00 0.00
model2 0.58 0.32 0.19 0.12 0.08 0.06 0.04 0.03 0.02 0.02 0.02 0.01 0.01 0.01 0.01 0.01 0.00 0.00 0.00 0.00
model3 0.11 0.11 0.11 0.11 0.11 0.10 0.10 0.09 0.07 0.06 0.05 0.03 0.02 0.02 0.01 0.01 0.00 0.00 0.00 0.00
model4 0.67 0.33 0.19 0.12 0.08 0.06 0.04 0.03 0.02 0.02 0.01 0.01 0.01 0.01 0.01 0.01 0.00 0.00 0.00 0.00

In [7]:
# outgoing: table built from the second column (index 1) of each edge list.
# Rows are network files; column k is the share of node ids occurring exactly
# k times in that column (k = 0 .. 19).
# NOTE(review): the "outgoing" label assumes the second CSV column holds edge
# sources -- confirm against the data description.
dfs[1]


Out[7]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
real1 0.00 0.19 0.20 0.14 0.10 0.07 0.05 0.04 0.03 0.02 0.02 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.00
real2 0.09 0.58 0.32 0.09 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
real3 0.00 0.07 0.06 0.05 0.05 0.05 0.05 0.04 0.04 0.04 0.04 0.03 0.03 0.03 0.03 0.02 0.02 0.02 0.02 0.02
real4 0.31 0.27 0.16 0.11 0.08 0.06 0.05 0.04 0.03 0.03 0.02 0.02 0.02 0.01 0.01 0.01 0.01 0.01 0.01 0.01
model1 0.00 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
model2 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
model3 0.11 0.11 0.11 0.11 0.11 0.10 0.10 0.09 0.07 0.06 0.05 0.03 0.02 0.02 0.01 0.01 0.00 0.00 0.00 0.00
model4 0.00 0.00 0.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00