In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
In [2]:
# sns.set(style='whitegrid')
In [3]:
# Sanity check on every network file: for node ids 0..samples_n-1, compare
# how often the id occurs in CSV column 0 (counted as "outgoing") with how
# often it occurs in CSV column 1 (counted as "incoming").
# Prints one boolean per file: True when at least one of the checked nodes
# has a different outgoing and incoming count.
samples_n = 100
for ftype in ['real', 'model']:
    for ii in range(1, 5):
        file_name = ftype + str(ii)
        # DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0.
        # read_csv with index_col=0 is the documented replacement.
        # from_csv also defaulted to parse_dates=True; that is left off here
        # because the index is compared against integer node ids below.
        df = pd.read_csv("networks/" + file_name + '.csv', header=None, index_col=0)
        # Name the single remaining column once per file (loop-invariant).
        df.columns = ['Incoming']
        sources = np.asarray(df.index)
        targets = np.asarray(df['Incoming'])
        comparison = [
            np.count_nonzero(sources == node) == np.count_nonzero(targets == node)
            for node in range(samples_n)
        ]
        print(sum(comparison) != samples_n)
In [4]:
def _degree_distribution(nodes, cap):
    """Return the first `cap` entries of the occurrence-count distribution of `nodes`.

    Entry k is the number of ids in 0..max(nodes) that occur exactly k times
    in `nodes`, divided by the number of distinct ids present in `nodes`.
    Entry 0 therefore counts ids below the maximum that never occur in
    `nodes`. The result always has length `cap`: it is zero-padded when
    fewer than `cap` distinct counts exist and truncated otherwise.

    Parameters
    ----------
    nodes : 1-D array of non-negative ints
    cap : int, number of bins to keep

    Returns
    -------
    numpy.ndarray of float, shape (cap,)
    """
    unique_nodes = np.unique(nodes).size
    if unique_nodes == 0:
        # No edges at all: avoid dividing by zero, report an all-zero row.
        return np.zeros(cap)
    occurrences = np.bincount(nodes)
    # minlength pads with zeros up to `cap`; the slice truncates beyond it.
    return np.bincount(occurrences, minlength=cap)[:cap] / unique_nodes


cap = 20
network_names = []
# One list of rows per CSV column (index 0 -> column 0, index 1 -> column 1).
rows_per_column = ([], [])
for ftype in ['real', 'model']:
    for ii in range(1, 5):
        # import file
        file_name = ftype + str(ii)
        # atleast_2d keeps the column indexing valid for a single-row file,
        # for which genfromtxt returns a 1-D array.
        mt = np.atleast_2d(
            np.genfromtxt('networks/' + file_name + '.csv', delimiter=',')
        ).astype(np.int32)
        network_names.append(file_name)
        for idx, rows in enumerate(rows_per_column):
            rows.append(_degree_distribution(mt[:, idx], cap))

# Build each frame once instead of concatenating row by row inside the loop
# (repeated concat is quadratic and warns on the initial empty DataFrame).
dfs = [pd.DataFrame(np.vstack(rows), index=network_names) for rows in rows_per_column]
In [5]:
# Render floats in displayed frames with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)
In [6]:
# Distribution table built from CSV column 0 of each network file (dfs[0]).
# NOTE(review): this cell was labelled "incoming", but the consistency check
# earlier in this notebook counts column 0 (the index) as "outgoing" and names
# column 1 'Incoming' - confirm which label is correct.
dfs[0]
Out[6]:
In [7]:
# Distribution table built from CSV column 1 of each network file (dfs[1]).
# NOTE(review): this cell was labelled "outgoing", but the consistency check
# earlier in this notebook names column 1 'Incoming' and counts column 0 as
# "outgoing" - confirm which label is correct.
dfs[1]
Out[7]: