Centrality stats


In [1]:
import pandas as pd
import csv

In [2]:
#Retweet Network
path = '../../data/processed'

# Load one table per centrality measure from the processed-data directory.
# The order of the suffixes matches the order of the names being assigned.
ktz, pgr, deg, clos, bet = (
    pd.read_csv(path + suffix)
    for suffix in (
        '/katz_centrality_1e1.csv',
        '/page_rank.csv',
        '/degree_centrality.csv',
        '/closseness_centrality.csv',
        '/betweenness_centrality.csv',
    )
)

In [3]:
# Read the lineal-threshold centrality file into a dict that maps the first
# CSV field of each row to the second one. Both are kept as strings here.
ltc_dict = {}
# newline='' is what the csv module documents for file objects given to
# csv.reader, so that line endings inside the data are interpreted correctly.
with open(path + '/lt_centrality.csv', 'r', newline='') as csvfile:
    lineal_threshold = csv.reader(csvfile, delimiter=',')
    for row in lineal_threshold:
        if not row:
            # csv.reader yields an empty list for a blank line; unpacking it
            # into two names would raise ValueError, so skip it instead.
            continue
        k, v = row
        ltc_dict[k] = v

In [4]:
# Turn the {node: lineal_threshold} mapping into a two-column frame.
ltc = pd.DataFrame(list(ltc_dict.items()), columns=["node", "lineal_threshold"])
# The keys were read from CSV as strings. pd.to_numeric replaces the deprecated
# Series.convert_objects(convert_numeric=True); errors='coerce' keeps the old
# behaviour of turning unparseable entries into NaN.
ltc['node'] = pd.to_numeric(ltc['node'], errors='coerce')
# Order rows by node id and renumber the index from 0 so the frame can be
# placed side by side with the other centrality tables.
ltc = ltc.sort_values('node').reset_index(drop=True)
ltc.head()


/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app
Out[4]:
node lineal_threshold
0 2 27
1 3 61
2 4 14320
3 5 26
4 6 85

In [5]:
# Remove the 'node' column from every table except ktz, which keeps its copy
# so that a single 'node' column is left after the tables are concatenated.
for frame in (ltc, pgr, deg, clos, bet):
    del frame['node']

In [6]:
# Place all centrality tables side by side. concat(axis=1) pairs rows by index
# label, not by node id.
# NOTE(review): assumes every table lists the nodes in the same order — confirm.
influence_model = pd.concat([ktz, ltc, pgr, deg, clos, bet], axis=1)
# lineal_threshold was read as strings; pd.to_numeric replaces the deprecated
# convert_objects(convert_numeric=True) and, with errors='coerce', still maps
# unparseable entries to NaN.
influence_model['lineal_threshold'] = pd.to_numeric(
    influence_model['lineal_threshold'], errors='coerce'
)
influence_model.head()


/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app
Out[6]:
node katz_centrality lineal_threshold pagerank degree closs betweenness
0 2 0.001769 27 0.000003 0.000004 0.000000 0.000000
1 3 0.001769 61 0.000003 0.000004 0.000000 0.000000
2 4 0.003088 14320 0.000004 0.000320 0.000329 0.000002
3 5 0.001608 26 0.000003 0.000094 0.000096 0.000000
4 6 0.001608 85 0.000003 0.000324 0.000324 0.000000

Standard deviation (raw)


In [7]:
# 'node' is an identifier rather than a measure, so take it out of the frame
# before computing the per-column standard deviation.
influence_model = influence_model.drop('node', axis=1)
influence_model.std()


Out[7]:
katz_centrality        0.000572
lineal_threshold    2342.605729
pagerank               0.000001
degree                 0.000152
closs                  0.003662
betweenness            0.000003
dtype: float64

In [8]:
# Report how many distinct values each column holds. One loop over the columns
# replaces a hand-copied print statement per measure; the printed text is the
# same.
for column in influence_model.columns:
    print('[{}].unique: {}'.format(column, len(influence_model[column].unique())))


[katz_centrality].unique: 774
[lineal_threshold].unique: 7000
[pagerank].unique: 33956
[degree].unique: 375
[closs].unique: 4331
[betweenness].unique: 5015

Normalized (max-score)


In [9]:
# Normalize lineal_threshold
# NOTE(review): 53015 is presumably the largest lineal_threshold score in the
# data (section title says "max-score") — TODO confirm.
MAX_LT_SCORE = 53015
# Overwrites the column, so running this cell twice divides twice.
influence_model['lineal_threshold'] = (
    influence_model['lineal_threshold'].astype(float) / MAX_LT_SCORE
)

In [10]:
# Per-column standard deviation of influence_model in its current state.
influence_model.std()


Out[10]:
katz_centrality     0.000572
lineal_threshold    0.044188
pagerank            0.000001
degree              0.000152
closs               0.003662
betweenness         0.000003
dtype: float64

In [11]:
# Report how many distinct values each column holds. One loop over the columns
# replaces a hand-copied print statement per measure; the printed text is the
# same.
for column in influence_model.columns:
    print('[{}].unique: {}'.format(column, len(influence_model[column].unique())))


[katz_centrality].unique: 774
[lineal_threshold].unique: 7000
[pagerank].unique: 33956
[degree].unique: 375
[closs].unique: 4331
[betweenness].unique: 5015

Normalized (nodes)

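We need to reload the raw lineal_threshold values, because the previous section overwrote them with their max-score-normalized version.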


In [21]:
# Rebuild the lineal_threshold table from the raw {node: value} mapping so the
# values are unscaled again.
ltc = pd.DataFrame(list(ltc_dict.items()), columns=["node", "lineal_threshold"])
# pd.to_numeric replaces the deprecated Series.convert_objects; errors='coerce'
# keeps the old behaviour of turning unparseable entries into NaN.
ltc['node'] = pd.to_numeric(ltc['node'], errors='coerce')
# Order rows by node id and renumber the index from 0 so the frame can be
# placed side by side with the other centrality tables.
ltc = ltc.sort_values('node').reset_index(drop=True)
ltc.head()


/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app
Out[21]:
node lineal_threshold
0 2 27
1 3 61
2 4 14320
3 5 26
4 6 85

In [22]:
# Rebuild the combined table. concat(axis=1) pairs rows by index label, not by
# node id.
# NOTE(review): assumes every table lists the nodes in the same order — confirm.
influence_model = pd.concat([ktz, ltc, pgr, deg, clos, bet], axis=1)
# pd.to_numeric replaces the deprecated convert_objects(convert_numeric=True);
# errors='coerce' still maps unparseable entries to NaN.
influence_model['lineal_threshold'] = pd.to_numeric(
    influence_model['lineal_threshold'], errors='coerce'
)
# NOTE(review): ktz and the rebuilt ltc each appear to carry a 'node' column
# here, so this deletion is expected to remove every column with that label —
# confirm against the pandas version in use.
del influence_model['node']
influence_model.head()


/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app
Out[22]:
katz_centrality lineal_threshold pagerank degree closs betweenness
0 0.001769 27 0.000003 0.000004 0.000000 0.000000
1 0.001769 61 0.000003 0.000004 0.000000 0.000000
2 0.003088 14320 0.000004 0.000320 0.000329 0.000002
3 0.001608 26 0.000003 0.000094 0.000096 0.000000
4 0.001608 85 0.000003 0.000324 0.000324 0.000000

In [23]:
# Normalize lineal_threshold
# NOTE(review): 256490 is presumably the number of nodes in the network
# (section title says "nodes") — TODO confirm.
NODE_COUNT = 256490
# Overwrites the column, so running this cell twice divides twice.
influence_model['lineal_threshold'] = (
    influence_model['lineal_threshold'].astype(float) / NODE_COUNT
)
influence_model.head()


Out[23]:
katz_centrality lineal_threshold pagerank degree closs betweenness
0 0.001769 0.000105 0.000003 0.000004 0.000000 0.000000
1 0.001769 0.000238 0.000003 0.000004 0.000000 0.000000
2 0.003088 0.055831 0.000004 0.000320 0.000329 0.000002
3 0.001608 0.000101 0.000003 0.000094 0.000096 0.000000
4 0.001608 0.000331 0.000003 0.000324 0.000324 0.000000

In [24]:
# Per-column standard deviation of influence_model in its current state.
influence_model.std()


Out[24]:
katz_centrality     0.000572
lineal_threshold    0.009133
pagerank            0.000001
degree              0.000152
closs               0.003662
betweenness         0.000003
dtype: float64

In [25]:
# Report how many distinct values each column holds. One loop over the columns
# replaces a hand-copied print statement per measure; the printed text is the
# same.
for column in influence_model.columns:
    print('[{}].unique: {}'.format(column, len(influence_model[column].unique())))


[katz_centrality].unique: 774
[lineal_threshold].unique: 7000
[pagerank].unique: 33956
[degree].unique: 375
[closs].unique: 4331
[betweenness].unique: 5015

In [ ]: