Centrality stats



In [1]:

    
import pandas as pd
import csv



In [2]:

    
#Retweet Network
path = '../../data/processed'

ktz = pd.read_csv(path + '/katz_centrality_1e1.csv')
pgr = pd.read_csv(path + '/page_rank.csv')
deg = pd.read_csv(path + '/degree_centrality.csv')
clos = pd.read_csv(path +'/closseness_centrality.csv')
bet = pd.read_csv(path +'/betweenness_centrality.csv')



In [3]:

    
ltc_dict = {}
with open(path + '/lt_centrality.csv', 'r') as csvfile:
    lineal_threshold = csv.reader(csvfile, delimiter=',')
    for row in lineal_threshold:
        k, v = row
        ltc_dict[k] = v



In [4]:

    
ltc = pd.DataFrame([[key,value] for key,value in ltc_dict.items()],columns=["node","lineal_threshold"])
ltc['node'] = ltc['node'].convert_objects(convert_numeric=True)
ltc.sort_values('node', inplace = True)
ltc.reset_index(drop=True, inplace = True)
ltc.head()









    



/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app






    Out[4]:






  
    
      
      node
      lineal_threshold
    
  
  
    
      0
      2
      27
    
    
      1
      3
      61
    
    
      2
      4
      14320
    
    
      3
      5
      26
    
    
      4
      6
      85



In [5]:

    
del ltc['node']
del pgr['node']
del deg['node']
del clos['node']
del bet['node']



In [6]:

    
influence_model = pd.concat([ktz,ltc, pgr, deg, clos, bet], axis=1)
influence_model['lineal_threshold'] = influence_model['lineal_threshold'].convert_objects(convert_numeric=True)
influence_model.head()









    



/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app






    Out[6]:






  
    
      
      node
      katz_centrality
      lineal_threshold
      pagerank
      degree
      closs
      betweenness
    
  
  
    
      0
      2
      0.001769
      27
      0.000003
      0.000004
      0.000000
      0.000000
    
    
      1
      3
      0.001769
      61
      0.000003
      0.000004
      0.000000
      0.000000
    
    
      2
      4
      0.003088
      14320
      0.000004
      0.000320
      0.000329
      0.000002
    
    
      3
      5
      0.001608
      26
      0.000003
      0.000094
      0.000096
      0.000000
    
    
      4
      6
      0.001608
      85
      0.000003
      0.000324
      0.000324
      0.000000

Standard deviation (raw)



In [7]:

    
del influence_model['node']
influence_model.std()









    Out[7]:





katz_centrality        0.000572
lineal_threshold    2342.605729
pagerank               0.000001
degree                 0.000152
closs                  0.003662
betweenness            0.000003
dtype: float64



In [8]:

    
print('[katz_centrality].unique: {}'.format(len(influence_model['katz_centrality'].unique())))
print('[lineal_threshold].unique: {}'.format(len(influence_model['lineal_threshold'].unique())))
print('[pagerank].unique: {}'.format(len(influence_model['pagerank'].unique())))
print('[degree].unique: {}'.format(len(influence_model['degree'].unique())))
print('[closs].unique: {}'.format(len(influence_model['closs'].unique())))
print('[betweenness].unique: {}'.format(len(influence_model['betweenness'].unique())))









    



[katz_centrality].unique: 774
[lineal_threshold].unique: 7000
[pagerank].unique: 33956
[degree].unique: 375
[closs].unique: 4331
[betweenness].unique: 5015

Normalized (max-score)



In [9]:

    
# Normalize lineal_threshold
influence_model['lineal_threshold'] = influence_model['lineal_threshold'].astype(float)
influence_model['lineal_threshold'] = (influence_model['lineal_threshold'] / 53015)



In [10]:

    
influence_model.std()









    Out[10]:





katz_centrality     0.000572
lineal_threshold    0.044188
pagerank            0.000001
degree              0.000152
closs               0.003662
betweenness         0.000003
dtype: float64



In [11]:

    
print('[katz_centrality].unique: {}'.format(len(influence_model['katz_centrality'].unique())))
print('[lineal_threshold].unique: {}'.format(len(influence_model['lineal_threshold'].unique())))
print('[pagerank].unique: {}'.format(len(influence_model['pagerank'].unique())))
print('[degree].unique: {}'.format(len(influence_model['degree'].unique())))
print('[closs].unique: {}'.format(len(influence_model['closs'].unique())))
print('[betweenness].unique: {}'.format(len(influence_model['betweenness'].unique())))









    



[katz_centrality].unique: 774
[lineal_threshold].unique: 7000
[pagerank].unique: 33956
[degree].unique: 375
[closs].unique: 4331
[betweenness].unique: 5015

Normalized (nodes)

We need to reload the values



In [21]:

    
ltc = pd.DataFrame([[key,value] for key,value in ltc_dict.items()],columns=["node","lineal_threshold"])
ltc['node'] = ltc['node'].convert_objects(convert_numeric=True)
ltc.sort_values('node', inplace = True)
ltc.reset_index(drop=True, inplace = True)
ltc.head()









    



/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app






    Out[21]:






  
    
      
      node
      lineal_threshold
    
  
  
    
      0
      2
      27
    
    
      1
      3
      61
    
    
      2
      4
      14320
    
    
      3
      5
      26
    
    
      4
      6
      85



In [22]:

    
influence_model = pd.concat([ktz,ltc, pgr, deg, clos, bet], axis=1)
influence_model['lineal_threshold'] = influence_model['lineal_threshold'].convert_objects(convert_numeric=True)
del influence_model['node']
influence_model.head()









    



/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py:2: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  from ipykernel import kernelapp as app






    Out[22]:






  
    
      
      katz_centrality
      lineal_threshold
      pagerank
      degree
      closs
      betweenness
    
  
  
    
      0
      0.001769
      27
      0.000003
      0.000004
      0.000000
      0.000000
    
    
      1
      0.001769
      61
      0.000003
      0.000004
      0.000000
      0.000000
    
    
      2
      0.003088
      14320
      0.000004
      0.000320
      0.000329
      0.000002
    
    
      3
      0.001608
      26
      0.000003
      0.000094
      0.000096
      0.000000
    
    
      4
      0.001608
      85
      0.000003
      0.000324
      0.000324
      0.000000



In [23]:

    
# Normalize lineal_threshold
influence_model['lineal_threshold'] = influence_model['lineal_threshold'].astype(float)
influence_model['lineal_threshold'] = (influence_model['lineal_threshold'] / 256490)
influence_model.head()









    Out[23]:






  
    
      
      katz_centrality
      lineal_threshold
      pagerank
      degree
      closs
      betweenness
    
  
  
    
      0
      0.001769
      0.000105
      0.000003
      0.000004
      0.000000
      0.000000
    
    
      1
      0.001769
      0.000238
      0.000003
      0.000004
      0.000000
      0.000000
    
    
      2
      0.003088
      0.055831
      0.000004
      0.000320
      0.000329
      0.000002
    
    
      3
      0.001608
      0.000101
      0.000003
      0.000094
      0.000096
      0.000000
    
    
      4
      0.001608
      0.000331
      0.000003
      0.000324
      0.000324
      0.000000



In [24]:

    
influence_model.std()









    Out[24]:





katz_centrality     0.000572
lineal_threshold    0.009133
pagerank            0.000001
degree              0.000152
closs               0.003662
betweenness         0.000003
dtype: float64



In [25]:

    
print('[katz_centrality].unique: {}'.format(len(influence_model['katz_centrality'].unique())))
print('[lineal_threshold].unique: {}'.format(len(influence_model['lineal_threshold'].unique())))
print('[pagerank].unique: {}'.format(len(influence_model['pagerank'].unique())))
print('[degree].unique: {}'.format(len(influence_model['degree'].unique())))
print('[closs].unique: {}'.format(len(influence_model['closs'].unique())))
print('[betweenness].unique: {}'.format(len(influence_model['betweenness'].unique())))









    



[katz_centrality].unique: 774
[lineal_threshold].unique: 7000
[pagerank].unique: 33956
[degree].unique: 375
[closs].unique: 4331
[betweenness].unique: 5015



In [ ]:

	node	katz_centrality	lineal_threshold	pagerank	degree	closs	betweenness
0	2	0.001769	27	0.000003	0.000004	0.000000	0.000000
1	3	0.001769	61	0.000003	0.000004	0.000000	0.000000
2	4	0.003088	14320	0.000004	0.000320	0.000329	0.000002
3	5	0.001608	26	0.000003	0.000094	0.000096	0.000000
4	6	0.001608	85	0.000003	0.000324	0.000324	0.000000

	katz_centrality	lineal_threshold	pagerank	degree	closs	betweenness
0	0.001769	0.000105	0.000003	0.000004	0.000000	0.000000
1	0.001769	0.000238	0.000003	0.000004	0.000000	0.000000
2	0.003088	0.055831	0.000004	0.000320	0.000329	0.000002
3	0.001608	0.000101	0.000003	0.000094	0.000096	0.000000
4	0.001608	0.000331	0.000003	0.000324	0.000324	0.000000