In [1]:
import pandas as pd
import csv
In [6]:
# Retweet network: read every pre-computed centrality table into its own frame.
path = '../../data/processed/centrality_measures'
path_ltc = '../../data/processed/LTC'

# The five tables below share one directory, so they are read in a single pass
# and unpacked in the same order as the file names are listed.
ktz, pgr, deg, clos, bet = (
    pd.read_csv(path + csv_name)
    for csv_name in (
        '/katz_centrality_1e1.csv',
        '/page_rank.csv',
        '/degree_centrality.csv',
        '/closseness_centrality.csv',
        '/betweenness_centrality.csv',
    )
)

# The alternative linear-threshold table is stored in the LTC directory.
ltcA = pd.read_csv(path_ltc + '/lt_centrality_alternative.csv')
In [3]:
#ltc = (ltc - ltc.mean()) / (ltc.max() - ltc.min())
In [8]:
# Read the two-column CSV into a {first field: second field} mapping.
# Both fields are kept as the raw strings produced by csv.reader; numeric
# conversion happens later on the DataFrame.
ltc_dict = {}
# newline='' is what the csv module asks for so that it can handle line
# endings (and newlines embedded in quoted fields) itself.
with open(path_ltc + '/lt_centrality.csv', 'r', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        if not row:
            # csv.reader yields [] for a blank line; skip it rather than
            # failing on the unpacking below.
            continue
        # Any row that does not have exactly two fields still raises
        # ValueError, so malformed input is not silently accepted.
        node_id, score = row
        ltc_dict[node_id] = score
In [9]:
# Turn the mapping into a two-column frame: one row per (key, value) pair.
ltc = pd.DataFrame(list(ltc_dict.items()), columns=["node", "lineal_threshold"])

# Series.convert_objects() no longer exists in current pandas; pd.to_numeric
# with errors='coerce' performs the same conversion (non-numeric -> NaN).
ltc['node'] = pd.to_numeric(ltc['node'], errors='coerce')

# Order rows by node and rebuild a clean 0..n-1 index. Done without
# inplace=True so the cell produces a fresh frame each time it runs.
ltc = ltc.sort_values('node').reset_index(drop=True)
ltc.head()
Out[9]:
In [10]:
# Remove the 'node' column from every frame except ktz, which is not touched
# here and therefore keeps its own 'node' column.
# NOTE(review): the column-wise concat later in the notebook lines rows up by
# index, so this presumably relies on all frames listing nodes in the same
# order - confirm.
for centrality_frame in (ltcA, ltc, pgr, deg, clos, bet):
    del centrality_frame['node']
In [11]:
# Place all centrality tables side by side (axis=1 aligns rows on the index).
influence_model = pd.concat([ktz, ltc, ltcA, pgr, deg, clos, bet], axis=1)

# The linear-threshold values were read as strings by csv.reader, so convert
# them to numbers. Series.convert_objects() no longer exists in current
# pandas; pd.to_numeric(errors='coerce') is the equivalent (bad values -> NaN).
influence_model['lineal_threshold'] = pd.to_numeric(
    influence_model['lineal_threshold'], errors='coerce'
)
influence_model.head()
Out[11]:
In [12]:
# Select the columns to plot. The explicit .copy() gives an independent frame,
# so the column assignment below does not write into a slice of the wider
# frame (which is what triggers pandas' SettingWithCopyWarning).
influence_model = influence_model[
    ['node', 'lineal_threshold', 'katz_centrality', 'pagerank']
].copy()

# Divisor used to normalise lineal_threshold.
# NOTE(review): presumably the number of nodes in the network - confirm.
LTR_NORMALIZER = 256490

# Normalize lineal_threshold.
# Caveat: running this cell twice divides the column twice.
influence_model['lineal_threshold'] = (
    influence_model['lineal_threshold'].astype(float) / LTR_NORMALIZER
)
In [13]:
# Display the frame as it stands after the column selection and the
# lineal_threshold scaling performed in the previous cell.
influence_model
Out[13]:
In [14]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%pylab inline
pylab.rcParams['figure.figsize'] = (12, 7)
In [17]:
# Human-readable column labels used from here on.
display_names = {
    "lineal_threshold": "LTR",
    "katz_centrality": "Katz centrality",
    "pagerank": "PageRank",
}
# index=str additionally converts every index label to a string.
influence_model = influence_model.rename(index=str, columns=display_names)
In [18]:
# Store the node identifiers as strings.
# NOTE(review): presumably done so the identifier column is treated as a label
# rather than as a numeric series by the plot below - confirm.
node_as_text = influence_model['node'].astype(str)
influence_model['node'] = node_as_text

# One subplot per plotted column; binding the result to `_` keeps the returned
# axes from being echoed as cell output.
_ = influence_model.plot(subplots=True)
In [11]:
#_ = influence_model.plot.bar()
In [12]:
# Pearson correlation (the default method) between the numeric columns.
# 'node' has been cast to str earlier; recent pandas raises on non-numeric
# columns in corr() instead of silently dropping them, so restrict the frame
# to numeric columns explicitly.
influence_model.select_dtypes(include='number').corr()
Out[12]:
In [13]:
# Spearman rank correlation between the numeric columns.
# 'node' has been cast to str earlier; recent pandas raises on non-numeric
# columns in corr() instead of silently dropping them, so restrict the frame
# to numeric columns explicitly.
influence_model.select_dtypes(include='number').corr(method='spearman')
Out[13]:
In [14]:
# Kendall tau correlation between the numeric columns.
# 'node' has been cast to str earlier; recent pandas raises on non-numeric
# columns in corr() instead of silently dropping them, so restrict the frame
# to numeric columns explicitly.
influence_model.select_dtypes(include='number').corr(method='kendall')
Out[14]: