In [1]:
import pandas as pd
import csv
In [2]:
# Retweet network: load the precomputed centrality tables from the processed-data folder.
path = '../../data/processed'
# One read per file, in the same order as the names on the left-hand side.
ktz, pgr, deg, clos, bet = (
    pd.read_csv(path + suffix)
    for suffix in (
        '/katz_centrality_1e1.csv',
        '/page_rank.csv',
        '/degree_centrality.csv',
        '/closseness_centrality.csv',
        '/betweenness_centrality.csv',
    )
)
In [3]:
# Read the linear-threshold centrality file into a {key: value} mapping.
# Both keys and values are kept as the raw strings produced by csv.reader;
# numeric conversion happens in later cells.
ltc_dict = {}
# newline='' lets the csv module do its own line-ending handling (per the csv docs).
with open(path + '/lt_centrality.csv', 'r', newline='') as csvfile:
    lineal_threshold = csv.reader(csvfile, delimiter=',')
    for row in lineal_threshold:
        if not row:
            # A blank line yields an empty row; skip it instead of failing to unpack.
            continue
        k, v = row  # every non-empty row is expected to hold exactly two fields
        ltc_dict[k] = v
In [4]:
# Turn the {node: value} mapping into a two-column frame ordered by node.
ltc = pd.DataFrame(list(ltc_dict.items()), columns=["node", "lineal_threshold"])
# Series.convert_objects was deprecated and later removed from pandas (1.0);
# pd.to_numeric(errors='coerce') is the replacement and likewise turns
# unparseable values into NaN.
ltc['node'] = pd.to_numeric(ltc['node'], errors='coerce')
# Sort by node and renumber the rows from 0 so the frame can later be placed
# next to the other tables by row position.
ltc = ltc.sort_values('node').reset_index(drop=True)
ltc.head()
Out[4]:
In [5]:
# Remove the 'node' column from every table except ktz, which keeps its copy.
# Not idempotent: a second run raises KeyError because the column is already gone.
for frame in (ltc, pgr, deg, clos, bet):
    del frame['node']
In [6]:
# Place all centrality tables side by side. axis=1 aligns rows on the index,
# so this assumes every table lists the nodes in the same order — TODO confirm.
influence_model = pd.concat([ktz, ltc, pgr, deg, clos, bet], axis=1)
# The values came out of csv.reader as strings. Series.convert_objects was
# deprecated and later removed from pandas (1.0), so use pd.to_numeric;
# errors='coerce' keeps the old behaviour of mapping unparseable entries to NaN.
influence_model['lineal_threshold'] = pd.to_numeric(
    influence_model['lineal_threshold'], errors='coerce'
)
influence_model.head()
Out[6]:
In [7]:
# Discard the 'node' column, then show the standard deviation of each
# remaining column.
influence_model = influence_model.drop('node', axis=1)
influence_model.std()
Out[7]:
In [8]:
# Report how many distinct values each measure takes (NaN, if present, counts as one value).
unique_reports = (
    ('[katz_centrality].unique: {}', 'katz_centrality'),
    ('[lineal_threshold].unique: {}', 'lineal_threshold'),
    ('[pagerank].unique: {}', 'pagerank'),
    ('[degree].unique: {}', 'degree'),
    ('[closs].unique: {}', 'closs'),
    ('[betweenness].unique: {}', 'betweenness'),
)
for template, column in unique_reports:
    print(template.format(len(influence_model[column].unique())))
In [9]:
# Normalize lineal_threshold: cast to float and divide by a fixed constant.
# NOTE(review): 53015 is unexplained here — presumably a network size; confirm.
# Re-running this cell divides the column again.
influence_model['lineal_threshold'] = influence_model['lineal_threshold'].astype(float) / 53015
In [10]:
# Standard deviation of every column of influence_model.
influence_model.std()
Out[10]:
In [11]:
# Distinct-value count per measure (NaN, if present, counts as one value).
unique_reports = (
    ('[katz_centrality].unique: {}', 'katz_centrality'),
    ('[lineal_threshold].unique: {}', 'lineal_threshold'),
    ('[pagerank].unique: {}', 'pagerank'),
    ('[degree].unique: {}', 'degree'),
    ('[closs].unique: {}', 'closs'),
    ('[betweenness].unique: {}', 'betweenness'),
)
for template, column in unique_reports:
    print(template.format(len(influence_model[column].unique())))
In [21]:
# Rebuild the linear-threshold frame (including its 'node' column) from ltc_dict.
ltc = pd.DataFrame(list(ltc_dict.items()), columns=["node", "lineal_threshold"])
# Series.convert_objects was deprecated and later removed from pandas (1.0);
# pd.to_numeric(errors='coerce') is the replacement and likewise turns
# unparseable values into NaN.
ltc['node'] = pd.to_numeric(ltc['node'], errors='coerce')
# Order by node and renumber rows from 0 for positional alignment with the other tables.
ltc = ltc.sort_values('node').reset_index(drop=True)
ltc.head()
Out[21]:
In [22]:
# Place all centrality tables side by side. axis=1 aligns rows on the index,
# so this assumes every table lists the nodes in the same order — TODO confirm.
influence_model = pd.concat([ktz, ltc, pgr, deg, clos, bet], axis=1)
# Series.convert_objects was deprecated and later removed from pandas (1.0);
# pd.to_numeric with errors='coerce' keeps the old behaviour of mapping
# unparseable entries to NaN.
influence_model['lineal_threshold'] = pd.to_numeric(
    influence_model['lineal_threshold'], errors='coerce'
)
# Drop the node column(s) so they are not part of the statistics computed later.
del influence_model['node']
influence_model.head()
Out[22]:
In [23]:
# Normalize lineal_threshold: cast to float and divide by a fixed constant.
# NOTE(review): 256490 is unexplained here — presumably a network size; confirm.
# Re-running this cell divides the column again.
influence_model['lineal_threshold'] = influence_model['lineal_threshold'].astype(float) / 256490
influence_model.head()
Out[23]:
In [24]:
# Standard deviation of every column of influence_model.
influence_model.std()
Out[24]:
In [25]:
# Distinct-value count per measure (NaN, if present, counts as one value).
unique_reports = (
    ('[katz_centrality].unique: {}', 'katz_centrality'),
    ('[lineal_threshold].unique: {}', 'lineal_threshold'),
    ('[pagerank].unique: {}', 'pagerank'),
    ('[degree].unique: {}', 'degree'),
    ('[closs].unique: {}', 'closs'),
    ('[betweenness].unique: {}', 'betweenness'),
)
for template, column in unique_reports:
    print(template.format(len(influence_model[column].unique())))
In [ ]: