In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob
plt.style.use('ggplot')
pd.set_option('display.width', 5000)
pd.set_option('display.max_columns', 60)
In [2]:
def calculate_graph_inf(graph, name=None):
    """Label ``graph`` and print a short summary of it.

    Parameters
    ----------
    graph : networkx graph
        Graph to label and describe. Its ``name`` attribute is overwritten.
    name : str, optional
        Label to store in ``graph.name``. When omitted, the notebook-level
        variable ``filename`` is used, which is how existing callers rely on
        this function (they set ``filename`` right before calling it).
    """
    # Keep the historical fallback to the global so existing calls still work.
    graph.name = filename if name is None else name
    # nx.info was removed in NetworkX 3.0; str(graph) is the fallback there.
    info = nx.info(graph) if hasattr(nx, 'info') else str(graph)
    # Parenthesised form is valid under both Python 2 and Python 3.
    print(info)
def highest_centrality(cent_dict):
    """Return the ``(node, value)`` pair with the largest centrality value.

    Parameters
    ----------
    cent_dict : dict
        Mapping of node -> centrality score.

    Returns
    -------
    tuple
        ``(node, value)`` for the highest score. When several nodes share
        the highest score, the one with the largest node key is returned
        (same result as sorting ``(value, node)`` pairs in descending order).

    Raises
    ------
    ValueError
        If ``cent_dict`` is empty.
    """
    if not cent_dict:
        raise ValueError("cent_dict is empty; there is no highest-centrality node")
    # .items() works on Python 2 and 3; compare on (value, node) so ties on
    # the score are broken by the node key.
    node, value = max(cent_dict.items(), key=lambda item: (item[1], item[0]))
    return (node, value)
In [3]:
# run undirected
# glob() returns matches in arbitrary order; sort them so the rows of the
# summary table come out in the same order on every run.
gml_files = sorted(glob('../output/network/article_u_*.gml'))
# load undirected
#gml_files = glob('../output/network/article_u_pos.gml')
#gml_files = glob('../output/network/article_u_neg.gml')
#gml_files = glob('../output/network/article_u_neu.gml')
In [4]:
gml_files
Out[4]:
In [5]:
# Column layout of the graph-level summary table.
# 'sentiment' and 'avg deg' are intentionally left out for now.
network_data_columns = [
    'name',
    # size
    '# nodes', '# edges',
    # global structure
    'density', 'deg assort coef',
    # mean centralities
    'avg deg cent', 'avg bet cent', 'avg clo cent',
    # top node per centrality
    'high deg cent', 'high bet cent', 'high clo cent',
    # connectivity
    'avg node conn', '# conn comp', 'gc size',
]
# Empty frame with that layout, ready to receive rows.
network_data = pd.DataFrame(columns=network_data_columns)
In [6]:
# Build one summary row per GML file and add all rows to network_data at once.
summary_rows = []
for gml_graph in gml_files:
    graph = nx.read_gml(gml_graph)
    # calculate_graph_inf reads the notebook-level `filename`, so set it first.
    filename = os.path.basename(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    nodes = nx.number_of_nodes(graph)
    edges = nx.number_of_edges(graph)
    density = float("{0:.4f}".format(nx.density(graph)))
    avg_node_con = float("{0:.4f}".format(nx.average_node_connectivity(graph)))
    deg_assort_coeff = float("{0:.4f}".format(nx.degree_assortativity_coefficient(graph)))

    # Compute each centrality once and reuse it for both the mean and the
    # top node (the original recomputed every one of them).
    deg_cen = nx.degree_centrality(graph)
    bet_cen = nx.betweenness_centrality(graph)
    clo_cen = nx.closeness_centrality(graph)

    # list(...) is required on Python 3, where .values() is a view that
    # numpy cannot turn into a numeric array directly.
    avg_deg_cen = np.mean(list(deg_cen.values()))
    avg_bet_cen = np.mean(list(bet_cen.values()))
    avg_clo_cen = np.mean(list(clo_cen.values()))

    highest_deg_cen = highest_centrality(deg_cen)
    highest_bet_cen = highest_centrality(bet_cen)
    highest_clo_cen = highest_centrality(clo_cen)

    # '# conn comp' and 'gc size' are not computed here and stay empty.
    graph_values = {'name': filename,
                    '# nodes': nodes,
                    '# edges': edges,
                    'density': density,
                    'deg assort coef': deg_assort_coeff,
                    'avg deg cent': "%.4f" % avg_deg_cen,
                    'avg bet cent': "%.4f" % avg_bet_cen,
                    'avg clo cent': "%.4f" % avg_clo_cen,
                    'high deg cent': highest_deg_cen,
                    'high bet cent': highest_bet_cen,
                    'high clo cent': highest_clo_cen,
                    'avg node conn': avg_node_con
                    }
    summary_rows.append(graph_values)

# DataFrame.append was removed in pandas 2.0; a single concat after the loop
# also avoids re-copying the frame on every iteration.
if summary_rows:
    network_data = pd.concat(
        [network_data, pd.DataFrame(summary_rows, columns=network_data.columns)],
        ignore_index=True)
In [7]:
# print network data
network_data
Out[7]:
In [8]:
# save
#network_data.to_csv('../output/df/all-stats-undirected.csv')
In [11]:
# run directed
# glob() returns matches in arbitrary order; sort them so the rows of the
# summary table come out in the same order on every run.
gml_files = sorted(glob('../output/network/article_*1.gml'))
# load directed
#gml_files = glob('../output/network/article_pos1.gml')
#gml_files = glob('../output/network/article_neg1.gml')
#gml_files = glob('../output/network/article_neu1.gml')
In [12]:
gml_files
Out[12]:
In [13]:
# Build one summary row per GML file and add all rows to network_data at once.
# NOTE(review): network_data is not re-created before this loop, so these rows
# are added after whatever rows it already holds - confirm that is intended
# before saving the result as the directed-only statistics.
summary_rows = []
for gml_graph in gml_files:
    graph = nx.read_gml(gml_graph)
    # calculate_graph_inf reads the notebook-level `filename`, so set it first.
    filename = os.path.basename(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    nodes = nx.number_of_nodes(graph)
    edges = nx.number_of_edges(graph)
    density = float("{0:.4f}".format(nx.density(graph)))
    avg_node_con = float("{0:.4f}".format(nx.average_node_connectivity(graph)))
    deg_assort_coeff = float("{0:.4f}".format(nx.degree_assortativity_coefficient(graph)))

    # Compute each centrality once and reuse it for both the mean and the
    # top node (the original recomputed every one of them).
    deg_cen = nx.degree_centrality(graph)
    bet_cen = nx.betweenness_centrality(graph)
    clo_cen = nx.closeness_centrality(graph)

    # list(...) is required on Python 3, where .values() is a view that
    # numpy cannot turn into a numeric array directly.
    avg_deg_cen = np.mean(list(deg_cen.values()))
    avg_bet_cen = np.mean(list(bet_cen.values()))
    avg_clo_cen = np.mean(list(clo_cen.values()))

    highest_deg_cen = highest_centrality(deg_cen)
    highest_bet_cen = highest_centrality(bet_cen)
    highest_clo_cen = highest_centrality(clo_cen)

    # '# conn comp' and 'gc size' are not computed here and stay empty.
    graph_values = {'name': filename,
                    '# nodes': nodes,
                    '# edges': edges,
                    'density': density,
                    'deg assort coef': deg_assort_coeff,
                    'avg deg cent': "%.4f" % avg_deg_cen,
                    'avg bet cent': "%.4f" % avg_bet_cen,
                    'avg clo cent': "%.4f" % avg_clo_cen,
                    'high deg cent': highest_deg_cen,
                    'high bet cent': highest_bet_cen,
                    'high clo cent': highest_clo_cen,
                    'avg node conn': avg_node_con
                    }
    summary_rows.append(graph_values)

# DataFrame.append was removed in pandas 2.0; a single concat after the loop
# also avoids re-copying the frame on every iteration.
if summary_rows:
    network_data = pd.concat(
        [network_data, pd.DataFrame(summary_rows, columns=network_data.columns)],
        ignore_index=True)
In [14]:
# print network data
network_data
Out[14]:
In [15]:
# save
#network_data.to_csv('../output/df/all-stats-directed.csv')
In [ ]:
# run directed
#gml_files = glob('../output/network/article_*.gml')
# run undirected
#gml_files = glob('../output/network/article_u_*.gml')
# load directed
#gml_files = glob('../output/network/article_pos1.gml')
#gml_files = glob('../output/network/article_neg1.gml')
#gml_files = glob('../output/network/article_neu1.gml')
# load undirected
#gml_files = glob('../output/network/article_u_pos.gml')
#gml_files = glob('../output/network/article_u_neg.gml')
#gml_files = glob('../output/network/article_u_neu.gml')
In [ ]:
# Accumulator for the node-level metrics of every processed graph.
combined_df = pd.DataFrame()
# Per-graph bookkeeping table; it starts with just the file name column.
data_columns = ['name']
data = pd.DataFrame(columns=data_columns)
In [ ]:
# Collect, for every GML file, one bookkeeping row (-> data) and one table of
# node-level metrics tagged with the file name and sentiment (-> combined_df).
data_rows = []
node_frames = []
for gml_graph in gml_files:
    graph = nx.read_gml(gml_graph)
    # calculate_graph_inf reads the notebook-level `filename`, so set it first.
    filename = os.path.basename(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    # `sent` was only defined in a commented-out line, so this cell raised
    # NameError on a fresh kernel. Placeholder label; set it per run as needed.
    sent = ""
    graph_values = {'name': filename,
                    'sentiment': sent
                    }
    data_rows.append(graph_values)

    # Each centrality is computed once (the original computed them twice).
    deg_cent = nx.degree_centrality(graph)
    bet_cent = nx.betweenness_centrality(graph)
    clo_cent = nx.closeness_centrality(graph)
    node_metrics = [
        # dict(...) accepts both the plain dict returned by NetworkX 1.x and
        # the DegreeView returned by NetworkX 2+.
        ('degree', dict(nx.degree(graph))),
        ('deg cent', deg_cent),
        ('bet cent', bet_cent),
        ('clo cent', clo_cent),
    ]

    # One single-column frame per metric, indexed by node, joined side by side.
    frames = []
    for column, values_by_node in node_metrics:
        metric_df = pd.DataFrame.from_dict(values_by_node, orient='index')
        metric_df.columns = [column]
        frames.append(metric_df)
    node_df = pd.concat(frames, axis=1)
    node_df.index.name = 'node'
    node_df = node_df.reset_index()

    # The graph-level labels occupy row 0 only; forward-fill copies them down
    # to every node row of this graph.
    values = pd.DataFrame(graph_values, columns=('name', 'sentiment'), index=[0])
    df = pd.concat([values, node_df], axis=1)
    # .ffill() replaces the deprecated fillna(method='ffill').
    df = df.ffill()
    node_frames.append(df)

# DataFrame.append was removed in pandas 2.0; concatenate once after the loop.
if data_rows:
    data = pd.concat([data, pd.DataFrame(data_rows)], ignore_index=True)
if node_frames:
    # No ignore_index: each graph keeps its own 0..n-1 row labels, as before.
    combined_df = pd.concat([combined_df] + node_frames)
In [ ]:
# print entire network
combined_df