In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob
# Notebook-wide display configuration and the list of input graph files.
# NOTE(review): the 'display.mpl_style' option may not exist in newer pandas
# releases -- confirm against the installed pandas version.
pd.set_option('display.mpl_style', 'default')
# display all the columns
pd.set_option('display.width', 5000)
pd.set_option('display.max_columns', 60)
# glob() returns a list of matching paths. This pattern has no wildcard, so
# the list contains this one path if the file exists and is empty otherwise.
gml_files = glob('../output_join/article_pos1.gml')
def calculate_graph_inf(graph, name=None):
    """Label a graph and print its networkx summary.

    Parameters
    ----------
    graph : networkx graph
        Graph to describe. Its ``name`` attribute is overwritten.
    name : str, optional
        Label stored in ``graph.name``. When omitted, the notebook-level
        ``filename`` variable is used (the original behaviour), so that
        variable must already be defined at call time.

    Returns
    -------
    None
        The summary is printed, not returned.
    """
    # Prefer the explicit label; fall back to the global `filename` so that
    # existing one-argument calls keep working.
    graph.name = filename if name is None else name
    # NOTE(review): nx.info is not provided by every networkx release --
    # confirm it exists in the installed version.
    info = nx.info(graph)
    # The parenthesised form prints the same text on Python 2 and is valid
    # syntax on Python 3, matching the other print(...) calls in the notebook.
    print(info)
    ## plot spring layout
    #plt.figure(figsize=(10,10))
    #nx.draw_spring(graph, arrows=True, with_labels=True)
def highest_centrality(cent_dict):
    """Return the ``(node, value)`` pair with the largest centrality value.

    Parameters
    ----------
    cent_dict : dict
        Mapping of node -> centrality value.

    Returns
    -------
    tuple
        ``(node, value)`` for the entry with the largest value. When several
        nodes share the largest value, the one with the largest node key wins.

    Raises
    ------
    IndexError
        If ``cent_dict`` is empty.
    """
    if not cent_dict:
        # Same exception type the previous implementation raised on empty
        # input, but with an explicit message.
        raise IndexError('cent_dict is empty; there is no highest centrality')
    # items() works on Python 2 and 3 (iteritems() is Python 2 only).
    # Ranking by (value, node) keeps the original tie-breaking order without
    # sorting the whole dictionary.
    return max(cent_dict.items(), key=lambda item: (item[1], item[0]))
In [2]:
# Empty accumulator for the per-node metrics of all graphs.
combined_df = pd.DataFrame()

# Empty graph-level table with one column per entry in data_columns.
data_columns = ['name', 'sentiment']
data = pd.DataFrame(columns=data_columns)
In [3]:
# Build one table of per-node metrics for every GML file in `gml_files`.
# graph = graph as read from the file, ugraph = its undirected copy.

def _metric_frame(metric, column):
    """Turn node -> value data into a one-column DataFrame indexed by node."""
    # dict() accepts both a plain dict and an iterable of (node, value)
    # pairs, so this works whether networkx returns a dict or a view.
    frame = pd.DataFrame.from_dict(dict(metric), orient='index')
    frame.columns = [column]
    return frame


graph_rows = []   # one {'name', 'sentiment'} record per graph
node_frames = []  # one per-node metrics frame per graph

for graph_num, gml_graph in enumerate(gml_files):
    graph = nx.read_gml(gml_graph)

    # NOTE(review): ugraph is only consumed by the commented-out
    # calculate_graph_inf(ugraph) call below; it is kept unchanged here.
    ugraph = graph.to_undirected()
    # adding missing edges back
    U = graph.to_undirected(reciprocal=True)
    e = U.edges()
    ugraph.add_edges_from(e)

    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 40)
    print(gml_graph)
    #calculate_graph_inf(graph)
    #calculate_graph_inf(ugraph)

    # The sentiment label is the name of the directory holding the file.
    # NOTE(review): for a path such as '../output_join/x.gml' this yields
    # 'output_join', which matches none of the 'negative' / 'positive' /
    # 'neutral' filters used later -- confirm the intended directory layout.
    sent = os.path.basename(filepath)
    graph_values = {'name': filename, 'sentiment': sent}
    graph_rows.append(graph_values)

    # Each metric is computed exactly once per graph.
    degree = nx.degree(graph)
    deg_cent = nx.degree_centrality(graph)
    bet_cent = nx.betweenness_centrality(graph)
    clo_cent = nx.closeness_centrality(graph)

    # One column per metric, aligned on the node label.
    node_df = pd.concat(
        [
            _metric_frame(degree, 'degree'),
            _metric_frame(deg_cent, 'degree centrality'),
            _metric_frame(bet_cent, 'betweenness centrality'),
            _metric_frame(clo_cent, 'closeness centrality'),
        ],
        axis=1,
    )
    node_df.index.name = 'node'
    node_df = node_df.reset_index()

    # Stamp the graph-level labels on every node row. Inserting the columns
    # directly avoids forward-filling the whole frame, which could also hide
    # genuine NaNs in the metric columns.
    node_df.insert(0, 'name', filename)
    node_df.insert(1, 'sentiment', sent)
    node_frames.append(node_df)

    # Uncomment to stop after the first three graphs while debugging:
    # if graph_num == 2:
    #     break

# Assemble the result tables once, after the loop, instead of growing a
# DataFrame row by row. Re-running this cell rebuilds both tables from
# scratch rather than appending duplicate rows.
data = pd.DataFrame(graph_rows, columns=['name', 'sentiment'])
# pd.concat raises on an empty list, so fall back to an empty frame when no
# graph files were found. Row indices restart at 0 for each graph, as before.
combined_df = pd.concat(node_frames) if node_frames else pd.DataFrame()
In [4]:
# Display the combined per-node table (the bare expression is the cell output).
combined_df
Out[4]:
In [5]:
# save dataframe to csv
# No index argument is passed, so the DataFrame index is written as well.
# NOTE(review): the output is named 'neg1_df.csv' while the graph loaded at
# the top of the notebook is 'article_pos1.gml' -- confirm the file name.
combined_df.to_csv('neg1_df.csv', encoding = 'utf-8')
In [ ]:
# Partition the node table into one sub-table per sentiment label,
# selecting rows by the value in the 'sentiment' column.
neg_node_df, pos_node_df, neu_node_df = (
    combined_df[combined_df['sentiment'] == label]
    for label in ('negative', 'positive', 'neutral')
)
In [ ]:
# Write each sentiment sub-table to its own UTF-8 encoded CSV file.
for node_table, csv_path in (
    (neg_node_df, 'negative_node_df.csv'),
    (pos_node_df, 'positive_node_df.csv'),
    (neu_node_df, 'neutral_node_df.csv'),
):
    node_table.to_csv(csv_path, encoding='utf-8')
In [6]:
# Read the saved CSV back to check the round trip.
# The file was written together with its DataFrame index, which lands in the
# first, unnamed CSV column. index_col=0 restores it as the index instead of
# loading it as a spurious 'Unnamed: 0' data column.
df = pd.read_csv('neg1_df.csv', index_col=0, encoding='utf-8')
df
Out[6]:
In [ ]:
In [ ]: