In [1]:
%reload_ext gremlin
%matplotlib inline
import os
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
from draw_graph import draw_simple_graph # A utility function that uses NetworkX plotting API
In [2]:
dir_path = os.path.dirname(os.path.realpath('__file__'))
file_path = os.path.join(dir_path, 'grateful-dead.xml')
%gremlin graph.io(graphml()).readGraph(file_path)
Out[2]:
In [3]:
# Count every vertex in the graph. The result object is kept because a later
# cell divides by `num_verts.results` to normalise the degree counts.
num_verts = %gremlin g.V().count()
# Display the result as the cell output.
num_verts
Out[3]:
In [4]:
# Count every edge in the graph.
%gremlin g.E().count()
Out[4]:
In [5]:
# Group vertices by the number of neighbours reached via both() (in + out)
# and count how many vertices fall in each group.
deg_dist = %gremlin g.V().groupCount().by(both().count())
In [6]:
# Empirical degree distribution: for each degree k, the share of vertices
# that have that degree (count for k divided by the total vertex count).
#
# Build real lists rather than one-shot map() iterators: the original left
# `degree` and `prob` exhausted after plotting, so any later reuse of those
# names would silently see no data.
total_verts = num_verts.results  # read once instead of once per point
degree = [int(k) for k in deg_dist.results.keys()]
prob = [count / total_verts for count in deg_dist.results.values()]
# Label the axes so the figure can be read on its own.
plt.xlabel('degree')
plt.ylabel('fraction of vertices')
plt.scatter(degree, prob)
Out[6]:
In [7]:
# Count vertices per label.
label_count = %gremlin g.V().label().groupCount()
In [8]:
# Bar chart of the vertex label counts, drawn in cyan and blue.
label_count.dataframe.plot.bar(color=['c', 'b'])
Out[8]:
In [9]:
label_count = %gremlin g.E().label().groupCount()
label_count.dataframe.plot(kind='bar', color=['c', 'b', 'r'])
Out[9]:
In [10]:
# Pick the 'artist' vertex with the most incoming edges: order artists by
# in-edge count, descending, and keep only the first one.
artist = %gremlin g.V().hasLabel('artist').order().by(inE().count(), decr).limit(1)
# Keep that vertex's id; the following queries refer to it as `vid`.
vid = artist.results.id
In [11]:
# Show the property map of the selected vertex (valueMap(true) also includes
# the vertex id and label alongside the properties).
%gremlin g.V(vid).valueMap(true)
Out[11]:
In [12]:
# Number of edges pointing at the selected vertex.
%gremlin g.V(vid).inE().count()
Out[12]:
In [13]:
jerrys_labels = %gremlin g.V(vid).inE().label().groupCount()
jerrys_labels.dataframe.plot(kind='bar', color=['c', 'b'])
Out[13]:
In [14]:
# Fetch every edge touching the selected vertex, in either direction.
jerrys_ego_net = %gremlin g.V(vid).bothE()
In [15]:
# View the fetched edges as a graph object and report its node and edge counts.
graph = jerrys_ego_net.graph
print(graph.number_of_nodes(), graph.number_of_edges())
In [16]:
nodes = graph.nodes()
names = %gremlin g.V(nodes).properties('name')
labels = %gremlin g.V(nodes).label()
In [17]:
# Add names/labels to nodes.
# Pairs the i-th node with the i-th name/label result.
# NOTE(review): assumes both queries return rows in the same order as `nodes` - confirm.
name_map = {}
label_map = {}
for i, node in enumerate(nodes):
    name_map[node] = names[i].value
    label_map[node] = labels[i]
# Pass `name`/`values` by keyword: networkx 1.x declares (G, name, values) but
# 2.x declares (G, values, name), so the positional form breaks on 2.x.
nx.set_node_attributes(graph, name='name', values=name_map)
nx.set_node_attributes(graph, name='label', values=label_map)
In [18]:
# Use a large default figure size for the drawing below.
plt.rcParams.update({'figure.figsize': (18, 12)})
# Draw the ego network: node type taken from the 'label' attribute, node text
# from 'name', edge labels switched off.
# NOTE(review): `k` is presumably a layout spacing parameter - confirm in draw_graph.
draw_simple_graph(
    graph,
    label_attrs=['name'],
    node_type_attr='label',
    show_edge_labels=False,
    edge_label_attr='',
    k=0.005
)
In [19]:
# Fetch every edge in the graph.
edges = %gremlin g.E()
In [20]:
# View all fetched edges as a graph object and report its node and edge counts.
full_graph = edges.graph
print(full_graph.number_of_nodes(), full_graph.number_of_edges())
In [21]:
# Three per-node centrality measures over the full graph. Each call is
# independent of the others; results are consumed by name in the next cell.
dc = nx.degree_centrality(full_graph)
cc = nx.closeness_centrality(full_graph)
bc = nx.betweenness_centrality(full_graph)
In [22]:
# Collect the three centrality results into one frame (one column per
# measure), then show summary statistics for each column.
cent_df = pd.DataFrame(dict(closeness=cc, betweenness=bc, degree=dc))
cent_df.describe()
Out[22]: