In [1]:
%matplotlib inline
In [2]:
import networkx as nx
import pandas as pd
import projx as px
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as clrs
In [3]:
# Default figure size (width, height) for the plots that follow.
plt.rcParams['figure.figsize'] = (12, 7)
In [4]:
def prob_dist(itrbl):
    """Return the empirical probability of each distinct value in ``itrbl``.

    Parameters
    ----------
    itrbl : iterable of hashable values
        May be a sequence, a dict view, or a one-shot iterator/generator;
        it is consumed exactly once.

    Returns
    -------
    pandas.Series
        Indexed by the distinct values, holding ``count / total`` for each.
        Empty input yields an empty float Series.
    """
    count = {}
    total = 0
    # Tally and count in a single pass so that inputs without ``len()``
    # (generators, iterators) are supported.
    for i in itrbl:
        count[i] = count.get(i, 0) + 1
        total += 1
    if total == 0:
        return pd.Series(dtype=float)
    return pd.Series(count) / float(total)
def basic_graph_stats(g):
    """Collect size, density, path-length and clustering measures of ``g``.

    Returns a ``pandas.Series`` keyed by measure name. ``nx.diameter`` and
    ``nx.average_shortest_path_length`` are called directly, so ``g`` must be
    a graph those functions accept.
    """
    stats = {}
    stats["num_nodes"] = len(g)
    stats["num_edges"] = len(g.edges())
    stats["density"] = nx.density(g)
    stats["diameter"] = nx.diameter(g)
    stats["avg_short_path"] = nx.average_shortest_path_length(g)
    stats["avg_clust"] = nx.average_clustering(g)
    stats["transitivity"] = nx.transitivity(g)
    return pd.Series(stats)
In [5]:
# Load the graph from a GEXF file.
graph = nx.read_gexf("projections/fourteen_percent_cut.gexf")
In [6]:
# Split the graph into its connected components, largest first, so that
# subgraphs[0] is always the biggest component.
# nx.connected_component_subgraphs was removed in NetworkX 2.4; building the
# subgraphs from nx.connected_components works on old and new releases alike.
components = sorted(nx.connected_components(graph), key=len, reverse=True)
subgraphs = [graph.subgraph(nodes).copy() for nodes in components]
print([len(sub) for sub in subgraphs])
In [7]:
# Short names for the first three components, used by the cells below.
g, g1, g2 = subgraphs[0], subgraphs[1], subgraphs[2]
In [8]:
# Write the first component (g) out as a GEXF file.
nx.write_gexf(g, "projections/subgraph_fourteen_cut.gexf")
In [9]:
# Summary statistics for the first component.
basic_graph_stats(g)
Out[9]:
In [10]:
# Draw the second component with projx.
px.draw_simple_graph(g1)
In [11]:
# Draw the third component; edge_label_attr is passed as an empty string
# (presumably to suppress edge labels -- TODO confirm against projx).
px.draw_simple_graph(g2, edge_label_attr="")
In [12]:
# Node count, edge count and density of the first component.
print(g.number_of_nodes(), g.number_of_edges(), nx.density(g))
In [13]:
# Node-level centrality scores for g, each a {node: score} mapping.
# The edge attribute "weight" is passed to betweenness, eigenvector and
# PageRank; closeness and degree are called without a weight.
# NOTE(review): betweenness interprets "weight" as a path distance -- confirm
# that this matches what the attribute means in this dataset.
bc = nx.betweenness_centrality(g, weight="weight")
ec = nx.eigenvector_centrality(g, weight="weight", max_iter=500)
cc = nx.closeness_centrality(g)
# nx.degree returns a dict on NetworkX 1.x but a DegreeView on 2.x+, which has
# no .get()/.values(); dict() normalises both so later cells work either way.
deg = dict(nx.degree(g))
pr = nx.pagerank(g, max_iter=500, weight="weight")
In [14]:
def _top_ranked(scores, labels, n=10):
    """Return the ``n`` best-scoring nodes as ``(node, label, score)`` tuples."""
    scores = dict(scores)  # also accepts a NetworkX 2.x DegreeView
    best = sorted(scores, key=scores.get, reverse=True)[:n]
    return [(node, labels[node], scores[node]) for node in best]


# Node labels are read with get_node_attributes, which is available across
# NetworkX releases (the G.node accessor was removed in 2.4).
node_labels = nx.get_node_attributes(g, "label")

# One column per centrality measure; row i holds the node ranked i-th
# (0-based) for that measure as a (node, label, score) tuple.
cent_10_df = pd.DataFrame({
    "bc": _top_ranked(bc, node_labels),
    "ec": _top_ranked(ec, node_labels),
    "cc": _top_ranked(cc, node_labels),
    "dc": _top_ranked(deg, node_labels),
    "pr": _top_ranked(pr, node_labels),
})
In [15]:
# Show the top-10 table as plain text.
print(cent_10_df)
In [32]:
# Histogram of node degrees. dict(deg) accepts both a plain dict and a
# NetworkX 2.x DegreeView (which has no .values()).
pd.Series(list(dict(deg).values())).hist()
Out[32]:
In [33]:
# Empirical degree distribution: x = degree, y = fraction of nodes with it.
# dict(deg) accepts both a plain dict and a NetworkX 2.x DegreeView, and the
# list() gives prob_dist a sized sequence.
deg_prob = prob_dist(list(dict(deg).values()))
plt.scatter(deg_prob.index, deg_prob)
Out[33]:
In [34]:
# Histogram of betweenness-centrality scores.
pd.Series(list(bc.values())).hist()
Out[34]:
In [35]:
# Histogram of closeness-centrality scores.
pd.Series(list(cc.values())).hist()
Out[35]:
In [36]:
# Histogram of eigenvector-centrality scores.
pd.Series(list(ec.values())).hist()
Out[36]:
In [37]:
# Histogram of PageRank scores.
pd.Series(list(pr.values())).hist()
Out[37]:
In [38]:
# Degree assortativity coefficient of g, displayed at full precision.
nx.degree_assortativity_coefficient(g)
Out[38]:
In [39]:
# Keep the coefficient in r (it is reused in the assortativity summary
# further down) and print it rounded to one decimal place.
r = nx.degree_assortativity_coefficient(g)
print("%3.1f"%r)
In [40]:
# Order nodes from highest to lowest degree and build the adjacency matrix
# with rows/columns in that order.
# dict(deg) accepts both a plain dict and a NetworkX 2.x DegreeView (which has
# no .get() and iterates as (node, degree) pairs).
deg_lookup = dict(deg)
nodes_by_deg = sorted(deg_lookup, key=deg_lookup.get, reverse=True)
# NOTE(review): to_numpy_matrix was removed in NetworkX 3.0. Moving to
# to_numpy_array also requires changing the later
# `zoommtrx.flatten().tolist()[0]` cell, which relies on numpy.matrix semantics.
mtrx = nx.to_numpy_matrix(g, nodelist=nodes_by_deg)
In [41]:
# Gather the "weight" attribute of every edge in g and summarise it.
edge_weights = [attrs["weight"] for s, t, attrs in g.edges(data=True)]
weight_sr = pd.Series(edge_weights)
weight_sr.describe()
Out[41]:
In [42]:
# 75th percentile of the edge weights; used as the upper colour limit of the
# adjacency heatmaps below.
quant = weight_sr.quantile(.75)
In [43]:
# Enlarge the default figure size for the heatmaps that follow.
plt.rcParams['figure.figsize'] = (17, 12)
In [44]:
# Heatmap of the degree-ordered adjacency matrix. The colour scale runs from
# 0 to quant (the 75th-percentile edge weight), so heavier edges saturate.
# The figure is also saved to disk; the img/ directory must already exist.
heatmap = plt.imshow(mtrx, interpolation='nearest', vmin=0.0, vmax=quant)
plt.colorbar()
plt.savefig("img/cutlines_deg_assort.png")
In [45]:
# All rows, first 100 columns of the degree-ordered adjacency matrix.
stripmtrx = mtrx[:, :100]
In [46]:
# Heatmap of the 100-column strip, with the colour scale limited to [0, quant].
heatmap = plt.imshow(stripmtrx, vmin=0.0, vmax=quant)
plt.colorbar()
Out[46]:
In [47]:
# Top-left 50x50 block of the degree-ordered adjacency matrix. Sliced from
# the existing mtrx rather than rebuilding the whole matrix from the graph.
zoommtrx = mtrx[:50, :50]
In [48]:
# 90th percentile over all entries of the zoomed block; used as the colour
# ceiling of the zoomed heatmap.
zoom_values = zoommtrx.A1  # numpy.matrix flattened to a 1-D array
zoomquant = pd.Series(zoom_values).quantile(0.9)
In [49]:
# Heatmap of the 50x50 block, with the colour scale limited to [0, zoomquant].
heatmap = plt.imshow(zoommtrx, vmin=0.0, vmax=zoomquant)
plt.colorbar()
Out[49]:
In [50]:
# Assortativity of g with respect to the node attribute "top_place".
place = nx.attribute_assortativity_coefficient(g, "top_place")
In [51]:
# Assortativity of g with respect to the node attribute "top_genre".
genre = nx.attribute_assortativity_coefficient(g, "top_genre")
In [52]:
# Assortativity of g with respect to the node attribute "role".
role = nx.attribute_assortativity_coefficient(g, "role")
In [53]:
# Gather the four assortativity coefficients into one labelled Series
# (despite the _df suffix this is a Series, not a DataFrame).
assort_df = pd.Series(dict(deg=r, role=role, place=place, genre=genre))
assort_df
Out[53]:
In [54]:
# Average clustering coefficient of g (also reported by basic_graph_stats).
nx.average_clustering(g)
Out[54]:
In [55]:
# Histogram of the per-node clustering coefficients of g.
node_clustering = nx.clustering(g)
pd.Series(list(node_clustering.values())).hist()
Out[55]:
In [56]:
# Transitivity of g (also reported by basic_graph_stats).
nx.transitivity(g)
Out[56]:
In [57]:
# Diameter of g (also reported by basic_graph_stats).
nx.diameter(g)
Out[57]:
In [58]:
# Average shortest-path length of g (also reported by basic_graph_stats).
nx.average_shortest_path_length(g)
Out[58]: