In [44]:
%matplotlib inline

In [45]:
import networkx as nx
import pandas as pd
import projx as px
import matplotlib.pyplot as plt

In [46]:
# Initial default size for matplotlib figures; changed again further down.
plt.rcParams['figure.figsize'] = (12, 7)

In [47]:
def prob_dist(itrbl):
    """Return the empirical probability of each distinct value in ``itrbl``.

    Parameters
    ----------
    itrbl : iterable of hashable
        Observations to tabulate. Any iterable is accepted, including
        one-shot iterators and generators, because the number of
        observations is tallied during the single counting pass rather
        than obtained from ``len()``.

    Returns
    -------
    pandas.Series
        Indexed by the distinct values; each entry is that value's count
        divided by the total number of observations. Empty input gives an
        empty Series.
    """
    count = {}
    total = 0
    for item in itrbl:
        count[item] = count.get(item, 0) + 1
        total += 1
    sr = pd.Series(count, dtype=float)
    # Guard the division so empty input never divides by zero.
    return sr / total if total else sr


def basic_graph_stats(g):
    """Summarise size, density, path and clustering measures of graph ``g``.

    Returns a pandas Series keyed by measure name: ``num_nodes``,
    ``num_edges``, ``density``, ``diameter``, ``avg_short_path``,
    ``avg_clust`` and ``transitivity``. The diameter and average shortest
    path length come from NetworkX, which raises an error for graphs that
    are not connected.
    """
    # Size measures first, then the NetworkX measures in a fixed order.
    node_total = g.number_of_nodes()
    edge_total = g.number_of_edges()
    density = nx.density(g)
    diameter = nx.diameter(g)
    mean_path = nx.average_shortest_path_length(g)
    mean_clustering = nx.average_clustering(g)
    transitivity = nx.transitivity(g)
    return pd.Series({
        "num_nodes": node_total,
        "num_edges": edge_total,
        "density": density,
        "diameter": diameter,
        "avg_short_path": mean_path,
        "avg_clust": mean_clustering,
        "transitivity": transitivity,
    })

In [48]:
# Load the graph stored in the GEXF file.
graph = nx.read_gexf("projections/ninety_percent_cut.gexf")

In [49]:
# Split the graph into its connected components, each as an independent copy.
# nx.connected_component_subgraphs was removed in NetworkX 2.4; building the
# subgraphs from nx.connected_components yields the same components in the same
# order and runs on both old and current NetworkX releases.
subgraphs = [graph.subgraph(nodes).copy() for nodes in nx.connected_components(graph)]
# Component sizes, in the order the components were found.
print([len(sub) for sub in subgraphs])


[76, 2, 4, 2, 5]

In [50]:
# Give each component its own name. Per the sizes printed above, index 0 is the
# 76-node component, which the rest of the notebook analyses as ``g``.
# NOTE(review): this relies on the component order (and on there being exactly
# five components) staying the same between runs — confirm, or pick the largest
# explicitly with max(subgraphs, key=len).
g = subgraphs[0]
g1 = subgraphs[1]
g2 = subgraphs[2]
g3 = subgraphs[3]
g4 = subgraphs[4]

In [51]:
# Save the 76-node component as its own GEXF file.
nx.write_gexf(g, "projections/subgraph_ninety_cut.gexf")

In [52]:
# Summary statistics for the component under analysis.
basic_graph_stats(g)


Out[52]:
avg_clust           0.693074
avg_short_path      2.892281
density             0.071930
diameter            6.000000
num_edges         205.000000
num_nodes          76.000000
transitivity        0.314860
dtype: float64

In [53]:
# Node count, edge count and density of g; the same three values appear in the
# basic_graph_stats(g) summary.
print(len(g), len(g.edges()), nx.density(g))


(76, 205, 0.07192982456140351)

In [54]:
# Larger figures for the graph drawings that follow.
plt.rcParams['figure.figsize'] = (17, 12)

In [55]:
# Draw the 76-node component.
# NOTE(review): edge_label_attr="" presumably turns edge labels off — confirm
# against the projx documentation.
px.draw_simple_graph(g, edge_label_attr="")



In [56]:
# Draw component g1 (subgraphs[1]).
px.draw_simple_graph(g1, edge_label_attr="")



In [57]:
# Draw component g2 (subgraphs[2]).
px.draw_simple_graph(g2, edge_label_attr="")



In [58]:
# Draw component g3 (subgraphs[3]).
px.draw_simple_graph(g3, edge_label_attr="")



In [59]:
# Draw component g4 (subgraphs[4]).
px.draw_simple_graph(g4, edge_label_attr="")


Centrality


In [60]:
# Centrality measures for g. Eigenvector centrality and PageRank get a raised
# iteration cap of 500.
# NOTE(review): NetworkX treats ``weight`` as a path length (distance) for
# betweenness but as connection strength for eigenvector centrality and
# PageRank — confirm the edge weights are meant to be read both ways.
bc = nx.betweenness_centrality(g, weight="weight")
ec = nx.eigenvector_centrality(g, weight="weight", max_iter=500)
# Closeness is called without a distance attribute, i.e. on hop counts.
cc = nx.closeness_centrality(g)
# dict() keeps ``deg`` a plain node -> degree mapping. On NetworkX >= 2.0
# nx.degree returns a view without .get()/.values(), which later cells rely on;
# on NetworkX 1.x this is simply a copy of the returned dict.
deg = dict(nx.degree(g))
pr = nx.pagerank(g, max_iter=500, weight="weight")

In [61]:
def top_ranked(scores, labels, n=10):
    """Return ``(node, label, score)`` tuples for the ``n`` highest scores.

    ``scores`` is a node -> value mapping (or an iterable of ``(node, value)``
    pairs, such as the degree view of NetworkX >= 2.0) and ``labels`` maps
    each node to its display label. The result is ordered from highest to
    lowest score. A ``KeyError`` is raised if a selected node has no label.
    """
    if not hasattr(scores, "get"):
        # Pair-iterables (e.g. a degree view) are turned into a mapping.
        scores = dict(scores)
    best = sorted(scores, key=scores.get, reverse=True)[:n]
    return [(node, labels[node], scores[node]) for node in best]


# Labels are looked up through nx.get_node_attributes, which exists on both old
# and current NetworkX; the ``g.node`` attribute was removed in NetworkX 2.4.
node_labels = nx.get_node_attributes(g, "label")

# One column per centrality measure, each holding its ten top-ranked nodes.
cent_10_df = pd.DataFrame({
    "bc": top_ranked(bc, node_labels),
    "ec": top_ranked(ec, node_labels),
    "cc": top_ranked(cc, node_labels),
    "dc": top_ranked(deg, node_labels),
    "pr": top_ranked(pr, node_labels)
})

In [62]:
# Plain-text dump of the top-ten table; long tuples are truncated by pandas.
print(cent_10_df)


                                                  bc  \
0   (440, Felix Lope de Vega Carpio, 0.405405405405)   
1  (208, Miguel de Cervantes Saavedra, 0.35567567...   
2  (100, Gutierre de Cetina, Vicario General, 0.1...   
3                  (70, Mateo Aleman, 0.14978978979)   
4             (362, Huberto Antonio, 0.141981981982)   
5                (18, Roger Velpius, 0.132972972973)   
6  (215, Francisco Murcia de la Llana, 0.12288288...   
7             (352, Marcos Teixiera, 0.120900900901)   
8          (331, Jose de Valdivielso, 0.10990990991)   
9          (209, Ruy Pirez da Veiga, 0.094954954955)   

                                                  cc  \
0  (215, Francisco Murcia de la Llana, 0.53571428...   
1  (100, Gutierre de Cetina, Vicario General, 0.5...   
2           (208, Miguel de Cervantes Saavedra, 0.5)   
3   (440, Felix Lope de Vega Carpio, 0.487012987013)   
4           (212, Juan de la Cuesta, 0.474683544304)   
5                    (362, Huberto Antonio, 0.46875)   
6         (331, Jose de Valdivielso, 0.438596491228)   
7  (184, Pedro Fernandez de Castro, Conde de Lemo...   
8                (18, Roger Velpius, 0.433526011561)   
9         (214, Hernando de Vallejo, 0.421348314607)   

                                                  dc  \
0               (440, Felix Lope de Vega Carpio, 34)   
1            (215, Francisco Murcia de la Llana, 26)   
2            (208, Miguel de Cervantes Saavedra, 23)   
3     (100, Gutierre de Cetina, Vicario General, 20)   
4                     (331, Jose de Valdivielso, 13)   
5  (184, Pedro Fernandez de Castro, Conde de Lemo...   
6                       (212, Juan de la Cuesta, 11)   
7                   (246, Pedro de Padilla, Fray, 8)   
8                              (70, Mateo Aleman, 8)   
9  (138, Alfonso Lopez de Zuniga y Perez de Guzma...   

                                                  ec  \
0  (215, Francisco Murcia de la Llana, 0.46839778...   
1  (208, Miguel de Cervantes Saavedra, 0.35914696...   
2   (440, Felix Lope de Vega Carpio, 0.324028994627)   
3  (100, Gutierre de Cetina, Vicario General, 0.3...   
4           (212, Juan de la Cuesta, 0.275496104774)   
5  (184, Pedro Fernandez de Castro, Conde de Lemo...   
6          (331, Jose de Valdivielso, 0.19371734585)   
7  (138, Alfonso Lopez de Zuniga y Perez de Guzma...   
8         (214, Hernando de Vallejo, 0.141732955367)   
9      (246, Pedro de Padilla, Fray, 0.141279228553)   

                                                  pr  
0  (440, Felix Lope de Vega Carpio, 0.0913271805648)  
1  (215, Francisco Murcia de la Llana, 0.06762283...  
2  (208, Miguel de Cervantes Saavedra, 0.05370848...  
3  (100, Gutierre de Cetina, Vicario General, 0.0...  
4  (184, Pedro Fernandez de Castro, Conde de Lemo...  
5        (331, Jose de Valdivielso, 0.0276538628563)  
6          (212, Juan de la Cuesta, 0.0238471966926)  
7                (70, Mateo Aleman, 0.0234550261592)  
8     (246, Pedro de Padilla, Fray, 0.0222343004712)  
9            (433, Alonso de Barros, 0.020160796306)  

In [63]:
# Histogram of node degrees in g.
pd.Series(deg.values()).hist()


Out[63]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f45ea6672d0>

In [64]:
# Empirical degree distribution: x is the degree, y the share of nodes that
# have that degree.
deg_prob = prob_dist(deg.values())
plt.scatter(deg_prob.index, deg_prob)


Out[64]:
<matplotlib.collections.PathCollection at 0x7f45ea808f90>

In [65]:
# Histogram of betweenness centrality values.
pd.Series(bc.values()).hist()


Out[65]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f45ea808f50>

In [66]:
# Histogram of closeness centrality values.
pd.Series(cc.values()).hist()


Out[66]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f45ea65e050>

In [67]:
# Histogram of eigenvector centrality values.
pd.Series(ec.values()).hist()


Out[67]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f45ea86b250>

In [68]:
# Histogram of PageRank values.
pd.Series(pr.values()).hist()


Out[68]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f45ea554a10>

Assortativity

Degree


In [69]:
# Degree assortativity coefficient of g, shown at full precision.
nx.degree_assortativity_coefficient(g)


Out[69]:
-0.2576662682255253

In [70]:
# Keep the coefficient in ``r`` for the assortativity summary further down and
# print it rounded to one decimal place.
r = nx.degree_assortativity_coefficient(g)
print("%3.1f"%r)


-0.3

In [71]:
# Order the nodes from highest to lowest degree and build the adjacency matrix
# in that order, so high-degree nodes occupy the first rows and columns.
# NOTE(review): nx.to_numpy_matrix no longer exists in NetworkX 3.x — confirm the
# NetworkX version this notebook is pinned to before re-running.
nodes_by_deg = sorted(deg, key=deg.get, reverse=True)
mtrx = nx.to_numpy_matrix(g, nodelist=nodes_by_deg)

In [72]:
# Gather the "weight" attribute of every edge of g into a Series, then show the
# summary statistics of that distribution.
edge_weights = (data["weight"] for _, _, data in g.edges(data=True))
weight_sr = pd.Series(list(edge_weights))
weight_sr.describe()


Out[72]:
count    205.000000
mean       0.672976
std        0.344801
min        0.350000
25%        0.450000
50%        0.538346
75%        0.755556
max        2.602381
dtype: float64

In [73]:
# 75th percentile of the edge weights; used as the upper colour limit of the
# heatmaps below.
quant = weight_sr.quantile(.75)

In [74]:
# Figure size for the heatmaps (same value as already set for the drawings).
plt.rcParams['figure.figsize'] = (17, 12)

In [75]:
# Heatmap of the degree-ordered adjacency matrix. The colour scale is clipped
# to [0, quant], so every entry at or above quant gets the top colour.
heatmap = plt.imshow(mtrx)
heatmap.set_clim(0.0, quant)
plt.colorbar()


Out[75]:
<matplotlib.colorbar.Colorbar instance at 0x7f45eccab878>

In [76]:
# Keep every row but only the first 35 columns, i.e. the 35 highest-degree
# nodes in the degree ordering used to build mtrx.
stripmtrx = mtrx[:, :35]

In [77]:
# Same heatmap as for the full matrix, restricted to the 35-column strip and
# using the same [0, quant] colour limits.
heatmap = plt.imshow(stripmtrx)
heatmap.set_clim(0.0, quant)
plt.colorbar()


Out[77]:
<matplotlib.colorbar.Colorbar instance at 0x7f45ec4283f8>

Attribute


In [78]:
# Assortativity of g with respect to the "top_place" node attribute.
place = nx.attribute_assortativity_coefficient(g, "top_place")

In [79]:
# Assortativity of g with respect to the "top_genre" node attribute.
genre = nx.attribute_assortativity_coefficient(g, "top_genre")

In [80]:
# Assortativity of g with respect to the "role" node attribute.
role = nx.attribute_assortativity_coefficient(g, "role")

In [81]:
# Collect the degree and attribute assortativity coefficients in one place and
# display them. Despite the ``_df`` suffix this is a pandas Series.
assort_df = pd.Series({
    "deg": r,
    "role": role,
    "place": place,
    "genre": genre
})
assort_df


Out[81]:
deg     -0.257666
genre    0.326437
place    0.201207
role    -0.117883
dtype: float64

Clustering


In [82]:
# Average clustering coefficient of g (the "avg_clust" entry of
# basic_graph_stats), shown at full precision.
nx.average_clustering(g)


Out[82]:
0.6930736945241447

In [83]:
# Histogram of the per-node clustering coefficients.
pd.Series(nx.clustering(g).values()).hist()


Out[83]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f45ec576f90>

In [84]:
# Transitivity of g (the "transitivity" entry of basic_graph_stats).
nx.transitivity(g)


Out[84]:
0.3148602256007847

Paths


In [85]:
# Diameter of g (the "diameter" entry of basic_graph_stats).
nx.diameter(g)


Out[85]:
6

In [86]:
# Average shortest path length of g (the "avg_short_path" entry of
# basic_graph_stats), shown at full precision.
nx.average_shortest_path_length(g)


Out[86]:
2.892280701754386